diff --git a/scripts/ml/MultModelsCl.py b/scripts/ml/MultModelsCl.py index 078d60a..dfcd87c 100755 --- a/scripts/ml/MultModelsCl.py +++ b/scripts/ml/MultModelsCl.py @@ -147,7 +147,7 @@ def MultModelsCl(input_df, target, skf_cv mlp = MLPClassifier(max_iter = 500, **rs) dt = DecisionTreeClassifier(**rs) ets = ExtraTreesClassifier(**rs) - + et = ExtraTreeClassifier(**rs) rf = RandomForestClassifier(**rs, n_estimators = 1000 ) rf2 = RandomForestClassifier( min_samples_leaf = 5 @@ -169,7 +169,6 @@ def MultModelsCl(input_df, target, skf_cv abc = AdaBoostClassifier(**rs) bc = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) - et = ExtraTreeClassifier(**rs) gpc = GaussianProcessClassifier(**rs) gbc = GradientBoostingClassifier(**rs) qda = QuadraticDiscriminantAnalysis() @@ -181,14 +180,13 @@ def MultModelsCl(input_df, target, skf_cv , ('Gaussian NB' , gnb) , ('Naive Bayes' , nb) , ('K-Nearest Neighbors' , knn) - , ('SVM' , svc) + , ('SVC' , svc) , ('MLP' , mlp) , ('Decision Tree' , dt) , ('Extra Trees' , ets) , ('Extra Tree' , et) , ('Random Forest' , rf) , ('Random Forest2' , rf2) - , ('Naive Bayes' , nb) , ('XGBoost' , xgb) , ('LDA' , lda) , ('Multinomial' , mnb) diff --git a/scripts/ml/alr_config.py b/scripts/ml/alr_config.py index 55a82eb..996748f 100755 --- a/scripts/ml/alr_config.py +++ b/scripts/ml/alr_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , 
'\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', Counter(y_bts) + , '\nTarget features ratio (test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): print('\nPass: No. 
of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/embb_config.py b/scripts/ml/embb_config.py index be4d609..381f95f 100755 --- a/scripts/ml/embb_config.py +++ b/scripts/ml/embb_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , '\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', Counter(y_bts) + , '\nTarget features ratio 
(test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): print('\nPass: No. 
of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/gid_config.py b/scripts/ml/gid_config.py index 73594af..8541086 100755 --- a/scripts/ml/gid_config.py +++ b/scripts/ml/gid_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , '\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', Counter(y_bts) + , '\nTarget features ratio 
(test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): print('\nPass: No. 
of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/katg_config.py b/scripts/ml/katg_config.py index 59cf24a..1258f39 100755 --- a/scripts/ml/katg_config.py +++ b/scripts/ml/katg_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , '\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', Counter(y_bts) + , '\nTarget features ratio 
(test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): print('\nPass: No. 
of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/log_gid_7030.txt b/scripts/ml/log_gid_7030.txt index d0eca99..d1a73f6 100644 --- a/scripts/ml/log_gid_7030.txt +++ b/scripts/ml/log_gid_7030.txt @@ -1,58 +1,10 @@ -/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:549: SettingWithCopyWarning: +/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead. from pandas import MultiIndex, Int64Index -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( 1.22.4 1.4.1 @@ -114,6 +66,7 @@ No. 
of columns for x_features: 174 ------------------------------------------------------------- Successfully split data with stratification: 70/30 +Input features data size: (119, 174) Train data size: (79, 174) Test data size: (40, 174) y_train numbers: Counter({0: 50, 1: 29}) @@ -123,6 +76,18 @@ y_test_numbers: Counter({0: 26, 1: 14}) y_test ratio: 1.8571428571428572 ------------------------------------------------------------- +index: 0 +ind: 1 + +Mask count check: True + +index: 1 +ind: 2 + +Mask count check: True +Original Data + Counter({0: 50, 1: 29}) Data dim: (79, 174) + Simple Random OverSampling Counter({1: 50, 0: 50}) (100, 174) @@ -146,11 +111,18 @@ Gene name: gid Drug name: streptomycin Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_7030/ + Sanity checks: -ML source data size: (119, 174) -Total input features: (79, 174) -Target feature numbers: Counter({0: 50, 1: 29}) -Target features ratio: 1.7241379310344827 +Total input features: 174 + +Training data size: (79, 174) +Test data size: (40, 174) + +Target feature numbers (training data): Counter({0: 50, 1: 29}) +Target features ratio (training data: 1.7241379310344827 + +Target feature numbers (test data): Counter({0: 26, 1: 14}) +Target features ratio (test data): 1.8571428571428572 ##################################################################### @@ -165,6 +137,8 @@ Other struc columns: ['rsa', 'kd_values', 'rd_values'] ================================================================ AAindex features (n): 123 +These are: + ['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 
'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'] ================================================================ Evolutionary features (n): 3 @@ -191,160 +165,7 @@ Pass: No. 
of features match Model_name: Logistic Regression Model func: LogisticRegression(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', 
QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', LogisticRegression(random_state=42))]) - -key: fit_time -value: [0.02347183 0.02686572 0.0307076 0.02450418 0.02523112 0.02600336 - 0.02457237 0.02707028 0.02396798 0.02385616] - -mean value: 0.025625061988830567 - -key: score_time -value: [0.01185417 0.00754213 0.01175284 0.01153183 0.01146317 0.01155972 - 0.01150918 0.01154113 0.01146698 0.01157117] - -mean value: 0.011179232597351074 - -key: test_mcc -value: [ 0.48795004 nan 0.46666667 0.46666667 0.74535599 0.6 - 0.77459667 -0.29277002 0.74535599 0.09128709] - -mean value: nan - -key: train_mcc -value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248 - 0.84744528 0.87830162 0.87830162 0.91085367] - -mean value: 0.8911693370709752 - -key: test_accuracy -value: [0.75 nan 0.75 0.75 0.875 0.75 - 0.875 0.5 0.875 0.57142857] - -mean value: nan - -key: train_accuracy -value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197 - 0.92957746 0.94366197 0.94366197 0.95833333] - -mean value: 0.9493544600938967 - -key: test_fscore -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to 
converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -392,407 +213,72 @@ Traceback (most recent call last): ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', 
RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', LogisticRegression(random_state=42))]) -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +key: fit_time +value: [0.0366919 0.03364944 0.04210711 0.02675176 0.02643323 0.02680922 + 0.02558064 0.02712679 0.02658963 0.02430034] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+mean value: 0.029604005813598632 -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +key: score_time +value: [0.01219821 0.00790191 0.01219344 0.01198339 0.01178885 0.01182723 + 0.01177597 0.01183701 0.01194477 0.01177979] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +mean value: 0.011523056030273437 -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+key: test_mcc +value: [ 0.48795004 nan 0.46666667 0.46666667 0.74535599 0.6 + 0.77459667 -0.29277002 0.74535599 0.09128709] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +mean value: nan -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +key: train_mcc +value: [0.91067388 0.88152145 0.90865445 0.90865445 0.90865445 0.87863248 + 0.84744528 0.87830162 0.87830162 0.91085367] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+mean value: 0.8911693370709752 -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +key: test_accuracy +value: [0.75 nan 0.75 0.75 0.875 0.75 + 0.875 0.5 0.875 0.57142857] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +mean value: nan -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+key: train_accuracy +value: [0.95774648 0.94366197 0.95774648 0.95774648 0.95774648 0.94366197 + 0.92957746 0.94366197 0.94366197 0.95833333] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +mean value: 0.9493544600938967 -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -[0.5 nan 0.66666667 0.66666667 0.8 0.75 +key: test_fscore +value: [0.5 nan 0.66666667 0.66666667 0.8 0.75 0.85714286 0. 
0.8 0.4 ] mean value: nan @@ -869,7 +355,591 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return 
Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -886,16 +956,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegressionCV(random_state=42))]) key: fit_time -value: [0.53741789 0.69267988 0.48575902 0.56827617 0.53399444 0.59227157 - 0.52956486 0.53589177 0.53715754 0.65834451] +value: [0.61568356 0.59473109 0.68769026 0.86828899 0.86762929 0.69212842 + 0.64828324 0.55574918 0.70834208 0.64952207] -mean value: 0.5671357631683349 +mean value: 0.688804817199707 key: score_time -value: [0.01202703 0.00642514 0.01250315 0.01604509 0.01320004 0.01179504 - 0.01307392 
0.01395082 0.01368642 0.01708794] +value: [0.01196933 0.00655389 0.01198149 0.01488495 0.01500368 0.012532 + 0.01223516 0.01520872 0.01664925 0.01557779] -mean value: 0.012979459762573243 +mean value: 0.013259625434875489 key: test_mcc value: [0.1490712 nan 0.46666667 0.46666667 0.46666667 0.6 @@ -986,7 +1056,88 @@ Accuracy on Blind test: 0.72 Model_name: Gaussian NB Model func: GaussianNB() -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, 
validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', GaussianNB())]) + +key: fit_time +value: [0.01205611 0.01126623 0.00906897 0.00851703 0.008286 0.00828934 + 0.00834227 0.00843191 0.00838542 0.00838542] + +mean value: 0.009102869033813476 + +key: score_time +value: [0.01170921 0.00516486 0.00907874 0.00863242 0.00857592 0.00856733 + 0.00852275 0.0085392 0.00850701 0.00861979] + +mean value: 0.008591723442077637 + +key: test_mcc +value: [-0.06666667 nan 0.06666667 -0.25819889 0. 
0.6 + 0.06666667 -0.06666667 -0.46666667 0.54772256] + +mean value: nan + +key: train_mcc +value: [0.63589744 0.3217793 0.4760037 0.61337378 0.48136848 0.59111411 + 0.59101806 0.61021596 0.61021596 0.61560271] + +mean value: 0.554658949505139 + +key: test_accuracy +value: [0.5 nan 0.5 0.375 0.375 0.75 + 0.5 0.5 0.25 0.71428571] + +mean value: nan + +key: train_accuracy +value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789 + 0.76056338 0.77464789 0.77464789 0.77777778] + +mean value: 0.7327073552425665 + +key: test_fscore +value: [0.33333333 nan 0.5 0.28571429 0.54545455 0.75 + 0.5 0.33333333 0.25 0.66666667] + +mean value: nan + +key: train_fscore +value: [0.76923077 0.6097561 0.67532468 0.75362319 0.68493151 0.75 + 0.74626866 0.75757576 0.75757576 0.76470588] + +mean value: 0.7268992291592407 + +key: test_precision +value: [0.33333333 nan 0.4 0.25 0.375 0.6 + 0.4 0.33333333 0.2 0.5 ] + +mean value: nan +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -1034,87 +1185,8 @@ Traceback (most recent call last): ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging 
Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', GaussianNB())]) - -key: fit_time -value: [0.01300025 0.01131201 0.01010251 0.00901103 0.00829911 0.00847673 - 0.00845861 0.00850534 0.00863886 0.00855303] - -mean value: 0.009435749053955078 - -key: score_time -value: [0.01224828 0.00450897 0.00966024 0.00870657 0.00878048 0.00860023 - 0.00848126 0.00857925 0.00853801 0.00865126] - -mean value: 0.008675456047058105 - -key: test_mcc -value: [-0.06666667 nan 0.06666667 -0.25819889 0. 
0.6 - 0.06666667 -0.06666667 -0.46666667 0.54772256] - -mean value: nan - -key: train_mcc -value: [0.63589744 0.3217793 0.4760037 0.61337378 0.48136848 0.59111411 - 0.59101806 0.61021596 0.61021596 0.61560271] - -mean value: 0.554658949505139 - -key: test_accuracy -value: [0.5 nan 0.5 0.375 0.375 0.75 - 0.5 0.5 0.25 0.71428571] - -mean value: nan - -key: train_accuracy -value: [0.83098592 0.54929577 0.64788732 0.76056338 0.67605634 0.77464789 - 0.76056338 0.77464789 0.77464789 0.77777778] - -mean value: 0.7327073552425665 - -key: test_fscore -value: [0.33333333 nan 0.5 0.28571429 0.54545455 0.75 - 0.5 0.33333333 0.25 0.66666667] - -mean value: nan - -key: train_fscore -value: [0.76923077 0.6097561 0.67532468 0.75362319 0.68493151 0.75 - 0.74626866 0.75757576 0.75757576 0.76470588] - -mean value: 0.7268992291592407 - -key: test_precision -value: [0.33333333 nan 0.4 0.25 0.375 0.6 - 0.4 0.33333333 0.2 0.5 ] - -mean value: nan +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. 
+ _warn_prf(average, modifier, msg_start, len(result)) key: train_precision value: [0.76923077 0.44642857 0.50980392 0.60465116 0.53191489 0.63157895 @@ -1176,107 +1248,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. - _warn_prf(average, modifier, msg_start, len(result)) -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. 
- _warn_prf(average, modifier, msg_start, len(result)) -Pipeline(steps=[('prep', +Running model pipeline: Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -1293,16 +1265,16 @@ Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.00992274 0.00908804 0.01092887 0.00855708 0.00939322 0.00976706 - 0.00851512 0.00860643 0.00846624 0.00871468] +value: [0.00886774 0.00868058 0.00883889 0.00872326 0.00893021 0.00871801 + 0.00855303 0.00864196 0.00846744 0.00878453] -mean value: 0.009195947647094726 +mean value: 0.00872056484222412 key: score_time -value: [0.01040363 0.00474834 0.00870204 0.00912285 0.00954747 0.0092864 - 0.00849009 0.00853539 0.00894189 0.00856447] +value: [0.00870299 0.0042479 0.00908852 0.00942707 0.00868988 0.00882626 + 0.00883055 0.00864148 0.00859904 0.0089457 ] -mean value: 0.008634257316589355 +mean value: 0.00839993953704834 key: test_mcc value: [-0.29277002 nan 0.1490712 0. 
0.46666667 0.25819889 @@ -1393,84 +1365,7 @@ Accuracy on Blind test: 0.62 Model_name: K-Nearest Neighbors Model func: KNeighborsClassifier() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', KNeighborsClassifier())]) - -key: fit_time -value: [0.00953317 0.01132226 0.00892282 0.00931168 0.00937891 0.00864029 - 0.00916529 0.00923681 0.00909519 0.00845647] - -mean value: 0.00930628776550293 - -key: score_time -value: [0.04363441 0.00601792 0.01144981 0.01010704 0.00944614 0.00927305 - 0.00956726 0.01000333 0.014678 0.00946236] - -mean value: 0.013363933563232422 - -key: test_mcc -value: [ 0.48795004 nan 0.46666667 0.48795004 0.46666667 0.25819889 - 0.48795004 0. -0.4472136 0.64549722] - -mean value: nan - -key: train_mcc -value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373 - 0.4660252 0.49787306 0.56963094 0.52098273] - -mean value: 0.47620698672135525 - -key: test_accuracy -value: [0.75 nan 0.75 0.75 0.75 0.625 - 0.75 0.625 0.375 0.85714286] - -mean value: nan - -key: train_accuracy -value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789 - 0.76056338 0.77464789 0.8028169 0.77777778] - -mean value: 0.7636932707355243 - -key: test_fscore -value: [0.5 nan 0.66666667 0.5 0.66666667 0.57142857 - 0.5 0. 0. 
0.66666667] - -mean value: nan - -key: train_fscore -value: [0.53658537 0.6 0.5 0.57142857 0.6 0.57894737 - 0.58536585 0.63636364 0.66666667 0.61904762] - -mean value: 0.5894405081439741 - -key: test_precision -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -1520,9 +1415,84 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. - _warn_prf(average, modifier, msg_start, len(result)) -[1. nan 0.66666667 1. 
0.66666667 0.5 +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge 
ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', KNeighborsClassifier())]) + +key: fit_time +value: [0.00874114 0.01144648 0.0086658 0.00911021 0.00933123 0.00923896 + 0.00934291 0.00915861 0.00966763 0.00907493] + +mean value: 0.009377789497375489 + +key: score_time +value: [0.0487206 0.006109 0.01472044 0.01498747 0.0102694 0.01016641 + 0.01003814 0.01018739 0.01121283 0.0092566 ] + +mean value: 0.014566826820373534 + +key: test_mcc +value: [ 0.48795004 nan 0.46666667 0.48795004 0.46666667 0.25819889 + 0.48795004 0. -0.4472136 0.64549722] + +mean value: nan + +key: train_mcc +value: [0.39440661 0.50503962 0.35808137 0.42968701 0.50503962 0.51530373 + 0.4660252 0.49787306 0.56963094 0.52098273] + +mean value: 0.47620698672135525 + +key: test_accuracy +value: [0.75 nan 0.75 0.75 0.75 0.625 + 0.75 0.625 0.375 0.85714286] + +mean value: nan + +key: train_accuracy +value: [0.73239437 0.77464789 0.71830986 0.74647887 0.77464789 0.77464789 + 0.76056338 0.77464789 0.8028169 0.77777778] + +mean value: 0.7636932707355243 + +key: test_fscore +value: [0.5 nan 0.66666667 0.5 0.66666667 0.57142857 + 0.5 0. 0. 
0.66666667] + +mean value: nan + +key: train_fscore +value: [0.53658537 0.6 0.5 0.57142857 0.6 0.57894737 + 0.58536585 0.63636364 0.66666667 0.61904762] + +mean value: 0.5894405081439741 + +key: test_precision +value: [1. nan 0.66666667 1. 0.66666667 0.5 1. 0. 0. 1. ] mean value: nan @@ -1587,7 +1557,107 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. + _warn_prf(average, modifier, msg_start, len(result)) +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. 
+ _warn_prf(average, modifier, msg_start, len(result)) +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in 
dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -1604,16 +1674,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SVC(random_state=42))]) key: fit_time -value: [0.00903344 0.0091846 0.00961351 0.00917578 0.0096128 0.00965309 - 0.00942969 0.0092864 0.00940824 0.00987148] +value: [0.01034451 0.0102098 0.00897241 0.00901365 0.0088799 0.00927591 + 0.00998259 
0.01031137 0.01042175 0.01003623] -mean value: 0.00942690372467041 +mean value: 0.009744811058044433 key: score_time -value: [0.00883532 0.00442553 0.00902295 0.00915003 0.00871062 0.00882101 - 0.00899625 0.00945115 0.00905657 0.00935435] +value: [0.0098021 0.00447941 0.0090816 0.00874925 0.00895143 0.00880194 + 0.0096159 0.00961876 0.00949502 0.00887012] -mean value: 0.008582377433776855 +mean value: 0.00874655246734619 key: test_mcc value: [0.48795004 nan 0.48795004 0.48795004 0.48795004 0.74535599 @@ -1702,55 +1772,7 @@ Accuracy on Blind test: 0.68 Model_name: MLP Model func: MLPClassifier(max_iter=500, random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = 
transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) 
-ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, @@ -1779,16 +1801,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MLPClassifier(max_iter=500, random_state=42))]) key: fit_time -value: [0.36889744 0.38598275 0.39493704 0.38300252 0.44200897 0.40054107 - 0.43577385 0.40177441 0.4005568 0.47488117] +value: [0.55261087 0.37978768 0.40832138 0.53674722 0.38702536 0.39668012 + 
0.45907712 0.38502288 0.56192374 0.36470985] -mean value: 0.40883560180664064 +mean value: 0.4431906223297119 key: score_time -value: [0.01228499 0.00684047 0.01225257 0.0120151 0.0121727 0.0118804 - 0.01362801 0.01195502 0.01198483 0.01258683] +value: [0.0121274 0.00678778 0.01212931 0.01215839 0.01212978 0.01212168 + 0.01210904 0.01213956 0.01217985 0.01214075] -mean value: 0.011760091781616211 +mean value: 0.011602354049682618 key: test_mcc value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.6 @@ -1846,7 +1868,55 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.66666667 nan 0.73333333 0.63333333 0.73333333 0.8 +value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict 
+ Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) 
+ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +[0.66666667 nan 0.73333333 0.63333333 0.73333333 0.8 0.9 0.4 0.73333333 0.45 ] mean value: nan @@ -1885,103 +1955,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', +Running model pipeline: Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -1998,16 +1972,16 @@ Pipeline(steps=[('prep', ('model', DecisionTreeClassifier(random_state=42))]) key: fit_time -value: [0.01571584 0.01269054 0.01136661 0.01086378 0.00994062 0.00986242 - 0.01026821 0.01012969 0.00951695 0.01019168] +value: [0.0133059 0.01309037 0.01105928 0.00985241 0.00974655 0.00949502 + 0.00951028 0.00971913 0.00915694 0.00968456] -mean value: 0.011054635047912598 +mean value: 0.010462045669555664 key: score_time -value: [0.01275587 0.00504398 0.00905156 0.00882149 
0.00875378 0.00857162 - 0.00925541 0.00867009 0.00854111 0.00858116] +value: [0.01178765 0.00480032 0.00885653 0.00857639 0.00867677 0.00849724 + 0.00837517 0.00841808 0.0084424 0.00845408] -mean value: 0.008804607391357421 +mean value: 0.00848846435546875 key: test_mcc value: [0.25819889 nan 0.77459667 0.77459667 1. 0.6 @@ -2103,7 +2077,103 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -2120,16 +2190,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreesClassifier(random_state=42))]) key: fit_time -value: [0.08180642 0.08224797 0.08108139 0.08005643 0.08400655 0.08195567 - 0.083637 0.08279467 0.08375001 0.08794379] +value: [0.07962322 0.07938647 0.0790019 0.07999277 0.07923889 0.0796628 + 0.0828433 0.08033228 0.08008289 0.08019423] -mean value: 0.0829279899597168 +mean value: 0.0800358772277832 key: score_time -value: [0.01682615 0.00449514 0.01753378 0.01695251 0.01697278 0.01840544 - 0.01810956 
0.01845503 0.01751947 0.01944685] +value: [0.0167439 0.00441599 0.01676273 0.01670766 0.01685452 0.01676798 + 0.01670623 0.0169127 0.01742005 0.01756716] -mean value: 0.016471672058105468 +mean value: 0.015685892105102538 key: test_mcc value: [ 0.48795004 nan 0.74535599 -0.06666667 0.48795004 0.6 @@ -2214,55 +2284,7 @@ Accuracy on Blind test: 0.7 Model_name: Extra Tree Model func: ExtraTreeClassifier(random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', 
LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, @@ -2291,16 +2313,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreeClassifier(random_state=42))]) key: fit_time -value: [0.00917554 0.00918388 0.01030898 0.00913 0.00861025 0.00867605 - 0.00893021 0.00875688 0.00861931 0.00882649] +value: [0.00855803 0.00837731 0.0085175 0.00922489 0.00838661 0.0093317 + 0.00823355 0.00841951 0.00875092 0.00846076] -mean value: 0.009021759033203125 +mean value: 0.008626079559326172 key: score_time -value: [0.00911689 0.00463772 0.00918674 0.00968862 0.00868392 
0.00932837 - 0.00896454 0.00857091 0.00860333 0.00897813] +value: [0.0088222 0.00437546 0.00849724 0.00917506 0.00854778 0.00906491 + 0.00846219 0.0085783 0.00849533 0.00848794] -mean value: 0.008575916290283203 +mean value: 0.00825064182281494 key: test_mcc value: [ 0.1490712 nan 0.46666667 -0.06666667 0.1490712 1. @@ -2383,20 +2405,7 @@ MCC on Blind test: 0.28 Accuracy on Blind test: 0.6 Model_name: Random Forest -Model func: RandomForestClassifier(n_estimators=1000, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', 
PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Model func: RandomForestClassifier(n_estimators=1000, random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -2444,9 +2453,20 @@ Traceback (most recent call last): ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. 
- warn( -Pipeline(steps=[('prep', + +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', 
RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -2464,16 +2484,16 @@ Pipeline(steps=[('prep', RandomForestClassifier(n_estimators=1000, random_state=42))]) key: fit_time -value: [1.00864148 1.00945377 1.01145029 1.02092719 1.02262855 1.02108669 - 1.03264499 1.03610301 0.99609876 0.99602723] +value: [0.99221253 1.00797582 1.01229548 1.01305676 0.99885559 0.99018335 + 0.99298692 0.98534179 0.99022436 0.99618149] -mean value: 1.0155061960220337 +mean value: 0.9979314088821412 key: score_time -value: [0.09197426 0.00447416 0.09149528 0.14959788 0.09277177 0.09351969 - 0.09247947 0.09435916 0.08660769 0.09346843] +value: [0.09150147 0.00473714 0.09475374 0.14177513 0.09324384 0.088516 + 0.09004092 0.0887692 0.08675432 0.09039307] -mean value: 0.08907477855682373 +mean value: 0.08704848289489746 key: test_mcc value: [ 0.48795004 nan 0.74535599 0.25819889 0.48795004 0.77459667 @@ -2572,7 +2592,9 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', 
RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -2710,16 +2732,16 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform _warn_prf(average, modifier, msg_start, len(result)) key: fit_time -value: [1.67271042 0.90515995 0.86359429 0.90234375 0.96636534 0.87128806 - 0.8657763 0.87282944 0.88145161 0.83073807] +value: [1.71230912 0.82662535 0.84278703 0.86609745 0.93222427 0.88134384 + 0.90477657 0.81548691 0.85592294 0.81256628] -mean value: 0.9632257223129272 +mean value: 0.9450139760971069 key: score_time -value: [0.21594787 0.00481105 0.17994237 0.17904091 0.21373725 0.20884776 - 0.17252564 0.20995092 0.20395184 0.22969842] +value: [0.17363358 0.00458622 0.17959046 0.21361065 0.21203494 0.17785597 + 0.22314191 0.22879744 0.1943121 0.22552323] -mean value: 0.18184540271759034 +mean value: 0.18330864906311034 key: test_mcc value: [0.48795004 nan 0.46666667 0.48795004 0.48795004 0.74535599 @@ -2839,16 +2861,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.00874162 0.00872374 0.00922036 0.00902081 0.00892901 0.00882077 - 0.0089941 0.00903893 0.00934553 0.00889754] +value: [0.00989461 0.00995517 0.01037693 0.00869536 0.00957036 0.00846767 + 0.00956583 0.00920486 0.00919366 0.00914788] -mean value: 0.008973240852355957 +mean value: 
0.009407234191894532 key: score_time -value: [0.00875211 0.00437403 0.00965929 0.00880575 0.00879216 0.00873375 - 0.00872302 0.00874352 0.00864363 0.0090158 ] +value: [0.00942397 0.00489521 0.00996447 0.0094893 0.00896859 0.00871682 + 0.00867128 0.00852704 0.00943971 0.00916314] -mean value: 0.00842430591583252 +mean value: 0.008725953102111817 key: test_mcc value: [-0.29277002 nan 0.1490712 0. 0.46666667 0.25819889 @@ -3029,16 +3051,16 @@ Running model pipeline: Pipeline(steps=[('prep', validate_parameters=None, verbosity=0))]) key: fit_time -value: [0.08834314 0.03863764 0.04068565 0.0569241 0.07989025 0.09637451 - 0.07425714 0.03369784 0.03540182 0.0447278 ] +value: [0.14565539 0.03244257 0.05331469 0.03653431 0.03764868 0.03769803 + 0.0387702 0.03797412 0.03629756 0.06974673] -mean value: 0.058893990516662595 +mean value: 0.05260822772979736 key: score_time -value: [0.01159501 0.00512886 0.0114882 0.01043797 0.01063323 0.01238704 - 0.01091051 0.01177049 0.01157951 0.01068592] +value: [0.01081181 0.00494266 0.01059127 0.01046586 0.01142001 0.01110435 + 0.01143312 0.01069999 0.01111579 0.01013684] -mean value: 0.010661673545837403 +mean value: 0.01027216911315918 key: test_mcc value: [0.74535599 nan 0.74535599 1. 1. 
0.6 @@ -3199,16 +3221,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LinearDiscriminantAnalysis())]) key: fit_time -value: [0.02861381 0.01710796 0.01704741 0.02752972 0.01703906 0.0173142 - 0.0171032 0.01710773 0.01708961 0.01672173] +value: [0.02565622 0.01628876 0.04023051 0.03998733 0.04027724 0.03976345 + 0.03319478 0.03937459 0.03932738 0.03963566] -mean value: 0.01926743984222412 +mean value: 0.035373592376708986 key: score_time -value: [0.01201081 0.00585961 0.01186156 0.0116775 0.01184201 0.01153588 - 0.01147604 0.01150894 0.01155281 0.01160359] +value: [0.01197481 0.00604296 0.02297831 0.0204556 0.02054095 0.02182841 + 0.02347851 0.02111363 0.02293038 0.02010679] -mean value: 0.011092877388000489 +mean value: 0.01914503574371338 key: test_mcc value: [ 0.77459667 nan 0.77459667 0.25819889 0.1490712 0.25819889 @@ -3424,16 +3446,16 @@ Pipeline(steps=[('prep', ('model', MultinomialNB())]) key: fit_time -value: [0.01906657 0.00880098 0.0084815 0.00850773 0.00844955 0.0085187 - 0.00853658 0.00868392 0.00857282 0.00848532] +value: [0.01452279 0.00926256 0.01047921 0.00929594 0.00924563 0.00948405 + 0.00887465 0.00973344 0.00907183 0.00911689] -mean value: 0.009610366821289063 +mean value: 0.009908699989318847 key: score_time -value: [0.00889063 0.00434232 0.00853539 0.00835586 0.00840497 0.00841403 - 0.00842023 0.00843906 0.00841045 0.00846195] +value: [0.00940919 0.00476885 0.00952935 0.00888324 0.00898743 0.00851965 + 0.00895286 0.00930977 0.00942397 0.00866628] -mean value: 0.008067488670349121 +mean value: 0.008645057678222656 key: test_mcc value: [-0.06666667 nan 0.74535599 0.1490712 0.74535599 0.48795004 @@ -3555,16 +3577,16 @@ Running model pipeline: Pipeline(steps=[('prep', PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.00974989 0.01256609 0.01255202 0.01197124 0.01251864 0.01321697 - 0.01367021 0.012501 0.01297402 0.01391482] +value: [0.01042056 0.01261353 0.01291299 0.0122683 0.01264381 0.01341867 
+ 0.01380587 0.02929306 0.03145814 0.01436043] -mean value: 0.012563490867614746 +mean value: 0.016319537162780763 key: score_time -value: [0.00909543 0.00559807 0.01082706 0.01195002 0.01125741 0.01126671 - 0.01122785 0.0112493 0.01124573 0.01133037] +value: [0.00941873 0.00572133 0.01120472 0.01170421 0.01149988 0.0115881 + 0.01157069 0.01993561 0.01752448 0.01183438] -mean value: 0.010504794120788575 +mean value: 0.012200212478637696 key: test_mcc value: [ 0.48795004 nan 0.46666667 0.25819889 0.46666667 0.29277002 @@ -3733,16 +3755,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SGDClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.01293874 0.01226783 0.01234913 0.01200628 0.01229644 0.01227403 - 0.0121088 0.01211166 0.01241088 0.01239038] +value: [0.01315737 0.01237726 0.01252127 0.01203179 0.01235271 0.01230645 + 0.01251531 0.01241851 0.01265669 0.01256299] -mean value: 0.01231541633605957 +mean value: 0.012490034103393555 key: score_time -value: [0.00994158 0.00611711 0.01135921 0.01127291 0.01130056 0.01134491 - 0.01127958 0.01128697 0.01127529 0.01119423] +value: [0.01045203 0.00614882 0.01162457 0.01147652 0.01155424 0.0219295 + 0.01169086 0.01164365 0.01157975 0.01150036] -mean value: 0.010637235641479493 +mean value: 0.011960029602050781 key: test_mcc value: [ 0.48795004 nan 0.06666667 0.46666667 0.46666667 0.77459667 @@ -3911,16 +3933,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', AdaBoostClassifier(random_state=42))]) key: fit_time -value: [0.08456826 0.07948041 0.08607769 0.07694125 0.076864 0.07677102 - 0.07680297 0.08007669 0.07709074 0.07826424] +value: [0.08493304 0.08175111 0.08369136 0.08218789 0.08108163 0.08307648 + 0.08458972 0.08156514 0.08213234 0.08229351] -mean value: 0.07929372787475586 +mean value: 0.08273022174835205 key: score_time -value: [0.01530957 0.00458312 0.01483393 0.01464057 0.01444936 0.01463532 - 0.01460624 0.01483655 0.01560974 0.01539278] +value: [0.01478672 0.00491738 
0.01560259 0.01490855 0.015692 0.0162847 + 0.01612735 0.0150218 0.01616716 0.01500249] -mean value: 0.013889718055725097 +mean value: 0.014451074600219726 key: test_mcc value: [0.74535599 nan 0.25819889 1. 1. 0.6 @@ -4131,16 +4153,16 @@ Pipeline(steps=[('prep', random_state=42))]) key: fit_time -value: [0.03413796 0.02626228 0.03821015 0.04624867 0.04347873 0.03817987 - 0.04379535 0.03776097 0.0358882 0.04368043] +value: [0.03491879 0.02965927 0.05070114 0.04325414 0.03710151 0.04708791 + 0.04051805 0.05239248 0.0487051 0.02523303] -mean value: 0.03876426219940186 +mean value: 0.04095714092254639 key: score_time -value: [0.01770973 0.00455666 0.03768253 0.02266836 0.02085519 0.02388167 - 0.03207541 0.02712274 0.03294992 0.02003694] +value: [0.02097654 0.00495124 0.03698468 0.01774836 0.03910136 0.03681517 + 0.02154684 0.03762245 0.01965308 0.01705694] -mean value: 0.023953914642333984 +mean value: 0.02524566650390625 key: test_mcc value: [0.74535599 nan 0.74535599 1. 1. 0.6 @@ -4259,16 +4281,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianProcessClassifier(random_state=42))]) key: fit_time -value: [0.01672459 0.01487875 0.01532412 0.01514101 0.01528788 0.01714444 - 0.015203 0.01521587 0.01526046 0.02377939] +value: [0.0122931 0.01473594 0.01660824 0.01521087 0.0151813 0.01513147 + 0.02177501 0.01516676 0.01508951 0.01509905] -mean value: 0.016395950317382814 +mean value: 0.015629124641418458 key: score_time -value: [0.01178098 0.00585222 0.01161599 0.01168609 0.01185441 0.01175952 - 0.01163292 0.01173878 0.01166463 0.01176715] +value: [0.01127362 0.00562072 0.01160932 0.01198483 0.01171732 0.01169324 + 0.01196361 0.01168847 0.01174283 0.01177049] -mean value: 0.011135268211364745 +mean value: 0.011106443405151368 key: test_mcc value: [ 0.1490712 nan 0.74535599 -0.4472136 0.48795004 0.77459667 @@ -4430,16 +4452,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GradientBoostingClassifier(random_state=42))]) key: fit_time 
-value: [0.16644502 0.15082502 0.17178988 0.15492988 0.15712714 0.15647697 - 0.15715313 0.15811729 0.12830138 0.17333484] +value: [0.16718197 0.15094709 0.1711874 0.15357113 0.15574336 0.15652514 + 0.15600491 0.15759158 0.1278398 0.17194462] -mean value: 0.1574500560760498 +mean value: 0.15685369968414306 key: score_time -value: [0.00920653 0.00468278 0.00959659 0.00929022 0.009588 0.0092895 - 0.00926042 0.00923944 0.00946832 0.00917864] +value: [0.00918221 0.00462985 0.00915265 0.00921178 0.00939751 0.00919819 + 0.00938702 0.00930691 0.00974512 0.00909448] -mean value: 0.008880043029785156 +mean value: 0.008830571174621582 key: test_mcc value: [0.74535599 nan 0.46666667 0.77459667 1. 0.6 @@ -4671,16 +4693,16 @@ Pipeline(steps=[('prep', ('model', QuadraticDiscriminantAnalysis())]) key: fit_time -value: [0.01092577 0.01285172 0.01395202 0.01353192 0.01414061 0.01355553 - 0.01369452 0.01398444 0.01363087 0.01456404] +value: [0.01070952 0.01322937 0.01341176 0.02545214 0.02427244 0.02728081 + 0.0287056 0.0138135 0.0139854 0.01462626] -mean value: 0.013483142852783203 +mean value: 0.01854867935180664 key: score_time -value: [0.01237583 0.00615168 0.01228261 0.01294899 0.01175308 0.01328015 - 0.01323271 0.01173949 0.01321077 0.01327252] +value: [0.01166081 0.00594354 0.01179552 0.02356005 0.02031541 0.0121429 + 0.01230502 0.0118804 0.01562524 0.02522063] -mean value: 0.012024784088134765 +mean value: 0.015044951438903808 key: test_mcc value: [-0.46666667 nan -0.29277002 -0.29277002 -0.25819889 0.1490712 @@ -4793,16 +4815,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', RidgeClassifier(random_state=42))]) key: fit_time -value: [0.02124691 0.01254821 0.01597905 0.03487706 0.01338506 0.01416159 - 0.01252818 0.01272392 0.03272414 0.03284836] +value: [0.02903557 0.03166127 0.02882695 0.03186989 0.03176379 0.03159523 + 0.03179622 0.03586578 0.03634501 0.0317626 ] -mean value: 0.020302248001098634 +mean value: 0.032052230834960935 key: score_time -value: 
[0.01174521 0.00608397 0.01180792 0.01995564 0.01202583 0.01157284 - 0.01147699 0.01158595 0.02148509 0.02132988] +value: [0.0200243 0.01208067 0.01757097 0.01424813 0.02132797 0.01160336 + 0.02168918 0.02083015 0.02316499 0.0211401 ] -mean value: 0.01390693187713623 +mean value: 0.018367981910705565 key: test_mcc value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6 @@ -4918,12 +4940,12 @@ Traceback (most recent call last): ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:107: SettingWithCopyWarning: +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:115: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:110: SettingWithCopyWarning: +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:118: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy @@ -4980,16 +5002,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', RidgeClassifierCV(cv=10))]) key: fit_time -value: [0.18935609 0.18282723 0.19223666 0.17152286 0.21240568 0.25662017 - 0.20577478 0.19005656 0.19769907 0.12422442] +value: [0.17595625 0.17720246 0.23368216 0.2604897 0.20024538 0.22788954 + 0.17445135 0.18081164 0.20982456 0.23728633] -mean value: 0.19227235317230223 +mean value: 0.20778393745422363 key: score_time -value: [0.02291918 0.01264501 0.02030349 0.02279305 0.02387357 0.02354836 - 0.02319574 0.02317953 0.01894236 0.01210165] +value: [0.0230782 0.01266408 0.01375008 0.02251482 0.01994252 0.0219214 + 
0.0211935 0.02238035 0.02076578 0.01844668] -mean value: 0.020350193977355956 +mean value: 0.019665741920471193 key: test_mcc value: [0.74535599 nan 0.46666667 0.6 0.74535599 0.6 @@ -5205,16 +5227,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegression(random_state=42))]) key: fit_time -value: [0.02668691 0.02563429 0.02581406 0.02586079 0.02586055 0.02636909 - 0.02358246 0.02464199 0.02246094 0.02333188] +value: [0.02724361 0.02601719 0.02918768 0.02592826 0.02982807 0.02673316 + 0.02467227 0.02559614 0.02310491 0.02411938] -mean value: 0.02502429485321045 +mean value: 0.026243066787719725 key: score_time -value: [0.01169181 0.00628114 0.00639367 0.0120008 0.01172805 0.0117023 - 0.01168251 0.01178622 0.01171184 0.01170659] +value: [0.0119884 0.00643563 0.01028681 0.01191902 0.01183963 0.01192975 + 0.01201987 0.0121851 0.01160169 0.0118041 ] -mean value: 0.010668492317199707 +mean value: 0.011201000213623047 key: test_mcc value: [0.65465367 nan nan 0.81649658 0.6 0.81649658 @@ -5980,16 +6002,16 @@ Pipeline(steps=[('prep', ('model', LogisticRegressionCV(random_state=42))]) key: fit_time -value: [0.67839217 0.48034787 0.64701414 0.51438808 0.60679531 0.65355921 - 0.49431872 0.5318253 0.58624101 0.55083251] +value: [0.59747982 0.64376473 0.71673417 0.69791865 0.47388649 1.05191803 + 0.63061428 0.58212113 0.71103501 0.60764217] -mean value: 0.5743714332580566 +mean value: 0.6713114500045776 key: score_time -value: [0.01309419 0.00647235 0.00659251 0.01193786 0.01207781 0.01458716 - 0.01210022 0.01188588 0.01342583 0.01195741] +value: [0.01522326 0.0066607 0.00642395 0.01201344 0.01186037 0.01513076 + 0.01198053 0.01196361 0.01532221 0.01208282] -mean value: 0.011413121223449707 +mean value: 0.011866164207458497 key: test_mcc value: [0.6 nan nan 0.81649658 0.40824829 0.81649658 @@ -6107,16 +6129,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianNB())]) key: fit_time -value: [0.01233888 0.01138854 0.01001382 0.00979853 
0.00967813 0.00976253 - 0.00926352 0.00987411 0.00966692 0.00973201] +value: [0.0123117 0.00990081 0.00964355 0.0083921 0.00831342 0.00837278 + 0.00830507 0.00861096 0.00842547 0.0087893 ] -mean value: 0.01015169620513916 +mean value: 0.00910651683807373 key: score_time -value: [0.01177788 0.00480366 0.00498605 0.00967026 0.00950933 0.00950623 - 0.00948453 0.00956655 0.00957298 0.0094316 ] +value: [0.02768016 0.0048759 0.00427985 0.00843048 0.00844884 0.0084765 + 0.00845861 0.00852132 0.00856638 0.00871158] -mean value: 0.008830904960632324 +mean value: 0.0096449613571167 key: test_mcc value: [-0.33333333 nan nan 0.2 0.5 0.21821789 @@ -6166,7 +6188,8 @@ value: [0.63235294 0.61111111 0.79245283 0.82352941 0.74576271 0.72131148 mean value: 0.7575522057355462 key: test_recall -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +value: [0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 1. ] +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -6262,7 +6285,6 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -[0.8 nan nan 0.6 1. 0.8 0.6 1. 0.8 1. 
] mean value: nan @@ -6330,16 +6352,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.01022792 0.00995421 0.01036096 0.00926161 0.01014042 0.00990748 - 0.00995111 0.00963116 0.00946689 0.01010084] +value: [0.00870204 0.00929213 0.00878859 0.00928044 0.00863266 0.00860143 + 0.00866652 0.00958157 0.0096643 0.00961232] -mean value: 0.009900259971618652 +mean value: 0.009082198143005371 key: score_time -value: [0.00955129 0.00466061 0.00484204 0.00874567 0.00902104 0.00952482 - 0.00964499 0.00959897 0.00870395 0.00953412] +value: [0.00865149 0.00441241 0.00431967 0.00865722 0.00870681 0.00857425 + 0.00904512 0.00933361 0.00930762 0.00936174] -mean value: 0.008382749557495118 +mean value: 0.008036994934082031 key: test_mcc value: [0.21821789 nan nan 0.21821789 0.6 0.40824829 @@ -6553,16 +6575,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', KNeighborsClassifier())]) key: fit_time -value: [0.00974655 0.00932145 0.01000047 0.00874233 0.00839186 0.00837994 - 0.00840974 0.00860238 0.00835204 0.00856972] +value: [0.00962067 0.00819492 0.00826192 0.00823355 0.00825858 0.0082972 + 0.00824308 0.0086062 0.0086453 0.00830889] -mean value: 0.00885164737701416 +mean value: 0.00846703052520752 key: score_time -value: [0.01500773 0.00461984 0.00566602 0.01129222 0.00940275 0.00949812 - 0.01562929 0.00947976 0.00929308 0.00932264] +value: [0.0149827 0.00423265 0.00422812 0.00957608 0.00942659 0.0144031 + 0.00948668 0.00979161 0.01269317 0.00941229] -mean value: 0.00992114543914795 +mean value: 0.009823298454284668 key: test_mcc value: [0.2 nan nan 0.5 0.5 0. 
@@ -6872,16 +6894,16 @@ Pipeline(steps=[('prep', ('model', SVC(random_state=42))]) key: fit_time -value: [0.01139522 0.00962234 0.0094831 0.00941205 0.0092721 0.00994563 - 0.00933862 0.00987744 0.00951862 0.00935507] +value: [0.00986791 0.01017141 0.00918722 0.00919867 0.0091598 0.00916481 + 0.00927687 0.00930643 0.00905871 0.00913 ] -mean value: 0.009722018241882324 +mean value: 0.00935218334197998 key: score_time -value: [0.00916266 0.00445056 0.00433707 0.00926876 0.00877261 0.00935793 - 0.00875735 0.00887942 0.00921822 0.00887632] +value: [0.00940585 0.0044322 0.00426841 0.00933194 0.0090754 0.00920653 + 0.0087769 0.0087471 0.0087831 0.00867248] -mean value: 0.008108091354370118 +mean value: 0.008069992065429688 key: test_mcc value: [0.2 nan nan 0.81649658 0.65465367 0.81649658 @@ -6999,16 +7021,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MLPClassifier(max_iter=500, random_state=42))]) key: fit_time -value: [0.37492514 0.37046218 0.38234639 0.44610929 0.35609388 0.35016608 - 0.38355708 0.37324095 0.34541011 0.59514403] +value: [0.37450528 0.35082269 0.39461231 0.50876474 0.3729012 0.39715028 + 0.39133334 0.37829828 0.58005452 0.37452221] -mean value: 0.39774551391601565 +mean value: 0.4122964859008789 key: score_time -value: [0.01198101 0.00661445 0.00665283 0.01227736 0.01198196 0.0120244 - 0.01200557 0.01206875 0.01202154 0.0120492 ] +value: [0.01205969 0.00659204 0.00670195 0.01243973 0.01200485 0.01204276 + 0.01203871 0.01203322 0.012043 0.01204824] -mean value: 0.010967707633972168 +mean value: 0.011000418663024902 key: test_mcc value: [0.2 nan nan 0.5 0.6 0.81649658 @@ -7215,16 +7237,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', DecisionTreeClassifier(random_state=42))]) key: fit_time -value: [0.01511621 0.01407719 0.01127934 0.01145411 0.01078749 0.01093793 - 0.01074314 0.01044631 0.01040411 0.01171279] +value: [0.0144136 0.01382351 0.01106048 0.01080775 0.0104351 0.01044345 + 0.01019263 0.00999427 0.0097239 
0.01033235] -mean value: 0.01169586181640625 +mean value: 0.011122703552246094 key: score_time -value: [0.01174808 0.00465941 0.00485921 0.00918746 0.00877357 0.00866652 - 0.00861716 0.0088768 0.00946498 0.00948787] +value: [0.01140714 0.00496936 0.00459981 0.00874352 0.00843453 0.00867224 + 0.00845146 0.0083878 0.00837374 0.00837708] -mean value: 0.008434104919433593 +mean value: 0.008041667938232421 key: test_mcc value: [1. nan nan 0.6 0.81649658 0.6 @@ -7527,16 +7549,16 @@ Pipeline(steps=[('prep', ('model', ExtraTreesClassifier(random_state=42))]) key: fit_time -value: [0.08262682 0.08500862 0.0852046 0.08543134 0.08545947 0.08390474 - 0.08447599 0.08485103 0.08712196 0.08414125] +value: [0.08056545 0.08084106 0.08170795 0.08255649 0.08109879 0.08101583 + 0.08092213 0.08285975 0.08272982 0.08114171] -mean value: 0.08482258319854737 +mean value: 0.0815438985824585 key: score_time -value: [0.01772976 0.0046699 0.00469708 0.01746058 0.01755953 0.01747441 - 0.01821804 0.01745749 0.01838636 0.01701069] +value: [0.01655602 0.00445127 0.0046792 0.01720977 0.01671934 0.01684332 + 0.01761675 0.01713228 0.0171361 0.01674366] -mean value: 0.015066385269165039 +mean value: 0.014508771896362304 key: test_mcc value: [0.81649658 nan nan 0.6 0.81649658 0.65465367 @@ -7647,16 +7669,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreeClassifier(random_state=42))]) key: fit_time -value: [0.00895452 0.00875449 0.00881481 0.00910234 0.00885272 0.00899577 - 0.00944829 0.00882363 0.00928545 0.00872445] +value: [0.00848937 0.00846291 0.00848842 0.00857568 0.00871515 0.00847149 + 0.0084784 0.00839734 0.00870109 0.00886941] -mean value: 0.008975648880004882 +mean value: 0.008564925193786621 key: score_time -value: [0.00846648 0.00439858 0.00429606 0.00868988 0.00855374 0.00891685 - 0.0090065 0.00850105 0.00932431 0.00891042] +value: [0.00837517 0.00423932 0.00420213 0.00869727 0.00871015 0.00843763 + 0.00841641 0.00845981 0.00853753 0.00839472] -mean value: 
0.007906389236450196 +mean value: 0.007647013664245606 key: test_mcc value: [0.40824829 nan nan 0.40824829 0.65465367 0.65465367 @@ -7864,16 +7886,16 @@ Running model pipeline: Pipeline(steps=[('prep', RandomForestClassifier(n_estimators=1000, random_state=42))]) key: fit_time -value: [1.05830765 1.05878472 1.03069663 1.03076839 1.05383444 1.04063582 - 1.02942109 1.03039336 1.04405308 1.04590487] +value: [1.02116418 1.0257473 1.08174706 1.0322454 1.0248096 1.0253284 + 1.02173543 1.02932763 1.02692318 1.02705503] -mean value: 1.0422800064086915 +mean value: 1.0316083192825318 key: score_time -value: [0.09411407 0.00471926 0.00459909 0.09293795 0.09200311 0.08624506 - 0.08817077 0.08822632 0.08993959 0.09330368] +value: [0.09387398 0.00442934 0.0045464 0.09425235 0.0929327 0.09006643 + 0.09262896 0.09255028 0.08933902 0.09436941] -mean value: 0.07342588901519775 +mean value: 0.07489888668060303 key: test_mcc value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658 @@ -8203,16 +8225,16 @@ Pipeline(steps=[('prep', oob_score=True, random_state=42))]) key: fit_time -value: [0.85999107 0.87137699 0.85217786 0.87492132 0.86495948 0.84439301 - 0.88340187 0.85964608 0.85427666 0.96079803] +value: [0.80205727 0.85240149 0.84424591 0.98081374 0.8455205 0.89721966 + 0.84490728 0.85584474 0.9097147 0.88582087] -mean value: 0.8725942373275757 +mean value: 0.8718546152114868 key: score_time -value: [0.2267487 0.00457191 0.00463676 0.16841388 0.15269232 0.22285342 - 0.1705575 0.19267631 0.23203325 0.16924214] +value: [0.20501709 0.00500822 0.0045805 0.19695258 0.12441659 0.21853828 + 0.22464013 0.22292161 0.20894432 0.14386797] -mean value: 0.1544426202774048 +mean value: 0.1554887294769287 key: test_mcc value: [0.81649658 nan nan 0.65465367 1. 
0.81649658 @@ -8330,16 +8352,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.02133918 0.00901937 0.00918055 0.00923777 0.00924492 0.00955749 - 0.00870299 0.00861335 0.00901318 0.00863385] +value: [0.02338266 0.0094502 0.00960112 0.01006866 0.00941682 0.00956082 + 0.00956559 0.00960374 0.0094769 0.00946879] -mean value: 0.010254263877868652 +mean value: 0.010959529876708984 key: score_time -value: [0.01596808 0.00419235 0.00490522 0.00853872 0.0092361 0.00877595 - 0.00848365 0.00854468 0.00853395 0.00849319] +value: [0.0109334 0.00468445 0.00506949 0.00934649 0.00933862 0.00940108 + 0.00930977 0.00933194 0.00933313 0.00930548] -mean value: 0.008567190170288086 +mean value: 0.008605384826660156 key: test_mcc value: [0.21821789 nan nan 0.21821789 0.6 0.40824829 @@ -8566,16 +8588,16 @@ Running model pipeline: Pipeline(steps=[('prep', validate_parameters=None, verbosity=0))]) key: fit_time -value: [0.0385325 0.07943583 0.07845998 0.04148769 0.03596449 0.03703904 - 0.07786202 0.07349586 0.03462934 0.08684826] +value: [0.03791428 0.039891 0.04524422 0.05228448 0.03879333 0.04074168 + 0.03861666 0.04192305 0.03786802 0.04126 ] -mean value: 0.05837550163269043 +mean value: 0.0414536714553833 key: score_time -value: [0.01043248 0.00514793 0.00486374 0.01063561 0.01104069 0.01106238 - 0.01134038 0.0113461 0.01019478 0.01071811] +value: [0.01134562 0.0051043 0.00507307 0.01123905 0.01100111 0.0111165 + 0.01112223 0.01104426 0.0112226 0.01109934] -mean value: 0.009678220748901368 +mean value: 0.009936809539794922 key: test_mcc value: [1. nan nan 1. 1. 
0.6 @@ -8877,16 +8899,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LinearDiscriminantAnalysis())]) key: fit_time -value: [0.03867555 0.02319288 0.02706194 0.02529454 0.05284071 0.11862731 - 0.03852725 0.04383326 0.03280854 0.04009724] +value: [0.02424765 0.03926849 0.02169585 0.02134705 0.03105235 0.01888251 + 0.01905227 0.04464221 0.01857352 0.03482223] -mean value: 0.04409592151641846 +mean value: 0.027358412742614746 key: score_time -value: [0.0272522 0.00630307 0.00678635 0.01231098 0.02257872 0.02594495 - 0.01236582 0.01228881 0.02403021 0.02088833] +value: [0.02080226 0.00596833 0.00588465 0.01170087 0.01165652 0.01164246 + 0.01558542 0.01165223 0.01164913 0.02148533] -mean value: 0.017074942588806152 +mean value: 0.012802720069885254 key: test_mcc value: [0.65465367 nan nan 0.65465367 0.81649658 0.2 @@ -9093,16 +9115,16 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -[0.02235413 0.01077485 0.01104784 0.00940537 0.00992942 0.00974631 - 0.01046252 0.01258326 0.00965023 0.00989842] +[0.01784444 0.00874496 0.00863242 0.00858641 0.00845957 0.00878644 + 0.00931096 0.00858283 0.00858426 0.0092175 ] -mean value: 0.011585235595703125 +mean value: 0.009674978256225587 key: score_time -value: [0.010818 0.0060277 0.00485873 0.01032233 0.00985336 0.0115025 - 0.00923038 0.00966311 0.01026726 0.01118207] +value: [0.00886011 0.00435925 0.0042181 0.00851774 0.00852823 0.00907302 + 0.00854731 0.00856113 0.00867009 0.00898337] -mean value: 0.009372544288635255 +mean value: 0.007831835746765136 key: test_mcc value: [0. 
nan nan 0.40824829 0.2 0.81649658 @@ -9221,16 +9243,16 @@ Running model pipeline: Pipeline(steps=[('prep', PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.01186442 0.01330304 0.01402235 0.01382852 0.01425862 0.01367593 - 0.01314926 0.01479983 0.01317763 0.01350975] +value: [0.00957155 0.01297784 0.01338482 0.01337719 0.01387429 0.01330495 + 0.01264739 0.01446295 0.01269341 0.01309991] -mean value: 0.01355893611907959 +mean value: 0.01293942928314209 key: score_time -value: [0.0099504 0.00586057 0.00639057 0.01186395 0.01181006 0.01167011 - 0.01159191 0.01164412 0.01165104 0.01212764] +value: [0.0086 0.00586104 0.00588679 0.01135945 0.01134872 0.01132274 + 0.01132727 0.01135826 0.01136208 0.01132488] -mean value: 0.010456037521362305 +mean value: 0.009975123405456542 key: test_mcc value: [0.40824829 nan nan 0.81649658 0.6 0.81649658 @@ -9444,16 +9466,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SGDClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.01379657 0.01267099 0.01356316 0.01448107 0.01277828 0.01268101 - 0.01251578 0.01251745 0.01239395 0.01302099] +value: [0.01255083 0.01222014 0.01237512 0.0122931 0.01268792 0.01234984 + 0.01206565 0.01226902 0.01228833 0.01291275] -mean value: 0.013041925430297852 +mean value: 0.012401270866394042 key: score_time -value: [0.01063323 0.00612164 0.00623727 0.01173258 0.01149225 0.01172876 - 0.01138878 0.0114789 0.0114975 0.01158094] +value: [0.01079416 0.00594401 0.00597167 0.01558065 0.01139426 0.01133919 + 0.01131606 0.01139021 0.01131845 0.01140761] -mean value: 0.010389184951782227 +mean value: 0.010645627975463867 key: test_mcc value: [0. 
nan nan 0.40824829 0.6 0.81649658 @@ -9763,16 +9785,16 @@ Pipeline(steps=[('prep', ('model', AdaBoostClassifier(random_state=42))]) key: fit_time -value: [0.12627959 0.11029863 0.10071945 0.09155011 0.08990955 0.09815741 - 0.09487963 0.10431266 0.08676529 0.08815813] +value: [0.09700513 0.08642244 0.08795667 0.08845925 0.08627391 0.08648276 + 0.08725667 0.08758473 0.08776522 0.08776069] -mean value: 0.09910304546356201 +mean value: 0.08829674720764161 key: score_time -value: [0.0188818 0.0063622 0.00565004 0.01617861 0.01684022 0.01686287 - 0.01757121 0.01591206 0.0148592 0.01506853] +value: [0.01467133 0.00474548 0.00496674 0.01501131 0.01460814 0.01463032 + 0.01517797 0.01503372 0.01542163 0.01494527] -mean value: 0.014418673515319825 +mean value: 0.01292119026184082 key: test_mcc value: [0.65465367 nan nan 1. 1. 0.6 @@ -9885,16 +9907,16 @@ Running model pipeline: Pipeline(steps=[('prep', random_state=42))]) key: fit_time -value: [0.03483057 0.03842282 0.03175402 0.02523565 0.04487014 0.03826332 - 0.03274846 0.04900002 0.04477096 0.03169918] +value: [0.03509688 0.04568958 0.0302496 0.03195786 0.02884889 0.03385544 + 0.05140829 0.04297328 0.03036499 0.0328958 ] -mean value: 0.037159514427185056 +mean value: 0.03633406162261963 key: score_time -value: [0.02208352 0.00828576 0.00562572 0.02282691 0.02469707 0.02239871 - 0.03312135 0.02478552 0.02329421 0.02396297] +value: [0.02096653 0.00952625 0.00549054 0.02393532 0.02444291 0.02201509 + 0.03557968 0.02399278 0.02232742 0.02703047] -mean value: 0.021108174324035646 +mean value: 0.021530699729919434 key: test_mcc value: [1. nan nan 1. 1. 
0.6 @@ -10106,16 +10128,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianProcessClassifier(random_state=42))]) key: fit_time -value: [0.01488805 0.01686454 0.02178621 0.01625896 0.0167253 0.01618171 - 0.01634598 0.02672648 0.02222109 0.02587366] +value: [0.01417232 0.01547766 0.02389431 0.02053976 0.0161953 0.01604009 + 0.02661467 0.01625609 0.01613426 0.01619077] -mean value: 0.019387197494506837 +mean value: 0.018151521682739258 key: score_time -value: [0.0114007 0.00593019 0.00617599 0.01183033 0.01183867 0.01179862 - 0.01180983 0.01191282 0.01200986 0.01189065] +value: [0.01128316 0.00590849 0.00611544 0.01191759 0.01187563 0.01181817 + 0.01201296 0.01186419 0.0119288 0.01185203] -mean value: 0.01065976619720459 +mean value: 0.010657644271850586 key: test_mcc value: [0.81649658 nan nan 0.40824829 0.40824829 0.65465367 @@ -10440,16 +10462,16 @@ Pipeline(steps=[('prep', ('model', GradientBoostingClassifier(random_state=42))]) key: fit_time -value: [0.23585057 0.21870351 0.2274735 0.20173454 0.19497991 0.21246147 - 0.19365239 0.22760653 0.19109488 0.20255446] +value: [0.22346067 0.21064496 0.22775483 0.20194101 0.20313954 0.22486734 + 0.20193768 0.22959948 0.19129062 0.20147419] -mean value: 0.2106111764907837 +mean value: 0.21161103248596191 key: score_time -value: [0.01025057 0.00483108 0.00519347 0.00991392 0.00999832 0.00909162 - 0.01007318 0.00993586 0.00930429 0.01151347] +value: [0.009547 0.00503063 0.00500417 0.00985026 0.01011467 0.01005173 + 0.00999117 0.01001072 0.01000142 0.00996733] -mean value: 0.009010577201843261 +mean value: 0.0089569091796875 key: test_mcc value: [1. 
nan nan 0.40824829 0.81649658 0.6 @@ -10560,16 +10582,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', QuadraticDiscriminantAnalysis())]) key: fit_time -value: [0.01237321 0.014323 0.01440096 0.01429844 0.0142436 0.01766992 - 0.01455665 0.01430321 0.01442313 0.01517463] +value: [0.01206875 0.01536727 0.01444435 0.0142355 0.01431799 0.01439047 + 0.01433372 0.01445889 0.01462865 0.01538968] -mean value: 0.01457667350769043 +mean value: 0.014363527297973633 key: score_time -value: [0.01152301 0.00598216 0.00594306 0.01162028 0.01163912 0.01281118 - 0.01165009 0.01300716 0.01285219 0.0118072 ] +value: [0.01155353 0.00604582 0.0060041 0.01183033 0.01183915 0.01176929 + 0.0118506 0.01583171 0.01520729 0.01568818] -mean value: 0.010883545875549317 +mean value: 0.011761999130249024 key: test_mcc value: [0.65465367 nan nan 0.21821789 0.81649658 0.40824829 @@ -10776,16 +10798,16 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', RidgeClassifier(random_state=42))]) key: fit_time -value: [0.03715086 0.01295328 0.01913691 0.02935195 0.03285313 0.05956721 - 0.03427505 0.03019619 0.02694535 0.03023982] +value: [0.02940583 0.03634906 0.03245473 0.03262615 0.03238082 0.02871442 + 0.01274729 0.01276231 0.02434325 0.03222251] -mean value: 0.031266975402832034 +mean value: 0.027400636672973634 key: score_time -value: [0.01183128 0.00623417 0.00630283 0.02030396 0.02208257 0.03155112 - 0.02232051 0.01882124 0.02124667 0.02309394] +value: [0.0238688 0.01215315 0.0120945 0.01822114 0.02281642 0.01169848 + 0.01157546 0.01160693 0.0200026 0.02159095] -mean value: 0.018378829956054686 +mean value: 0.016562843322753908 key: test_mcc value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658 @@ -10981,12 +11003,12 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:128: SettingWithCopyWarning: 
+/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:136: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:131: SettingWithCopyWarning: +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:139: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy @@ -11056,16 +11078,16 @@ Pipeline(steps=[('prep', ('model', RidgeClassifierCV(cv=10))]) key: fit_time -value: [0.10745692 0.17037439 0.22704196 0.24130416 0.19570732 0.18806911 - 0.18574762 0.18309569 0.18836188 0.18505669] +value: [0.10528541 0.17085743 0.17106438 0.21791244 0.18295407 0.1117785 + 0.12498665 0.18348122 0.09826803 0.16421723] -mean value: 0.18722157478332518 +mean value: 0.15308053493499757 key: score_time -value: [0.01179934 0.01459885 0.01264095 0.02238703 0.02003407 0.02027035 - 0.01176071 0.02208042 0.02279568 0.02302146] +value: [0.01189971 0.01263165 0.01266694 0.02156377 0.01987004 0.01182532 + 0.0199995 0.02175546 0.01192117 0.02374363] -mean value: 0.018138885498046875 +mean value: 0.0167877197265625 key: test_mcc value: [0.81649658 nan nan 0.65465367 0.81649658 0.81649658 @@ -11182,71 +11204,86 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegression(random_state=42))]) key: fit_time -value: [0.03232861 0.02856731 0.02625203 0.02819514 0.0350008 0.03808117 - 0.03861642 0.03147697 0.03068471 0.02853966] +value: [0.0232265 0.02718496 0.02470827 0.04355407 0.02657723 0.02697539 + 0.02654147 0.02200747 0.03470922 0.02654457] -mean value: 0.031774282455444336 +mean value: 0.028202915191650392 
key: score_time -value: [0.01172328 0.01187897 0.0063858 0.01171517 0.0116365 0.01235962 - 0.0118258 0.01185846 0.01206326 0.01185799] +value: [0.01154375 0.01186514 0.00623894 0.01168394 0.01151109 0.01170611 + 0.01173449 0.01145792 0.01147342 0.01157451] -mean value: 0.011330485343933105 +mean value: 0.011078929901123047 key: test_mcc -value: [0.2 0.81649658 nan 0.40824829 0.6 0.65465367 +value: [0.40824829 0.65465367 nan 0.40824829 0.81649658 0.81649658 0.40824829 0.65465367 0.21821789 0.81649658] mean value: nan key: train_mcc -value: [0.95555556 0.95650071 0.88910845 0.93356387 0.93356387 0.95555556 - 0.88910845 0.91201231 0.93356387 0.91201231] +value: [0.88910845 0.91111111 0.88910845 0.91111111 0.93356387 0.88910845 + 0.93356387 0.88910845 0.95555556 0.88910845] -mean value: 0.9270544956323105 +mean value: 0.9090447765314074 key: test_accuracy -value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9] +value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9] mean value: nan key: train_accuracy -value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778 - 0.94444444 0.95555556 0.96666667 0.95555556] +value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444 + 0.96666667 0.94444444 0.97777778 0.94444444] -mean value: 0.9633333333333334 +mean value: 0.9544444444444444 key: test_fscore -value: [0.6 0.90909091 nan 0.66666667 0.8 0.75 +value: [0.72727273 0.83333333 nan 0.72727273 0.90909091 0.88888889 0.66666667 0.83333333 0.66666667 0.90909091] mean value: nan key: train_fscore -value: [0.97777778 0.97826087 0.94505495 0.96629213 0.96703297 0.97777778 - 0.94382022 0.95652174 0.96703297 0.95652174] +value: [0.94505495 0.95555556 0.94382022 0.95555556 0.96703297 0.94382022 + 0.96703297 0.94382022 0.97777778 0.94505495] -mean value: 0.9636093142053084 +mean value: 0.9544525387222017 key: test_precision -value: [0.6 0.83333333 nan 0.75 0.8 1. +value: [0.66666667 0.71428571 nan 0.66666667 0.83333333 1. 
0.75 0.71428571 0.57142857 0.83333333] mean value: nan key: train_precision -value: [0.97777778 0.95744681 0.93478261 0.97727273 0.95652174 0.97777778 - 0.95454545 0.93617021 0.95652174 0.93617021] +value: [0.93478261 0.95555556 0.95454545 0.95555556 0.95652174 0.95454545 + 0.95652174 0.95454545 0.97777778 0.93478261] -mean value: 0.9564987058372812 +mean value: 0.9535133948177427 key: test_recall -value: [0.6 1. nan 0.6 0.8 0.6 0.6 1. 0.8 1. ] +value: [0.8 1. nan 0.8 1. 0.8 0.6 1. 0.8 1. ] mean value: nan -key: train_recall -value:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +key: train_recall /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
Increase the number of iterations (max_iter) or scale the data as shown in: @@ -11790,37 +11827,38 @@ Increase the number of iterations (max_iter) or scale the data as shown in: Please also refer to the documentation for alternative solver options: https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression n_iter_i = _check_optimize_result( - [0.97777778 1. 0.95555556 0.95555556 0.97777778 0.97777778 - 0.93333333 0.97777778 0.97777778 0.97777778] -mean value: 0.9711111111111111 +value: [0.95555556 0.95555556 0.93333333 0.95555556 0.97777778 0.93333333 + 0.97777778 0.93333333 0.97777778 0.95555556] + +mean value: 0.9555555555555556 key: test_roc_auc -value: [0.6 0.9 nan 0.7 0.8 0.8 0.7 0.8 0.6 0.9] +value: [0.7 0.8 nan 0.7 0.9 0.9 0.7 0.8 0.6 0.9] mean value: nan key: train_roc_auc -value: [0.97777778 0.97777778 0.94444444 0.96666667 0.96666667 0.97777778 - 0.94444444 0.95555556 0.96666667 0.95555556] +value: [0.94444444 0.95555556 0.94444444 0.95555556 0.96666667 0.94444444 + 0.96666667 0.94444444 0.97777778 0.94444444] -mean value: 0.9633333333333334 +mean value: 0.9544444444444445 key: test_jcc -value: [0.42857143 0.83333333 nan 0.5 0.66666667 0.6 +value: [0.57142857 0.71428571 nan 0.57142857 0.83333333 0.8 0.5 0.71428571 0.5 0.83333333] mean value: nan key: train_jcc -value: [0.95652174 0.95744681 0.89583333 0.93478261 0.93617021 0.95652174 - 0.89361702 0.91666667 0.93617021 0.91666667] +value: [0.89583333 0.91489362 0.89361702 0.91489362 0.93617021 0.89361702 + 0.93617021 0.89361702 0.95652174 0.89583333] -mean value: 0.9300397008942337 +mean value: 0.9131167129201356 -MCC on Blind test: 0.37 +MCC on Blind test: 0.42 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.75 Model_name: Logistic RegressionCV Model func: LogisticRegressionCV(random_state=42) @@ -11853,101 +11891,100 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegressionCV(random_state=42))]) key: fit_time -value: [0.55768061 0.55615211 0.58541512 
0.71465158 0.54918408 0.57905293 - 0.62343574 0.64894342 0.58266759 0.57590222] +value: [0.67746806 0.59781671 0.69200182 0.71163416 0.61749101 0.63935566 + 0.77134275 0.60597873 0.80454826 0.74892688] -mean value: 0.5973085403442383 +mean value: 0.686656403541565 key: score_time -value: [0.0122087 0.01510215 0.00644422 0.01736808 0.01485038 0.01541257 - 0.01190186 0.01582146 0.01725316 0.01302958] +value: [0.01449347 0.01300931 0.00646234 0.0151124 0.01494288 0.01490331 + 0.01547623 0.0119102 0.01237154 0.01246715] -mean value: 0.013939213752746583 +mean value: 0.01311488151550293 key: test_mcc -value: [0.2 0.81649658 nan 0.65465367 0.40824829 0.81649658 - 0.81649658 0.65465367 0.5 0.81649658] +value: [0.65465367 0.81649658 nan 0.2 0.65465367 1. + 0.65465367 0.65465367 0.21821789 0.81649658] mean value: nan key: train_mcc -value: [0.95555556 1. 0.95555556 1. 1. 1. - 0.93356387 1. 1. 0.97801929] +value: [1. 1. 1. 1. 1. 1. + 1. 0.95650071 1. 1. ] -mean value: 0.9822694276350975 +mean value: 0.9956500714595278 key: test_accuracy -value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9] +value: [0.8 0.9 nan 0.6 0.8 1. 0.8 0.8 0.6 0.9] mean value: nan key: train_accuracy -value: [0.97777778 1. 0.97777778 1. 1. 1. - 0.96666667 1. 1. 0.98888889] +value: [1. 1. 1. 1. 1. 1. + 1. 0.97777778 1. 1. ] -mean value: 0.991111111111111 +mean value: 0.9977777777777778 key: test_fscore -value: [0.6 0.90909091 nan 0.83333333 0.72727273 0.88888889 - 0.88888889 0.83333333 0.76923077 0.90909091] +value: [0.83333333 0.90909091 nan 0.6 0.83333333 1. + 0.75 0.83333333 0.66666667 0.90909091] mean value: nan key: train_fscore -value: [0.97777778 1. 0.97777778 1. 1. 1. - 0.96629213 1. 1. 0.98901099] +value: [1. 1. 1. 1. 1. 1. + 1. 0.97826087 1. 1. ] -mean value: 0.9910858679398006 +mean value: 0.9978260869565218 key: test_precision -value: [0.6 0.83333333 nan 0.71428571 0.66666667 1. - 1. 0.71428571 0.625 0.83333333] +value: [0.71428571 0.83333333 nan 0.6 0.71428571 1. + 1. 
0.71428571 0.57142857 0.83333333] mean value: nan key: train_precision -value: [0.97777778 1. 0.97777778 1. 1. 1. - 0.97727273 1. 1. 0.97826087] +value: [1. 1. 1. 1. 1. 1. + 1. 0.95744681 1. 1. ] -mean value: 0.99110891523935 +mean value: 0.9957446808510638 key: test_recall -value: [0.6 1. nan 1. 0.8 0.8 0.8 1. 1. 1. ] +value: [1. 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ] mean value: nan key: train_recall -value: [0.97777778 1. 0.97777778 1. 1. 1. - 0.95555556 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.991111111111111 +mean value: 1.0 key: test_roc_auc -value: [0.6 0.9 nan 0.8 0.7 0.9 0.9 0.8 0.7 0.9] +value: [0.8 0.9 nan 0.6 0.8 1. 0.8 0.8 0.6 0.9] mean value: nan key: train_roc_auc -value: [0.97777778 1. 0.97777778 1. 1. 1. - 0.96666667 1. 1. 0.98888889] +value: [1. 1. 1. 1. 1. 1. + 1. 0.97777778 1. 1. ] -mean value: 0.991111111111111 +mean value: 0.9977777777777778 key: test_jcc -value: [0.42857143 0.83333333 nan 0.71428571 0.57142857 0.8 - 0.8 0.71428571 0.625 0.83333333] +value: [0.71428571 0.83333333 nan 0.42857143 0.71428571 1. + 0.6 0.71428571 0.5 0.83333333] mean value: nan key: train_jcc -value: [0.95652174 1. 0.95652174 1. 1. 1. - 0.93478261 1. 1. 0.97826087] +value: [1. 1. 1. 1. 1. 1. + 1. 0.95744681 1. 1. 
] -mean value: 0.9826086956521739 +mean value: 0.9957446808510638 -MCC on Blind test: 0.42 +MCC on Blind test: 0.48 -Accuracy on Blind test: 0.75 +Accuracy on Blind test: 0.78 Model_name: Gaussian NB Model func: GaussianNB() @@ -12028,101 +12065,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianNB())]) key: fit_time -value: [0.01230741 0.01123023 0.00950027 0.00946021 0.00964046 0.00955129 - 0.00977182 0.00977874 0.00968146 0.00962782] +value: [0.01217723 0.00952673 0.00885081 0.00973725 0.00852966 0.00841856 + 0.00850034 0.0093751 0.0097971 0.00986147] -mean value: 0.010054969787597656 +mean value: 0.00947742462158203 key: score_time -value: [0.01180935 0.00984812 0.00469971 0.00937915 0.00949502 0.00958204 - 0.00940156 0.00952315 0.00942564 0.00944281] +value: [0.01179314 0.00913548 0.00443888 0.00915837 0.00861049 0.00860596 + 0.00853419 0.00947094 0.00863123 0.0086143 ] -mean value: 0.00926065444946289 +mean value: 0.008699297904968262 key: test_mcc -value: [-0.21821789 0.33333333 nan 0.2 0.5 -0.21821789 - 0.40824829 0. 0.21821789 0.5 ] +value: [0. 0.40824829 nan 0.2 0.5 0.5 + 0. 
0.40824829 0.21821789 0.21821789] mean value: nan key: train_mcc -value: [0.53495589 0.66097134 0.71269665 0.84970583 0.54684459 0.60350985 - 0.8230355 0.53031442 0.76486616 0.58456547] +value: [0.43305953 0.66097134 0.68957028 0.82548988 0.60971232 0.56980288 + 0.73405869 0.6350529 0.73624773 0.56454844] -mean value: 0.6611465698795331 +mean value: 0.6458513998944028 key: test_accuracy -value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7] +value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6] mean value: nan key: train_accuracy -value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889 - 0.91111111 0.75555556 0.87777778 0.77777778] +value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8 0.77777778 + 0.86666667 0.81111111 0.86666667 0.77777778] -mean value: 0.82 +mean value: 0.8166666666666667 key: test_fscore -value: [0.5 0.71428571 nan 0.6 0.76923077 0.5 - 0.72727273 0.61538462 0.66666667 0.76923077] +value: [0.61538462 0.72727273 nan 0.6 0.76923077 0.76923077 + 0.44444444 0.72727273 0.66666667 0.66666667] mean value: nan key: train_fscore -value: [0.78571429 0.84 0.85057471 0.92631579 0.79245283 0.81553398 - 0.90909091 0.78431373 0.86746988 0.80769231] +value: [0.75 0.84 0.84782609 0.91489362 0.81632653 0.8 + 0.86956522 0.82828283 0.86046512 0.79591837] -mean value: 0.8379158420394337 +mean value: 0.8323277763890184 key: test_precision -value: [0.42857143 0.55555556 nan 0.6 0.625 0.42857143 - 0.66666667 0.5 0.57142857 0.625 ] +value: [0.5 0.66666667 nan 0.6 0.625 0.625 + 0.5 0.66666667 0.57142857 0.57142857] mean value: nan key: train_precision -value: [0.65671642 0.76363636 0.88095238 0.88 0.68852459 0.72413793 - 0.93023256 0.70175439 0.94736842 0.71186441] +value: [0.62686567 0.76363636 0.82978723 0.87755102 0.75471698 0.72727273 + 0.85106383 0.75925926 0.90243902 0.73584906] -mean value: 0.7885187455634349 +mean value: 0.7828441168174185 key: test_recall -value: [0.6 1. nan 0.6 1. 0.6 0.8 0.8 0.8 1. ] +value: [0.8 0.8 nan 0.6 1. 1. 
0.4 0.8 0.8 0.8] mean value: nan key: train_recall -value: [0.97777778 0.93333333 0.82222222 0.97777778 0.93333333 0.93333333 - 0.88888889 0.88888889 0.8 0.93333333] +value: [0.93333333 0.93333333 0.86666667 0.95555556 0.88888889 0.88888889 + 0.88888889 0.91111111 0.82222222 0.86666667] -mean value: 0.9088888888888889 +mean value: 0.8955555555555555 key: test_roc_auc -value: [0.4 0.6 nan 0.6 0.7 0.4 0.7 0.5 0.6 0.7] +value: [0.5 0.7 nan 0.6 0.7 0.7 0.5 0.7 0.6 0.6] mean value: nan key: train_roc_auc -value: [0.73333333 0.82222222 0.85555556 0.92222222 0.75555556 0.78888889 - 0.91111111 0.75555556 0.87777778 0.77777778] +value: [0.68888889 0.82222222 0.84444444 0.91111111 0.8 0.77777778 + 0.86666667 0.81111111 0.86666667 0.77777778] -mean value: 0.8200000000000001 +mean value: 0.8166666666666667 key: test_jcc -value: [0.33333333 0.55555556 nan 0.42857143 0.625 0.33333333 - 0.57142857 0.44444444 0.5 0.625 ] +value: [0.44444444 0.57142857 nan 0.42857143 0.625 0.625 + 0.28571429 0.57142857 0.5 0.5 ] mean value: nan key: train_jcc -value: [0.64705882 0.72413793 0.74 0.8627451 0.65625 0.68852459 - 0.83333333 0.64516129 0.76595745 0.67741935] +value: [0.6 0.72413793 0.73584906 0.84313725 0.68965517 0.66666667 + 0.76923077 0.70689655 0.75510204 0.66101695] -mean value: 0.7240587868070179 +mean value: 0.7151692392544453 -MCC on Blind test: 0.31 +MCC on Blind test: 0.07 -Accuracy on Blind test: 0.65 +Accuracy on Blind test: 0.52 Model_name: Naive Bayes Model func: BernoulliNB() @@ -12251,101 +12288,101 @@ Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.00987244 0.00975513 0.00979519 0.00990796 0.00982952 0.00979829 - 0.00977135 0.00987482 0.00979662 0.01000142] +value: [0.00936604 0.00989652 0.00981975 0.00979877 0.00879645 0.00878477 + 0.00904775 0.0089612 0.00923443 0.00946689] -mean value: 0.009840273857116699 +mean value: 0.009317255020141602 key: score_time -value: [0.00938535 0.00935745 0.00481558 0.00939775 0.00938678 0.00941658 - 
0.00940442 0.00948787 0.00941515 0.00943661] +value: [0.0095799 0.00932693 0.0049901 0.00928187 0.00873828 0.00865293 + 0.00907826 0.0087533 0.00945044 0.00921941] -mean value: 0.008950352668762207 +mean value: 0.008707141876220703 key: test_mcc -value: [0.33333333 0.21821789 nan 0. 0.6 0.5 - 0.5 0.40824829 0.2 0.6 ] +value: [0.65465367 0.21821789 nan 0. 0.40824829 0.65465367 + 0.21821789 0. 0.40824829 0.40824829] mean value: nan key: train_mcc -value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767 - 0.58137767 0.62609903 0.69509522 0.53452248] +value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066 + 0.64700558 0.67082039 0.62609903 0.64700558] -mean value: 0.6219209651318979 +mean value: 0.6253661066190971 key: test_accuracy -value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8] +value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7] mean value: nan key: train_accuracy -value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889 - 0.78888889 0.81111111 0.84444444 0.76666667] +value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778 + 0.82222222 0.83333333 0.81111111 0.82222222] -mean value: 0.8088888888888889 +mean value: 0.8111111111111111 key: test_fscore -value: [0.33333333 0.66666667 nan 0.54545455 0.8 0.57142857 - 0.57142857 0.72727273 0.6 0.8 ] +value: [0.75 0.66666667 nan 0.54545455 0.72727273 0.75 + 0.5 0.44444444 0.66666667 0.66666667] mean value: nan key: train_fscore -value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8 0.77647059 - 0.77647059 0.8 0.83333333 0.75862069] +value: [0.7816092 0.86046512 0.7816092 0.78571429 0.79069767 0.76190476 + 0.81395349 0.82352941 0.8 0.81395349] -mean value: 0.7982858904944852 +mean value: 0.8013436617630212 key: test_precision -value: [1. 0.57142857 nan 0.5 0.8 1. - 1. 0.66666667 0.6 0.8 ] +value: [1. 0.57142857 nan 0.5 0.66666667 1. 
+ 0.66666667 0.5 0.75 0.75 ] mean value: nan key: train_precision -value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85 0.825 - 0.825 0.85 0.8974359 0.78571429] +value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282 + 0.85365854 0.875 0.85 0.85365854] -mean value: 0.8470142053068882 +mean value: 0.8449738675958188 key: test_recall -value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8] +value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6] mean value: nan key: train_recall -value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333 - 0.73333333 0.75555556 0.77777778 0.73333333] +value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111 + 0.77777778 0.77777778 0.75555556 0.77777778] -mean value: 0.7555555555555555 +mean value: 0.7622222222222222 key: test_roc_auc -value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8] +value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7] mean value: nan key: train_roc_auc -value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889 - 0.78888889 0.81111111 0.84444444 0.76666667] +value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778 + 0.82222222 0.83333333 0.81111111 0.82222222] -mean value: 0.8088888888888889 +mean value: 0.8111111111111111 key: test_jcc -value: [0.2 0.5 nan 0.375 0.66666667 0.4 - 0.4 0.57142857 0.42857143 0.66666667] +value: [0.6 0.5 nan 0.375 0.57142857 0.6 + 0.33333333 0.28571429 0.5 0.5 ] mean value: nan key: train_jcc -value: [0.74 0.68627451 0.61111111 0.6875 0.66666667 0.63461538 - 0.63461538 0.66666667 0.71428571 0.61111111] +value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462 + 0.68627451 0.7 0.66666667 0.68627451] -mean value: 0.665284654887596 +mean value: 0.6693626187775545 -MCC on Blind test: 0.12 +MCC on Blind test: 0.18 -Accuracy on Blind test: 0.6 +Accuracy on Blind test: 0.65 Model_name: K-Nearest Neighbors Model func: KNeighborsClassifier() @@ -12378,66 +12415,64 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', 
KNeighborsClassifier())]) key: fit_time -value: [0.00961757 0.00889277 0.009341 0.00934768 0.00927901 0.0093441 - 0.00930309 0.00911593 0.00942564 0.00927877] +value: [0.00885844 0.00982785 0.00806379 0.00800562 0.00807977 0.00805044 + 0.00798988 0.00812101 0.00803423 0.00798821] -mean value: 0.009294557571411132 +mean value: 0.008301925659179688 key: score_time -value: [0.01543903 0.010463 0.00477624 0.01466155 0.01007581 0.01023936 - 0.01033044 0.00999165 0.01001239 0.01013422] +value: [0.01715279 0.01941204 0.00412107 0.00909305 0.00904918 0.00910521 + 0.01407719 0.01381826 0.01290107 0.01411104] -mean value: 0.0106123685836792 +mean value: 0.012284088134765624 key: test_mcc -value: [ 0.40824829 0.65465367 nan 0.2 0.81649658 0. - 0.2 0.21821789 -0.21821789 0.2 ] +value: [ 0.81649658 0.6 nan 0.40824829 0.40824829 0. + 0.40824829 0. -0.33333333 0. ] mean value: nan key: train_mcc -value: [0.57792049 0.51161666 0.51161666 0.60238451 0.64700558 0.58137767 - 0.64508188 0.60059347 0.62237591 0.51161666] +value: [0.6 0.62237591 0.66683134 0.60059347 0.67082039 0.53990552 + 0.64444444 0.57906602 0.71128676 0.51111111] -mean value: 0.5811589508465446 +mean value: 0.6146434979838011 key: test_accuracy -value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6] +value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5] mean value: nan key: train_accuracy -value: [0.78888889 0.75555556 0.75555556 0.8 0.82222222 0.78888889 - 0.82222222 0.8 0.81111111 0.75555556] +value: [0.8 0.81111111 0.83333333 0.8 0.83333333 0.76666667 + 0.82222222 0.78888889 0.85555556 0.75555556] -mean value: 0.79 +mean value: 0.8066666666666666 key: test_fscore -value: [0.66666667 0.75 nan 0.6 0.90909091 0.44444444 - 0.6 0.66666667 0.5 0.6 ] +value: [0.88888889 0.8 nan 0.66666667 0.72727273 0.44444444 + 0.66666667 0.54545455 0.57142857 0.44444444] mean value: nan key: train_fscore -value: [0.79120879 0.75 0.75 0.79069767 0.82978723 0.8 - 0.81818182 0.79545455 0.80898876 0.75 ] +value: [0.8 0.81318681 0.83516484 
0.79545455 0.84210526 0.78350515 + 0.82222222 0.7816092 0.85714286 0.75555556] -mean value: 0.7884318827351257 +mean value: 0.8085946441926197 key: test_precision -value: [0.75 1. nan 0.6 0.83333333 0.5 - 0.6 0.57142857 0.42857143 0.6 ] +value: [1. 0.8 nan 0.75 0.66666667 0.5 + 0.75 0.5 0.44444444 0.5 ] mean value: nan key: train_precision -value: [0.7826087 0.76744186 0.76744186 0.82926829 0.79591837 0.76 - 0.8372093 0.81395349 0.81818182 0.76744186] +value: [0.8 0.80434783 0.82608696 0.81395349 0.8 0.73076923 + 0.82222222 0.80952381 0.84782609 0.75555556] -mean value: 0.7939465545956881 - -key: test_recall -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +mean value: 0.8010285176008128 +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -12485,42 +12520,44 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -[0.6 0.6 nan 0.6 1. 
0.4 0.6 0.8 0.6 0.6] + +key: test_recall +value: [0.8 0.8 nan 0.6 0.8 0.4 0.6 0.6 0.8 0.4] mean value: nan key: train_recall -value: [0.8 0.73333333 0.73333333 0.75555556 0.86666667 0.84444444 - 0.8 0.77777778 0.8 0.73333333] +value: [0.8 0.82222222 0.84444444 0.77777778 0.88888889 0.84444444 + 0.82222222 0.75555556 0.86666667 0.75555556] -mean value: 0.7844444444444445 +mean value: 0.8177777777777777 key: test_roc_auc -value: [0.7 0.8 nan 0.6 0.9 0.5 0.6 0.6 0.4 0.6] +value: [0.9 0.8 nan 0.7 0.7 0.5 0.7 0.5 0.4 0.5] mean value: nan key: train_roc_auc -value: [0.78888889 0.75555556 0.75555556 0.8 0.82222222 0.78888889 - 0.82222222 0.8 0.81111111 0.75555556] +value: [0.8 0.81111111 0.83333333 0.8 0.83333333 0.76666667 + 0.82222222 0.78888889 0.85555556 0.75555556] -mean value: 0.79 +mean value: 0.8066666666666666 key: test_jcc -value: [0.5 0.6 nan 0.42857143 0.83333333 0.28571429 - 0.42857143 0.5 0.33333333 0.42857143] +value: [0.8 0.66666667 nan 0.5 0.57142857 0.28571429 + 0.5 0.375 0.4 0.28571429] mean value: nan key: train_jcc -value: [0.65454545 0.6 0.6 0.65384615 0.70909091 0.66666667 - 0.69230769 0.66037736 0.67924528 0.6 ] +value: [0.66666667 0.68518519 0.71698113 0.66037736 0.72727273 0.6440678 + 0.69811321 0.64150943 0.75 0.60714286] -mean value: 0.651607951796631 +mean value: 0.6797316364953078 -MCC on Blind test: 0.08 +MCC on Blind test: 0.0 -Accuracy on Blind test: 0.57 +Accuracy on Blind test: 0.5 Model_name: SVM Model func: SVC(random_state=42) @@ -12553,101 +12590,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SVC(random_state=42))]) key: fit_time -value: [0.00917888 0.00928307 0.0092535 0.00920248 0.00919914 0.00918078 - 0.00919771 0.00921726 0.00927758 0.00922322] +value: [0.0089519 0.0091002 0.00914049 0.00959015 0.01245856 0.01036382 + 0.01422167 0.01016212 0.01498318 0.01038194] -mean value: 0.009221363067626952 +mean value: 0.010935401916503907 key: score_time -value: [0.00861764 0.0091691 0.0043776 0.00870633 0.008672 
0.0087018 - 0.00878572 0.00866151 0.00867128 0.008708 ] +value: [0.00858164 0.00850725 0.00424981 0.01098704 0.01503611 0.00963306 + 0.01035309 0.00957203 0.01383162 0.0097847 ] -mean value: 0.008307099342346191 +mean value: 0.010053634643554688 key: test_mcc -value: [0.2 0.6 nan 0.40824829 0.81649658 0.65465367 - 0.2 0.21821789 0.21821789 1. ] +value: [0.40824829 0.6 nan 0.40824829 0.65465367 1. + 0.40824829 0.21821789 0.21821789 0.65465367] mean value: nan key: train_mcc -value: [0.84632727 0.80498447 0.82548988 0.87447463 0.82222222 0.80498447 - 0.91473203 0.87011096 0.84970583 0.82548988] +value: [0.86666667 0.88910845 0.91201231 0.80178373 0.8675239 0.86666667 + 0.91201231 0.84632727 0.8675239 0.80178373] -mean value: 0.8438521657272541 +mean value: 0.8631408926213697 key: test_accuracy -value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ] +value: [0.7 0.8 nan 0.7 0.8 1. 0.7 0.6 0.6 0.8] mean value: nan key: train_accuracy -value: [0.92222222 0.9 0.91111111 0.93333333 0.91111111 0.9 - 0.95555556 0.93333333 0.92222222 0.91111111] +value: [0.93333333 0.94444444 0.95555556 0.9 0.93333333 0.93333333 + 0.95555556 0.92222222 0.93333333 0.9 ] -mean value: 0.92 +mean value: 0.9311111111111111 key: test_fscore -value: [0.6 0.8 nan 0.66666667 0.90909091 0.75 - 0.6 0.66666667 0.66666667 1. ] +value: [0.72727273 0.8 nan 0.66666667 0.83333333 1. + 0.66666667 0.66666667 0.66666667 0.75 ] mean value: nan key: train_fscore -value: [0.91954023 0.89411765 0.90697674 0.92857143 0.91111111 0.89411765 - 0.95348837 0.93023256 0.91764706 0.90697674] +value: [0.93333333 0.94382022 0.95454545 0.89655172 0.93181818 0.93333333 + 0.95652174 0.91954023 0.93181818 0.89655172] -mean value: 0.9162779541113425 +mean value: 0.929783412685894 key: test_precision -value: [0.6 0.8 nan 0.75 0.83333333 1. - 0.6 0.57142857 0.57142857 1. ] +value: [0.66666667 0.8 nan 0.75 0.71428571 1. + 0.75 0.57142857 0.57142857 1. ] mean value: nan key: train_precision -value: [0.95238095 0.95 0.95121951 1. 
0.91111111 0.95 - 1. 0.97560976 0.975 0.95121951] +value: [0.93333333 0.95454545 0.97674419 0.92857143 0.95348837 0.93333333 + 0.93617021 0.95238095 0.95348837 0.92857143] -mean value: 0.9616540843979868 +mean value: 0.9450627073734447 key: test_recall -value: [0.6 0.8 nan 0.6 1. 0.6 0.6 0.8 0.8 1. ] +value: [0.8 0.8 nan 0.6 1. 1. 0.6 0.8 0.8 0.6] mean value: nan key: train_recall -value: [0.88888889 0.84444444 0.86666667 0.86666667 0.91111111 0.84444444 - 0.91111111 0.88888889 0.86666667 0.86666667] +value: [0.93333333 0.93333333 0.93333333 0.86666667 0.91111111 0.93333333 + 0.97777778 0.88888889 0.91111111 0.86666667] -mean value: 0.8755555555555555 +mean value: 0.9155555555555556 key: test_roc_auc -value: [0.6 0.8 nan 0.7 0.9 0.8 0.6 0.6 0.6 1. ] +value: [0.7 0.8 nan 0.7 0.8 1. 0.7 0.6 0.6 0.8] mean value: nan key: train_roc_auc -value: [0.92222222 0.9 0.91111111 0.93333333 0.91111111 0.9 - 0.95555556 0.93333333 0.92222222 0.91111111] +value: [0.93333333 0.94444444 0.95555556 0.9 0.93333333 0.93333333 + 0.95555556 0.92222222 0.93333333 0.9 ] -mean value: 0.92 +mean value: 0.9311111111111111 key: test_jcc -value: [0.42857143 0.66666667 nan 0.5 0.83333333 0.6 - 0.42857143 0.5 0.5 1. ] +value: [0.57142857 0.66666667 nan 0.5 0.71428571 1. 
+ 0.5 0.5 0.5 0.6 ] mean value: nan key: train_jcc -value: [0.85106383 0.80851064 0.82978723 0.86666667 0.83673469 0.80851064 - 0.91111111 0.86956522 0.84782609 0.82978723] +value: [0.875 0.89361702 0.91304348 0.8125 0.87234043 0.875 + 0.91666667 0.85106383 0.87234043 0.8125 ] -mean value: 0.845956335047124 +mean value: 0.8694071847055196 -MCC on Blind test: 0.37 +MCC on Blind test: 0.21 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.65 Model_name: MLP Model func: MLPClassifier(max_iter=500, random_state=42) @@ -12728,20 +12765,20 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MLPClassifier(max_iter=500, random_state=42))]) key: fit_time -value: [0.36695099 0.50412679 0.45356798 0.3968966 0.37350249 0.40489578 - 0.58160663 0.40739989 0.36945391 0.39499092] +value: [0.44935966 0.68770981 0.63709402 0.90934205 1.31588888 0.48195362 + 0.40868592 0.56449747 0.35664296 0.61879086] -mean value: 0.42533919811248777 +mean value: 0.6429965257644653 key: score_time -value: [0.01202917 0.01205993 0.00681949 0.01205707 0.01209044 0.0119729 - 0.01197028 0.0120163 0.01199508 0.01207495] +value: [0.01234674 0.01238608 0.00677371 0.01211309 0.01210904 0.01201606 + 0.01210451 0.01206374 0.01207805 0.0169487 ] -mean value: 0.011508560180664063 +mean value: 0.01209397315979004 key: test_mcc -value: [0. 0.81649658 nan 0.40824829 0.6 0.81649658 - 0.81649658 0.5 0.65465367 0.81649658] +value: [0.6 0.81649658 nan 0.2 0.81649658 0.81649658 + 0.40824829 0.65465367 0.21821789 0.81649658] mean value: nan @@ -12751,7 +12788,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9] +value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9] mean value: nan @@ -12761,8 +12798,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_fscore -value: [0.28571429 0.90909091 nan 0.72727273 0.8 0.88888889 - 0.90909091 0.76923077 0.83333333 0.90909091] +value: [0.8 0.90909091 nan 0.6 0.90909091 0.88888889 + 0.66666667 0.83333333 0.66666667 0.90909091] mean value: nan @@ -12772,8 +12809,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.5 0.83333333 nan 0.66666667 0.8 1. - 0.83333333 0.625 0.71428571 0.83333333] +value: [0.8 0.83333333 nan 0.6 0.83333333 1. + 0.75 0.71428571 0.57142857 0.83333333] mean value: nan @@ -12783,7 +12820,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.2 1. nan 0.8 0.8 0.8 1. 1. 1. 1. ] +value: [0.8 1. nan 0.6 1. 0.8 0.6 1. 0.8 1. ] mean value: nan @@ -12793,7 +12830,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.5 0.9 nan 0.7 0.8 0.9 0.9 0.7 0.8 0.9] +value: [0.8 0.9 nan 0.6 0.9 0.9 0.7 0.8 0.6 0.9] mean value: nan @@ -12803,8 +12840,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_jcc -value: [0.16666667 0.83333333 nan 0.57142857 0.66666667 0.8 - 0.83333333 0.625 0.71428571 0.83333333] +value: [0.66666667 0.83333333 nan 0.42857143 0.83333333 0.8 + 0.5 0.71428571 0.5 0.83333333] mean value: nan @@ -12944,544 +12981,19 @@ Pipeline(steps=[('prep', ('model', DecisionTreeClassifier(random_state=42))]) key: fit_time -value: [0.01837254 0.01201224 0.01022887 0.01003718 0.00986743 0.00974107 - 0.00972939 0.00980163 0.00968575 0.00952888] +value: [0.01760101 0.01270008 0.010602 0.01664591 0.01388907 0.0104475 + 0.01092863 0.01405859 0.01037669 0.01104903] -mean value: 0.010900497436523438 +mean value: 0.012829852104187012 key: score_time -value: [0.01520014 0.0088861 0.00448728 0.00853777 0.00867915 0.00838041 - 0.00833416 0.0084734 0.00840139 0.00852203] +value: [0.01295376 0.00925541 0.00528264 0.01082516 0.01204586 0.00963569 + 0.01535797 0.00973344 0.00954556 0.00905228] -mean value: 0.008790183067321777 +mean value: 0.010368776321411134 key: test_mcc -value: [0.6 0.81649658 nan 0.6 0.81649658 0.81649658 - 1. 0.21821789 0.65465367 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.8 0.9 nan 0.8 0.9 0.9 1. 0.6 0.8 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.8 0.90909091 nan 0.8 0.90909091 0.88888889 - 1. 0.66666667 0.83333333 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.8 0.83333333 nan 0.8 0.83333333 1. - 1. 0.57142857 0.71428571 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.8 1. nan 0.8 1. 0.8 1. 0.8 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
- -mean value: 1.0 - -key: test_roc_auc -value: [0.8 0.9 nan 0.8 0.9 0.9 1. 0.6 0.8 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.66666667 0.83333333 nan 0.66666667 0.83333333 0.8 - 1. 0.5 0.71428571 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.89 - -Accuracy on Blind test: 0.95 - -Model_name: Extra Trees -Model func: ExtraTreesClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive 
Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', ExtraTreesClassifier(random_state=42))]) - -key: fit_time -value: [0.08040762 0.08057427 0.08129692 0.08119392 0.08340263 0.08066964 - 0.08185148 0.08536839 0.0816021 0.08244801] - -mean value: 0.08188149929046631 - -key: score_time -value: [0.01654506 0.01672173 0.0045855 0.01680946 0.0167408 0.01680946 - 0.01687789 0.02278209 0.01735115 0.01731062] - -mean value: 0.01625337600708008 - -key: test_mcc -value: [0.65465367 0.40824829 nan 0.65465367 0.81649658 0.6 - 0.81649658 0.5 0.5 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 
1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.75 0.72727273 nan 0.75 0.90909091 0.8 - 0.90909091 0.76923077 0.76923077 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [1. 0.66666667 nan 1. 0.83333333 0.8 - 0.83333333 0.625 0.625 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.6 0.8 nan 0.6 1. 0.8 1. 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.8 0.7 nan 0.8 0.9 0.8 0.9 0.7 0.7 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.6 0.57142857 nan 0.6 0.83333333 0.66666667 - 0.83333333 0.625 0.625 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.36 - -Accuracy on Blind test: 0.72 - -Model_name: Extra Tree -Model func: ExtraTreeClassifier(random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), 
('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', ExtraTreeClassifier(random_state=42))]) - -key: fit_time -value: [0.00936699 0.00906682 0.00868106 0.00933576 0.00893283 0.00876093 - 0.00901222 0.00940871 0.00933743 0.00886393] - -mean value: 0.009076666831970216 - -key: score_time -value: [0.00862956 0.00860763 0.00448084 0.00889158 0.00875568 0.00860119 - 0.00875163 0.0087719 0.00861621 0.00862575] - -mean value: 0.00827319622039795 - -key: test_mcc -value: [0.21821789 0.40824829 nan 0.81649658 0.81649658 0.81649658 - 0.65465367 0.40824829 0.65465367 0.5 ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.5 0.72727273 nan 0.90909091 0.88888889 0.88888889 - 0.83333333 0.72727273 0.83333333 0.76923077] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.66666667 0.66666667 nan 0.83333333 1. 1. - 0.71428571 0.66666667 0.71428571 0.625 ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.4 0.8 nan 1. 0.8 0.8 1. 0.8 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.6 0.7 nan 0.9 0.9 0.9 0.8 0.7 0.8 0.7] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
- -mean value: 1.0 - -key: test_jcc -value: [0.33333333 0.57142857 nan 0.83333333 0.8 0.8 - 0.71428571 0.57142857 0.71428571 0.625 ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: -0.04 - -Accuracy on Blind test: 0.48 - -Model_name: Random Forest -Model func: RandomForestClassifier(n_estimators=1000, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), 
('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: 
FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. 
- warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. 
- warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', - RandomForestClassifier(n_estimators=1000, random_state=42))]) - -key: fit_time -value: [1.02382302 1.05082822 1.03594112 1.02103114 1.04056716 1.02298737 - 1.02116394 1.020087 1.01779222 1.02446771] - -mean value: 1.027868890762329 - -key: score_time -value: [0.16805339 0.09164691 0.00471544 0.08566546 0.09267664 0.08607197 - 0.09215879 0.09033847 0.09076619 0.08630776] - -mean value: 0.08884010314941407 - -key: test_mcc -value: [0.65465367 0.65465367 nan 0.65465367 0.81649658 0.81649658 +value: [0.81649658 0.81649658 nan 0.81649658 1. 0.81649658 1. 0.21821789 0.65465367 1. ] mean value: nan @@ -13492,7 +13004,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.8 0.8 nan 0.8 0.9 0.9 1. 0.6 0.8 1. ] +value: [0.9 0.9 nan 0.9 1. 0.9 1. 0.6 0.8 1. ] mean value: nan @@ -13502,7 +13014,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.75 0.83333333 nan 0.75 0.90909091 0.88888889 +value: [0.90909091 0.90909091 nan 0.88888889 1. 0.88888889 1. 0.66666667 0.83333333 1. ] mean value: nan @@ -13513,7 +13025,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [1. 0.71428571 nan 1. 0.83333333 1. +value: [0.83333333 0.83333333 nan 1. 1. 1. 1. 0.57142857 0.71428571 1. 
] mean value: nan @@ -13524,7 +13036,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.6 1. nan 0.6 1. 0.8 1. 0.8 1. 1. ] +value: [1. 1. nan 0.8 1. 0.8 1. 0.8 1. 1. ] mean value: nan @@ -13534,7 +13046,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 0.8 nan 0.8 0.9 0.9 1. 0.6 0.8 1. ] +value: [0.9 0.9 nan 0.9 1. 0.9 1. 0.6 0.8 1. ] mean value: nan @@ -13544,7 +13056,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.6 0.71428571 nan 0.6 0.83333333 0.8 +value: [0.83333333 0.83333333 nan 0.8 1. 0.8 1. 0.5 0.71428571 1. ] mean value: nan @@ -13554,4439 +13066,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 -MCC on Blind test: 0.48 - -Accuracy on Blind test: 0.78 - -Model_name: Random Forest2 -Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - 
learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', - RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, - oob_score=True, random_state=42))]) - -key: fit_time -value: [0.79851103 0.84037805 0.94003201 0.90074062 0.83170342 0.8777082 - 0.84478021 0.83035374 0.87192893 0.84235287] - -mean value: 0.8578489065170288 - -key: score_time -value: [0.18074512 0.13748908 0.00518966 0.15447927 0.22545338 0.22390962 - 0.19519472 0.20300436 0.18351603 0.17219543] - -mean value: 0.16811766624450683 - -key: test_mcc -value: [0.6 0.81649658 nan 0.65465367 1. 0.65465367 - 0.81649658 0.21821789 0.81649658 1. ] - -mean value: nan - -key: train_mcc -value: [0.97801929 1. 1. 1. 0.97801929 0.95650071 - 0.97801929 1. 1. 1. ] - -mean value: 0.9890558596126232 - -key: test_accuracy -value: [0.8 0.9 nan 0.8 1. 0.8 0.9 0.6 0.9 1. ] - -mean value: nan - -key: train_accuracy -value: [0.98888889 1. 1. 1. 0.98888889 0.97777778 - 0.98888889 1. 1. 1. ] - -mean value: 0.9944444444444445 - -key: test_fscore -value: [0.8 0.90909091 nan 0.75 1. 0.75 - 0.88888889 0.66666667 0.90909091 1. ] - -mean value: nan - -key: train_fscore -value: [0.98876404 1. 1. 1. 0.98901099 0.97727273 - 0.98876404 1. 1. 1. ] - -mean value: 0.9943811806171357 - -key: test_precision -value: [0.8 0.83333333 nan 1. 1. 1. - 1. 0.57142857 0.83333333 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 0.97826087 1. - 1. 1. 1. 1. ] - -mean value: 0.9978260869565218 - -key: test_recall -value: [0.8 1. nan 0.6 1. 0.6 0.8 0.8 1. 1. ] - -mean value: nan - -key: train_recall -value: [0.97777778 1. 1. 1. 1. 0.95555556 - 0.97777778 1. 1. 1. 
] - -mean value: 0.991111111111111 - -key: test_roc_auc -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[0.8 0.9 nan 0.8 1. 0.8 0.9 0.6 0.9 1. 
] - -mean value: nan - -key: train_roc_auc -value: [0.98888889 1. 1. 1. 0.98888889 0.97777778 - 0.98888889 1. 1. 1. ] - -mean value: 0.9944444444444445 - -key: test_jcc -value: [0.66666667 0.83333333 nan 0.6 1. 0.6 - 0.8 0.5 0.83333333 1. ] - -mean value: nan - -key: train_jcc -value: [0.97777778 1. 1. 1. 0.97826087 0.95555556 - 0.97777778 1. 1. 1. ] - -mean value: 0.9889371980676328 - -MCC on Blind test: 0.6 - -Accuracy on Blind test: 0.82 - -Model_name: Naive Bayes -Model func: BernoulliNB() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', 
PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', BernoulliNB())]) - -key: fit_time -value: [0.02122736 0.00870228 0.00875449 0.00875592 0.00878906 0.00892305 - 0.00867987 0.00895619 0.00886059 0.00871706] - -mean value: 0.010036587715148926 - -key: score_time -value: [0.01413703 0.0085597 0.00445867 0.00852251 0.00865173 0.00862718 - 0.00861049 0.0086062 0.00850129 0.00863886] - -mean value: 0.008731365203857422 - -key: test_mcc -value: [0.33333333 0.21821789 nan 0. 
0.6 0.5 - 0.5 0.40824829 0.2 0.6 ] - -mean value: nan - -key: train_mcc -value: [0.71269665 0.64700558 0.53452248 0.68041382 0.62609903 0.58137767 - 0.58137767 0.62609903 0.69509522 0.53452248] - -mean value: 0.6219209651318979 - -key: test_accuracy -value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8] - -mean value: nan - -key: train_accuracy -value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889 - 0.78888889 0.81111111 0.84444444 0.76666667] - -mean value: 0.8088888888888889 - -key: test_fscore -value: [0.33333333 0.66666667 nan 0.54545455 0.8 0.57142857 - 0.57142857 0.72727273 0.6 0.8 ] - -mean value: nan - -key: train_fscore -value: [0.85057471 0.81395349 0.75862069 0.81481481 0.8 0.77647059 - 0.77647059 0.8 0.83333333 0.75862069] - -mean value: 0.7982858904944852 - -key: test_precision -value: [1. 0.57142857 nan 0.5 0.8 1. - 1. 0.66666667 0.6 0.8 ] - -mean value: nan - -key: train_precision -value: [0.88095238 0.85365854 0.78571429 0.91666667 0.85 0.825 - 0.825 0.85 0.8974359 0.78571429] - -mean value: 0.8470142053068882 - -key: test_recall -value: [0.2 0.8 nan 0.6 0.8 0.4 0.4 0.8 0.6 0.8] - -mean value: nan - -key: train_recall -value: [0.82222222 0.77777778 0.73333333 0.73333333 0.75555556 0.73333333 - 0.73333333 0.75555556 0.77777778 0.73333333] - -mean value: 0.7555555555555555 - -key: test_roc_auc -value: [0.6 0.6 nan 0.5 0.8 0.7 0.7 0.7 0.6 0.8] - -mean value: nan - -key: train_roc_auc -value: [0.85555556 0.82222222 0.76666667 0.83333333 0.81111111 0.78888889 - 0.78888889 0.81111111 0.84444444 0.76666667] - -mean value: 0.8088888888888889 - -key: test_jcc -value: [0.2 0.5 nan 0.375 0.66666667 0.4 - 0.4 0.57142857 0.42857143 0.66666667] - -mean value: nan - -key: train_jcc -value: [0.74 0.68627451 0.61111111 0.6875 0.66666667 0.63461538 - 0.63461538 0.66666667 0.71428571 0.61111111] - -mean value: 0.665284654887596 - -MCC on Blind test: 0.12 - -Accuracy on Blind test: 0.6 - -Model_name: XGBoost -Model func: XGBClassifier(base_score=None, 
booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories 
['XDR'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', 
RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000... - interaction_constraints=None, learning_rate=None, - max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, - monotone_constraints=None, n_estimators=100, - n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, - reg_lambda=None, scale_pos_weight=None, - subsample=None, tree_method=None, - use_label_encoder=False, - validate_parameters=None, verbosity=0))]) - -key: fit_time -value: [0.06223869 0.05609274 0.04758739 0.18425703 0.03499866 0.03480005 - 0.05428028 0.03800774 0.06300688 0.03989649] - -mean value: 0.06151659488677978 - -key: score_time -value: [0.01010013 0.0104847 0.00464034 0.01078033 0.01093912 0.01051426 - 0.01065564 0.0102036 0.01068449 0.0109179 ] - -mean value: 0.009992051124572753 - -key: test_mcc -value: [0.81649658 0.81649658 nan 1. 1. 0.81649658 - 1. 0.6 0.65465367 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.9 0.9 nan 1. 1. 0.9 1. 0.8 0.8 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.88888889 0.90909091 nan 1. 1. 0.90909091 - 1. 0.8 0.83333333 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [1. 0.83333333 nan 1. 1. 0.83333333 - 1. 0.8 0.71428571 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 
1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.8 1. nan 1. 1. 1. 1. 0.8 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.9 0.9 nan 1. 1. 0.9 1. 0.8 0.8 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.8 0.83333333 nan 1. 1. 0.83333333 - 1. 0.66666667 0.71428571 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - MCC on Blind test: 0.89 Accuracy on Blind test: 0.95 -Model_name: LDA -Model func: LinearDiscriminantAnalysis() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - 
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 
'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', LinearDiscriminantAnalysis())]) - -key: fit_time -value: [0.0236156 0.04939389 0.06203413 0.03593135 0.03797984 0.04009914 - 0.04238033 0.03859186 0.03845286 0.04021764] - -mean value: 0.04086966514587402 - -key: score_time -value: [0.02123761 0.0344398 0.0061059 0.0222311 0.02070427 0.02340961 - 0.01904702 0.02245331 0.02260351 0.02361083] - -mean value: 0.021584296226501466 - -key: test_mcc -value: [0. 0.81649658 nan 0.81649658 1. 0.40824829 - 0.40824829 0.65465367 0.5 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 0.97801929 - 1. 1. 1. 1. ] - -mean value: 0.9978019293843652 - -key: test_accuracy -value: [0.5 0.9 nan 0.9 1. 0.7 0.7 0.8 0.7 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 0.98888889 - 1. 1. 1. 1. ] - -mean value: 0.9988888888888889 - -key: test_fscore -value: [0.44444444 0.90909091 nan 0.90909091 1. 0.72727273 - 0.72727273 0.83333333 0.76923077 0.90909091] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 0.98876404 - 1. 1. 1. 1. ] - -mean value: 0.998876404494382 - -key: test_precision -value: [0.5 0.83333333 nan 0.83333333 1. 0.66666667 - 0.66666667 0.71428571 0.625 0.83333333] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.4 1. nan 1. 1. 0.8 0.8 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 0.97777778 - 1. 1. 1. 1. ] - -mean value: 0.9977777777777778 - -key: test_roc_auc -value: [0.5 0.9 nan 0.9 1. 
0.7 0.7 0.8 0.7 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 0.98888889 - 1. 1. 1. 1. ] - -mean value: 0.9988888888888889 - -key: test_jcc -value: [0.28571429 0.83333333 nan 0.83333333 1. 0.57142857 - 0.57142857 0.71428571 0.625 0.83333333] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 0.97777778 - 1. 1. 1. 1. ] - -mean value: 0.9977777777777778 - -MCC on Blind test: 0.01 - -Accuracy on Blind test: 0.52 - -Model_name: Multinomial -Model func: MultinomialNB() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, 
random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', MultinomialNB())]) - -key: fit_time -value: [0.01126742 0.00883031 0.00877166 0.00853825 0.00856185 0.00841284 - 0.00843716 0.0084455 0.0083921 0.00846934] - -mean value: 0.008812642097473145 - -key: score_time -value: [0.00877571 0.0087781 0.00430918 0.0083077 0.00839949 0.00832129 - 0.00838757 0.0083096 0.00846934 0.00847459] - -mean value: 0.008053255081176759 - -key: test_mcc -value: [0. 0.2 nan 0.2 0.81649658 0.65465367 - 0.21821789 0. 0. 
0.81649658] - -mean value: nan - -key: train_mcc -value: [0.58137767 0.57792049 0.56056066 0.66683134 0.6 0.56056066 - 0.64700558 0.66683134 0.53990552 0.53665631] - -mean value: 0.5937649587083046 - -key: test_accuracy -value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9] - -mean value: nan - -key: train_accuracy -value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8 0.77777778 - 0.82222222 0.83333333 0.76666667 0.76666667] - -mean value: 0.7955555555555556 - -key: test_fscore -value: [0.54545455 0.6 nan 0.6 0.90909091 0.75 - 0.5 0.44444444 0.61538462 0.90909091] - -mean value: nan - -key: train_fscore -value: [0.77647059 0.78651685 0.76190476 0.83146067 0.8 0.76190476 - 0.81395349 0.83146067 0.74698795 0.75294118] - -mean value: 0.7863600930941919 - -key: test_precision -value: [0.5 0.6 nan 0.6 0.83333333 1. - 0.66666667 0.5 0.5 0.83333333] - -mean value: nan - -key: train_precision -value: [0.825 0.79545455 0.82051282 0.84090909 0.8 0.82051282 - 0.85365854 0.84090909 0.81578947 0.8 ] - -mean value: 0.8212746378567944 - -key: test_recall -value: [0.6 0.6 nan 0.6 1. 0.6 0.4 0.4 0.8 1. ] - -mean value: nan - -key: train_recall -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[0.73333333 0.77777778 0.71111111 0.82222222 0.8 0.71111111 - 0.77777778 0.82222222 0.68888889 0.71111111] - -mean value: 0.7555555555555555 - -key: test_roc_auc -value: [0.5 0.6 nan 0.6 0.9 0.8 0.6 0.5 0.5 0.9] - -mean value: nan - -key: train_roc_auc -value: [0.78888889 0.78888889 0.77777778 0.83333333 0.8 0.77777778 - 0.82222222 0.83333333 0.76666667 0.76666667] - -mean value: 0.7955555555555556 - -key: test_jcc -value: [0.375 0.42857143 nan 0.42857143 0.83333333 0.6 - 0.33333333 0.28571429 0.44444444 0.83333333] - -mean value: nan - -key: train_jcc -value: [0.63461538 0.64814815 0.61538462 0.71153846 0.66666667 0.61538462 - 0.68627451 0.71153846 0.59615385 0.60377358] - -mean value: 0.6489478294139781 - -MCC on Blind 
test: 0.3 - -Accuracy on Blind test: 0.68 - -Model_name: Passive Aggresive -Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', - PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) - -key: fit_time -value: [0.00969601 0.01305962 0.01228189 0.01330137 0.01364422 0.01439548 - 0.01438236 0.01451087 0.01334405 0.01380467] - -mean value: 0.013242053985595702 - -key: score_time -value: [0.0084312 0.01083541 0.0055151 0.01124859 0.01355529 0.0133152 - 0.03446317 0.01134872 0.01132798 0.01131558] - -mean value: 0.013135623931884766 - -key: test_mcc -value: [0. 
0.65465367 nan 0.6 0.6 0.81649658 - 0.65465367 0.81649658 0.33333333 0.81649658] - -mean value: nan - -key: train_mcc -value: [0.95555556 0.91473203 0.91111111 0.95555556 0.95650071 0.95650071 - 0.81649658 0.87447463 0.79772404 0.85485041] - -mean value: 0.8993501347937264 - -key: test_accuracy -value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9] - -mean value: nan - -key: train_accuracy -value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778 - 0.9 0.93333333 0.88888889 0.92222222] - -mean value: 0.9466666666666667 - -key: test_fscore -value: [0.28571429 0.83333333 nan 0.8 0.8 0.88888889 - 0.75 0.88888889 0.71428571 0.88888889] - -mean value: nan - -key: train_fscore -value: [0.97777778 0.95744681 0.95555556 0.97777778 0.97826087 0.97826087 - 0.88888889 0.92857143 0.9 0.91566265] - -mean value: 0.9458202626814911 - -key: test_precision -value: [0.5 0.71428571 nan 0.8 0.8 1. - 1. 1. 0.55555556 1. ] - -mean value: nan - -key: train_precision -value: [0.97777778 0.91836735 0.95555556 0.97777778 0.95744681 0.95744681 - 1. 1. 0.81818182 1. ] - -mean value: 0.9562553893252982 - -key: test_recall -value: [0.2 1. nan 0.8 0.8 0.8 0.6 0.8 1. 0.8] - -mean value: nan - -key: train_recall -value: [0.97777778 1. 0.95555556 0.97777778 1. 1. - 0.8 0.86666667 1. 
0.84444444] - -mean value: 0.9422222222222222 - -key: test_roc_auc -value: [0.5 0.8 nan 0.8 0.8 0.9 0.8 0.9 0.6 0.9] - -mean value: nan - -key: train_roc_auc -value: [0.97777778 0.95555556 0.95555556 0.97777778 0.97777778 0.97777778 - 0.9 0.93333333 0.88888889 0.92222222] - -mean value: 0.9466666666666667 - -key: test_jcc -value: [0.16666667 0.71428571 nan 0.66666667 0.66666667 0.8 - 0.6 0.8 0.55555556 0.8 ] - -mean value: nan - -key: train_jcc -value: [0.95652174 0.91836735 0.91489362 0.95652174 0.95744681 0.95744681 - 0.8 0.86666667 0.81818182 0.84444444] - -mean value: 0.8990490988535128 - -MCC on Blind test: 0.15 - -Accuracy on Blind test: 0.6 - -Model_name: Stochastic GDescent -Model func: SGDClassifier(n_jobs=10, random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', 
GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', SGDClassifier(n_jobs=10, random_state=42))]) - -key: fit_time -value: [0.01253462 0.01219559 0.01248789 0.01262569 0.01222777 0.01228476 - 0.01211166 0.01249099 0.01256251 0.01269126] - -mean value: 0.012421274185180664 - -key: score_time -value: [0.01041532 0.01126981 0.00603032 0.01128006 0.01135278 0.01132679 - 0.01124406 0.01126432 0.01123857 0.0112381 ] - -mean value: 0.01066601276397705 - -key: test_mcc -value: [0. 0.5 nan 0.6 0.6 0.65465367 - 0.40824829 0.6 0.6 1. ] - -mean value: nan - -key: train_mcc -value: [0.95555556 0.74278135 0.97801929 0.89442719 0.97801929 0.46499055 - 0.81649658 0.77919372 0.91473203 1. ] - -mean value: 0.8524215579246943 - -key: test_accuracy -value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ] - -mean value: nan - -key: train_accuracy -value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778 - 0.9 0.87777778 0.95555556 1. ] - -mean value: 0.9166666666666666 - -key: test_fscore -value: [0.44444444 0.76923077 nan 0.8 0.8 0.83333333 - 0.66666667 0.8 0.8 1. 
] - -mean value: nan - -key: train_fscore -value: [0.97777778 0.87378641 0.98876404 0.94736842 0.98876404 0.75630252 - 0.88888889 0.86075949 0.95348837 1. ] - -mean value: 0.9235899972146242 - -key: test_precision -value: [0.5 0.625 nan 0.8 0.8 0.71428571 - 0.75 0.8 0.8 1. ] - -mean value: nan - -key: train_precision -value: [0.97777778 0.77586207 1. 0.9 1. 0.60810811 - 1. 1. 1. 1. ] - -mean value: 0.9261747954851403 - -key: test_recall -value: [0.4 1. nan 0.8 0.8 1. 0.6 0.8 0.8 1. ] - -mean value: nan - -key: train_recall -value: [0.97777778 1. 0.97777778 1. 0.97777778 1. - 0.8 0.75555556 0.91111111 1. ] - -mean value: 0.94 - -key: test_roc_auc -value: [0.5 0.7 nan 0.8 0.8 0.8 0.7 0.8 0.8 1. ] - -mean value: nan - -key: train_roc_auc -value: [0.97777778 0.85555556 0.98888889 0.94444444 0.98888889 0.67777778 - 0.9 0.87777778 0.95555556 1. ] - -mean value: 0.9166666666666666 - -key: test_jcc -value: [0.28571429 0.625 nan 0.66666667 0.66666667 0.71428571 - 0.5 0.66666667 0.66666667 1. ] - -mean value: nan - -key: train_jcc -value: [0.95652174 0.77586207 0.97777778 0.9 0.97777778 0.60810811 - 0.8 0.75555556 0.91111111 1. 
] - -mean value: 0.8662714138426282 - -MCC on Blind test: 0.49 - -Accuracy on Blind test: 0.78 - -Model_name: AdaBoost Classifier -Model func: AdaBoostClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 
1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', AdaBoostClassifier(random_state=42))]) - -key: fit_time -value: [0.0932951 0.08622575 0.08703232 0.08533549 0.08312297 0.08790827 - 0.08446789 0.08704805 0.08473301 0.08655405] - -mean value: 0.08657228946685791 - -key: score_time -value: [0.01610136 0.0159595 0.00456142 0.01449895 0.01445603 0.01618457 - 0.01588774 0.01573229 0.01589179 0.01581264] - -mean value: 0.014508628845214843 - -key: test_mcc -value: [0.6 0.81649658 nan 0.65465367 1. 0.6 - 1. 1. 0.65465367 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.8 0.9 nan 0.8 1. 0.8 1. 1. 0.8 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.8 0.90909091 nan 0.75 1. 0.8 - 1. 1. 0.83333333 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.8 0.83333333 nan 1. 1. 0.8 - 1. 1. 0.71428571 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.8 1. nan 0.6 1. 0.8 1. 1. 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.8 0.9 nan 0.8 1. 0.8 1. 1. 0.8 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.66666667 0.83333333 nan 0.6 1. 0.66666667 - 1. 1. 0.71428571 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 
1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.78 - -Accuracy on Blind test: 0.9 - -Model_name: Bagging Classifier -Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient 
Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', - BaggingClassifier(n_jobs=10, oob_score=True, - random_state=42))]) - -key: fit_time -value: [0.03166676 0.04806876 0.04904318 0.03325343 0.03756118 0.02605057 - 0.051754 0.03409243 0.02555013 0.03323889] - -mean value: 0.037027931213378905 - -key: score_time -value: [0.02363086 0.02686834 0.00466347 0.02329421 0.02710223 0.02057576 - 0.02632928 0.01891589 0.01708174 0.02512145] - -mean value: 0.021358323097229005 - -key: test_mcc -value: [0.81649658 0.81649658 nan 0.81649658 1. 0.6 - 1. 0.65465367 0.81649658 0.81649658] - -mean value: nan - -key: train_mcc -value: [0.97801929 0.97801929 1. 1. 1. 1. - 0.97801929 1. 1. 1. ] - -mean value: 0.9934057881530954 - -key: test_accuracy -value: [0.9 0.9 nan 0.9 1. 0.8 1. 0.8 0.9 0.9] - -mean value: nan - -key: train_accuracy -value: [0.98888889 0.98888889 1. 1. 1. 1. - 0.98888889 1. 1. 1. ] - -mean value: 0.9966666666666667 - -key: test_fscore -value: [0.88888889 0.90909091 nan 0.90909091 1. 0.8 - 1. 0.83333333 0.90909091 0.88888889] - -mean value: nan - -key: train_fscore -value: [0.98876404 0.98876404 1. 1. 1. 1. - 0.98876404 1. 1. 1. 
] - -mean value: 0.996629213483146 - -key: test_precision -value: [1. 0.83333333 nan 0.83333333 1. 0.8 - 1. 0.71428571 0.83333333 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.8 1. nan 1. 1. 0.8 1. 1. 1. 0.8] - -mean value: nan - -key: train_recall -value: [0.97777778 0.97777778 1. 1. 1. 1. - 0.97777778 1. 1. 1. ] - -mean value: 0.9933333333333333 - -key: test_roc_auc -value: [0.9 0.9 nan 0.9 1. 0.8 1. 0.8 0.9 0.9] - -mean value: nan - -key: train_roc_auc -value: [0.98888889 0.98888889 1. 1. 1. 1. - 0.98888889 1. 1. 1. ] - -mean value: 0.9966666666666667 - -key: test_jcc -value: [0.8 0.83333333 nan 0.83333333 1. 0.66666667 - 1. 0.71428571 0.83333333 0.8 ] - -mean value: nan - -key: train_jcc -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[0.97777778 0.97777778 1. 1. 1. 1. - 0.97777778 1. 1. 1. 
] - -mean value: 0.9933333333333333 - -MCC on Blind test: 0.95 - -Accuracy on Blind test: 0.98 - -Model_name: Gaussian Process -Model func: GaussianProcessClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', GaussianProcessClassifier(random_state=42))]) - -key: fit_time -value: [0.01846266 0.01602125 0.01668453 0.02254128 0.01823306 0.01637578 - 0.01689363 0.01705527 0.01624489 0.01630282] - -mean value: 0.017481517791748048 - -key: score_time -value: [0.01143336 0.01123261 0.00608587 0.01233387 0.01204014 0.01180053 - 0.011935 0.01179743 0.01177883 0.01175404] - -mean value: 0.011219167709350586 - -key: test_mcc -value: [0. 0.6 nan 0.2 0.81649658 0.6 - 0.65465367 0.5 0.33333333 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.28571429 0.8 nan 0.6 0.90909091 0.8 - 0.83333333 0.76923077 0.71428571 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.5 0.8 nan 0.6 0.83333333 0.8 - 0.71428571 0.625 0.55555556 1. 
] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.2 0.8 nan 0.6 1. 0.8 1. 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.5 0.8 nan 0.6 0.9 0.8 0.8 0.7 0.6 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.16666667 0.66666667 nan 0.42857143 0.83333333 0.66666667 - 0.71428571 0.625 0.55555556 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.16 - -Accuracy on Blind test: 0.62 - -Model_name: Gradient Boosting -Model func: GradientBoostingClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', 
random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. 
The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, 
callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") 
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. - _warn_prf(average, modifier, msg_start, len(result)) -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', GradientBoostingClassifier(random_state=42))]) - -key: fit_time -value: [0.17908907 0.17281628 0.17472339 0.16933966 0.17832685 0.17855334 - 0.17665958 0.20065045 0.17976284 0.14105153] - -mean value: 0.17509729862213136 - -key: score_time -value: [0.00950193 0.00909305 0.00476193 0.00946689 0.00997877 0.0098536 - 0.00984955 0.00980544 0.00993419 0.00911093] - -mean value: 0.009135627746582031 - -key: test_mcc -value: [0.81649658 0.81649658 nan 0.81649658 1. 0.6 - 1. 1. 0.81649658 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.9 0.9 nan 0.9 1. 0.8 1. 1. 0.9 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.88888889 0.90909091 nan 0.90909091 1. 0.8 - 1. 1. 0.90909091 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [1. 0.83333333 nan 0.83333333 1. 0.8 - 1. 1. 0.83333333 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.8 1. nan 1. 1. 0.8 1. 1. 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.9 0.9 nan 0.9 1. 0.8 1. 1. 0.9 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.8 0.83333333 nan 0.83333333 1. 0.66666667 - 1. 1. 
0.83333333 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.95 - -Accuracy on Blind test: 0.98 - -Model_name: QDA -Model func: QuadraticDiscriminantAnalysis() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', 
GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', QuadraticDiscriminantAnalysis())]) - -key: fit_time -value: [0.01081228 0.01397157 0.01533651 0.01416707 0.01431513 0.01669645 - 0.0199163 0.01420426 0.01628375 0.01877785] - -mean value: 0.01544811725616455 - -key: score_time -value: [0.01137567 0.01164627 0.00611782 0.01310349 0.01270056 0.01162291 - 0.01324058 0.01273084 0.01166701 0.01337457] - -mean value: 0.011757969856262207 - -key: test_mcc -value: [0.33333333 0.81649658 nan 0.5 0.81649658 0.81649658 - 0.81649658 0.65465367 1. 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1. 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.33333333 0.88888889 nan 0.57142857 0.88888889 0.88888889 - 0.88888889 0.75 1. 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [ 1. 1. nan 1. 1. 1. 1. 
1. 1. 1.] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.6 0.9 nan 0.7 0.9 0.9 0.9 0.8 1. 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.2 0.8 nan 0.4 0.8 0.8 0.8 0.6 1. 0.8] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.0 - -Accuracy on Blind test: 0.65 - -Model_name: Ridge Classifier -Model func: RidgeClassifier(random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', 
GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', RidgeClassifier(random_state=42))]) - -key: fit_time -value: [0.03344846 0.03200626 0.032161 0.03211999 0.0323348 0.03212428 - 0.03187537 0.03182149 0.0317018 0.03172779] - -mean value: 0.03213212490081787 - -key: score_time -value: [0.01959944 0.01622725 0.01191449 0.02088118 0.0222888 0.01162863 - 0.02132607 0.01181674 0.02234721 0.02092266] - -mean value: 0.01789524555206299 - -key: test_mcc -value: [0.2 0.65465367 nan 0.65465367 0.81649658 0.81649658 - 0.81649658 0.65465367 0.65465367 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 1. - 0.97801929 0.97801929 1. 0.97801929] - -mean value: 0.9846135056905561 - -key: test_accuracy -value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1. - 0.98888889 0.98888889 1. 
0.98888889] - -mean value: 0.9922222222222222 - -key: test_fscore -value: [0.6 0.83333333 nan 0.83333333 0.90909091 0.88888889 - 0.90909091 0.83333333 0.83333333 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 1. - 0.98901099 0.98901099 1. 0.98901099] - -mean value: 0.9923076923076923 - -key: test_precision -value: [0.6 0.71428571 nan 0.71428571 0.83333333 1. - 0.83333333 0.71428571 0.71428571 1. ] - -mean value: nan - -key: train_precision -value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1. - 0.97826087 0.97826087 1. 0.97826087] - -mean value: 0.9847826086956522 - -key: test_recall -value: [0.6 1. nan 1. 1. 0.8 1. 1. 1. 0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1. - 0.98888889 0.98888889 1. 0.98888889] - -mean value: 0.9922222222222221 - -key: test_jcc -value: [0.42857143 0.71428571 nan 0.71428571 0.83333333 0.8 - 0.83333333 0.71428571 0.71428571 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1. - 0.97826087 0.97826087 1. 
0.97826087] - -mean value: 0.9847826086956522 - -MCC on Blind test: 0.48 - -Accuracy on Blind test: 0.78 - -Model_name: Ridge ClassifierCV -Model func: RidgeClassifierCV(cv=10) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 
1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:148: SettingWithCopyWarning: -A value is trying to be set on a copy of a slice from a DataFrame - -See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace 
= True) -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:151: SettingWithCopyWarning: -A value is trying to be set on a copy of a slice from a DataFrame - -See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True) -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", 
line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. 
The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, 
callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', RidgeClassifierCV(cv=10))]) - -key: fit_time -value: [0.21318793 0.09895921 0.21072364 0.19039989 0.18606758 0.18941188 - 0.18936801 0.20299673 0.19019341 0.22260165] - -mean value: 0.18939099311828614 - -key: score_time -value: [0.02227712 0.01179457 0.01261592 0.02156162 0.02067518 0.02020216 - 0.02313948 0.0224812 0.02181292 0.02340198] - -mean value: 0.019996213912963866 - -key: test_mcc -value: [0.2 0.65465367 nan 0.65465367 0.81649658 0.81649658 - 0.81649658 0.65465367 0.65465367 0.81649658] - -mean value: nan - -key: train_mcc -value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 1. - 0.97801929 0.97801929 1. 0.97801929] - -mean value: 0.9846135056905561 - -key: test_accuracy -value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9] - -mean value: nan - -key: train_accuracy -value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1. - 0.98888889 0.98888889 1. 0.98888889] - -mean value: 0.9922222222222222 - -key: test_fscore -value: [0.6 0.83333333 nan 0.83333333 0.90909091 0.88888889 - 0.90909091 0.83333333 0.83333333 0.88888889] - -mean value: nan - -key: train_fscore -value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 1. - 0.98901099 0.98901099 1. 0.98901099] - -mean value: 0.9923076923076923 - -key: test_precision -value: [0.6 0.71428571 nan 0.71428571 0.83333333 1. - 0.83333333 0.71428571 0.71428571 1. ] - -mean value: nan - -key: train_precision -value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1. - 0.97826087 0.97826087 1. 0.97826087] - -mean value: 0.9847826086956522 - -key: test_recall -value: [0.6 1. nan 1. 1. 0.8 1. 1. 1. 
0.8] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.6 0.8 nan 0.8 0.9 0.9 0.9 0.8 0.8 0.9] - -mean value: nan - -key: train_roc_auc -value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 1. - 0.98888889 0.98888889 1. 0.98888889] - -mean value: 0.9922222222222221 - -key: test_jcc -value: [0.42857143 0.71428571 nan 0.71428571 0.83333333 0.8 - 0.83333333 0.71428571 0.71428571 0.8 ] - -mean value: nan - -key: train_jcc -value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 1. - 0.97826087 0.97826087 1. 0.97826087] - -mean value: 0.9847826086956522 - -MCC on Blind test: 0.48 - -Accuracy on Blind test: 0.78 - -Model_name: Logistic Regression -Model func: LogisticRegression(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, 
random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', LogisticRegression(random_state=42))]) - -key: fit_time -value: [0.0250783 0.03367138 0.02173638 0.03091216 0.0839653 0.04441166 - 0.03470874 0.0207479 0.02312613 0.02219653] - -mean value: 0.034055447578430174 - -key: score_time -value: [0.01159692 0.01183295 0.01154208 0.0117619 0.01320601 0.00625491 - 0.01170278 0.01160264 0.0115304 0.01158214] - -mean value: 0.011261272430419921 - -key: test_mcc -value: [ 0.33333333 0.70710678 0.4472136 1. nan nan - -0.33333333 1. 1. 
1. ] - -mean value: nan - -key: train_mcc -value: [0.96225045 1. 1. 1. 0.96225045 1. - 1. 1. 1. 1. ] - -mean value: 0.9924500897298753 - -key: test_accuracy -value: [0.66666667 0.83333333 0.66666667 1. nan nan - 0.33333333 1. 1. 1. ] - -mean value: nan - -key: train_accuracy -value: [0.98076923 1. 1. 1. 0.98076923 1. - 1. 1. 1. 1. ] - -mean value: 0.9961538461538462 - -key: test_fscore -value: [0.66666667 0.85714286 0.75 1. nan nan - 0.33333333 1. 1. 1. ] - -mean value: nan - -key: train_fscore -value: [0.98039216 1. 1. 1. 0.98039216 1. - 1. 1. 1. 1. ] - -mean value: 0.996078431372549 - -key: test_precision -value: [0.66666667 0.75 0.6 1. nan nan - 0.33333333 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -[0.66666667 1. 1. 1. nan nan - 0.33333333 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [0.96153846 1. 1. 1. 0.96153846 1. - 1. 1. 1. 1. ] - -mean value: 0.9923076923076923 - -key: test_roc_auc -value: [0.66666667 0.83333333 0.66666667 1. nan nan - 0.33333333 1. 1. 1. ] - -mean value: nan - -key: train_roc_auc -value: [0.98076923 1. 1. 1. 0.98076923 1. - 1. 1. 1. 1. ] - -mean value: 0.9961538461538462 - -key: test_jcc -value: [0.5 0.75 0.6 1. nan nan 0.2 1. 1. 1. ] - -mean value: nan - -key: train_jcc -value: [0.96153846 1. 1. 1. 0.96153846 1. - 1. 1. 1. 1. 
] - -mean value: 0.9923076923076923 - -MCC on Blind test: 0.21 - -Accuracy on Blind test: 0.6 - -Model_name: Logistic RegressionCV -Model func: LogisticRegressionCV(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', 
GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', LogisticRegressionCV(random_state=42))]) - -key: fit_time -value: [0.27737689 0.28079844 0.29659295 0.26077461 0.26526332 0.26656747 - 0.26323867 0.28429699 0.31050968 0.30310488] - -mean value: 0.28085238933563234 - -key: score_time -value: [0.01181126 0.01163769 0.01170397 0.0118041 0.00647473 0.00621986 - 0.01164699 0.01164746 0.0117209 0.01170206] - -mean value: 0.01063690185546875 - -key: test_mcc -value: [0.33333333 0.70710678 0.4472136 1. nan nan - 0.70710678 1. 1. 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.66666667 0.83333333 0.66666667 1. nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.66666667 0.85714286 0.75 1. nan nan - 0.8 1. 1. 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.66666667 0.75 0.6 1. nan nan - 1. 1. 1. 1. 
] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.66666667 1. 1. 1. nan nan - 0.66666667 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.66666667 0.83333333 0.66666667 1. nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.5 0.75 0.6 1. nan nan - 0.66666667 1. 1. 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.25 - -Accuracy on Blind test: 0.62 - -Model_name: Gaussian NB -Model func: GaussianNB() -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - 
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), 
('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 
'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', GaussianNB())]) - -key: fit_time -value: [0.01206636 0.0112257 0.00846434 0.0084424 0.01119018 0.01600456 - 0.0084219 0.00850463 0.00825167 0.00816083] - -mean value: 0.010073256492614747 - -key: score_time -value: [0.01196933 0.00882673 0.00861526 0.01163769 0.00628543 0.00520444 - 0.00848961 0.00851965 0.00831652 0.00836253] - -mean value: 0.00862271785736084 - -key: test_mcc -value: [-0.4472136 0.33333333 0.33333333 0.70710678 nan nan - -0.4472136 0. 0.16666667 -0.16666667] - -mean value: nan - -key: train_mcc -value: [0.85634884 0.80829038 0.84866842 0.89056356 0.70064905 0.82305489 - 0.77151675 0.77151675 0.53088871 0.81196581] - -mean value: 0.7813463156135712 - -key: test_accuracy -value: [0.33333333 0.66666667 0.66666667 0.83333333 nan nan - 0.33333333 0.5 0.6 0.4 ] - -mean value: nan - -key: train_accuracy -value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615 - 0.88461538 0.88461538 0.71698113 0.90566038] - -mean value: 0.8834179970972423 - -key: test_fscore -value: [0. 0.66666667 0.66666667 0.8 nan nan - 0.5 0.4 0.5 0.4 ] - -mean value: nan - -key: train_fscore -value: [0.91666667 0.90196078 0.92 0.93877551 0.83333333 0.9122807 - 0.88 0.88 0.61538462 0.90566038] - -mean value: 0.8704061989015298 - -key: test_precision -value: [0. 0.66666667 0.66666667 1. nan nan - 0.4 0.5 0.5 0.5 ] - -mean value: nan - -key: train_precision -value: [1. 0.92 0.95833333 1. 0.90909091 0.83870968 - 0.91666667 0.91666667 1. 0.88888889] - -mean value: 0.9348356142065819 - -key: test_recall -value: [0. 
0.66666667 0.66666667 0.66666667 nan nan - 0.66666667 0.33333333 0.5 0.33333333] - -mean value: nan - -key: train_recall -value: [0.84615385 0.88461538 0.88461538 0.88461538 0.76923077 1. - 0.84615385 0.84615385 0.44444444 0.92307692] - -mean value: 0.832905982905983 - -key: test_roc_auc -value: [0.33333333 0.66666667 0.66666667 0.83333333 nan nan - 0.33333333 0.5 0.58333333 0.41666667] - -mean value: nan - -key: train_roc_auc -value: [0.92307692 0.90384615 0.92307692 0.94230769 0.84615385 0.90384615 - 0.88461538 0.88461538 0.72222222 0.90598291] - -mean value: 0.883974358974359 - -key: test_jcc -value: [0. 0.5 0.5 0.66666667 nan nan - 0.33333333 0.25 0.33333333 0.25 ] - -mean value: nan - -key: train_jcc -value: [0.84615385 0.82142857 0.85185185 0.88461538 0.71428571 0.83870968 - 0.78571429 0.78571429 0.44444444 0.82758621] - -mean value: 0.7800504268524291 - -MCC on Blind test: -0.07 - -Accuracy on Blind test: 0.48 - -Model_name: Naive Bayes -Model func: BernoulliNB() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - 
learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. - _warn_prf(average, modifier, msg_start, len(result)) -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', BernoulliNB())]) - -key: fit_time -value: [0.00878549 0.00863194 0.00832844 0.00831485 0.00832462 0.00855541 - 0.00872874 0.00837517 0.00835276 0.00829864] - -mean value: 0.008469605445861816 - -key: score_time -value: [0.00858736 0.00840878 0.00837111 0.00836158 0.00419855 0.00429416 - 0.00853777 0.00835299 0.00840139 0.00841165] - -mean value: 0.007592535018920899 - -key: test_mcc -value: [ 0. 0. 0.33333333 0. nan nan - 0.70710678 0.70710678 -0.40824829 0.61237244] - -mean value: nan - -key: train_mcc -value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325 - 0.80829038 0.77151675 0.71778392 0.77540056] - -mean value: 0.7542657149596319 - -key: test_accuracy -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.83333333 0.83333333 0.4 0.8 ] - -mean value: nan - -key: train_accuracy -value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615 - 0.90384615 0.88461538 0.8490566 0.88679245] - -mean value: 0.8735849056603774 - -key: test_fscore -value: [0.4 0.4 0.66666667 0.4 nan nan - 0.85714286 0.8 0. 0.85714286] - -mean value: nan - -key: train_fscore -value: [0.84 0.82608696 0.83333333 0.875 0.88888889 0.89795918 - 0.90196078 0.88 0.83333333 0.88 ] - -mean value: 0.865656248006449 - -key: test_precision -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.75 1. 0. 0.75 ] - -mean value: nan - -key: train_precision -value: [0.875 0.95 0.90909091 0.95454545 0.85714286 0.95652174 - 0.92 0.91666667 0.95238095 0.91666667] - -mean value: 0.9208015245623942 - -key: test_recall -value: [0.33333333 0.33333333 0.66666667 0.33333333 nan nan - 1. 0.66666667 0. 1. 
] - -mean value: nan - -key: train_recall -value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385 - 0.88461538 0.84615385 0.74074074 0.84615385] - -mean value: 0.8202279202279202 - -key: test_roc_auc -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.83333333 0.83333333 0.33333333 0.75 ] - -mean value: nan - -key: train_roc_auc -value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615 - 0.90384615 0.88461538 0.8511396 0.88603989] - -mean value: 0.8737179487179488 - -key: test_jcc -value: [0.25 0.25 0.5 0.25 nan nan - 0.75 0.66666667 0. 0.75 ] - -mean value: nan - -key: train_jcc -value: [0.72413793 0.7037037 0.71428571 0.77777778 0.8 0.81481481 - 0.82142857 0.78571429 0.71428571 0.78571429] - -mean value: 0.764186279875935 - -MCC on Blind test: -0.03 - -Accuracy on Blind test: 0.5 - -Model_name: K-Nearest Neighbors -Model func: KNeighborsClassifier() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, 
missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', KNeighborsClassifier())]) - -key: fit_time -value: [0.00827241 0.00880098 0.00887871 0.00861597 0.00911736 0.00877094 - 0.00847507 0.00913715 0.00888658 0.00889826] - -mean value: 0.008785343170166016 - -key: score_time -value: [0.00981283 0.0094254 0.00966692 0.00966859 0.0045979 0.00435901 - 0.00986171 0.009624 0.00968266 0.0103004 ] - -mean value: 0.008699941635131835 - -key: test_mcc -value: [ 0.4472136 0.33333333 0.70710678 0. nan nan - -0.4472136 1. 0.61237244 0.61237244] - -mean value: nan - -key: train_mcc -value: [0.58333333 0.55339859 0.58080232 0.71151247 0.58789635 0.71151247 - 0.58789635 0.65824263 0.53035501 0.57140596] - -mean value: 0.6076355494357508 - -key: test_accuracy -value: [0.66666667 0.66666667 0.83333333 0.5 nan nan - 0.33333333 1. 0.8 0.8 ] - -mean value: nan - -key: train_accuracy -value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385 - 0.78846154 0.82692308 0.75471698 0.77358491] - -mean value: 0.7951378809869376 - -key: test_fscore -value: [0.5 0.66666667 0.85714286 0. nan nan - 0.5 1. 0.66666667 0.85714286] - -mean value: nan - -key: train_fscore -value: [0.71428571 0.73913043 0.7755102 0.82608696 0.76595745 0.82608696 - 0.76595745 0.81632653 0.72340426 0.72727273] - -mean value: 0.7680018673014577 - -key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -test_precision -value: [1. 0.66666667 0.75 0. nan nan - 0.4 1. 1. 0.75 ] - -mean value: nan - -key: train_precision -value: [0.9375 0.85 0.82608696 0.95 0.85714286 0.95 - 0.85714286 0.86956522 0.85 0.88888889] - -mean value: 0.8836326777087646 - -key: test_recall -value: [0.33333333 0.66666667 1. 0. nan nan - 0.66666667 1. 0.5 1. ] - -mean value: nan - -key: train_recall -value: [0.57692308 0.65384615 0.73076923 0.73076923 0.69230769 0.73076923 - 0.69230769 0.76923077 0.62962963 0.61538462] - -mean value: 0.6821937321937321 - -key: test_roc_auc -value: [0.66666667 0.66666667 0.83333333 0.5 nan nan - 0.33333333 1. 
0.75 0.75 ] - -mean value: nan - -key: train_roc_auc -value: [0.76923077 0.76923077 0.78846154 0.84615385 0.78846154 0.84615385 - 0.78846154 0.82692308 0.75712251 0.77065527] - -mean value: 0.7950854700854701 - -key: test_jcc -value: [0.33333333 0.5 0.75 0. nan nan - 0.33333333 1. 0.5 0.75 ] - -mean value: nan - -key: train_jcc -value: [0.55555556 0.5862069 0.63333333 0.7037037 0.62068966 0.7037037 - 0.62068966 0.68965517 0.56666667 0.57142857] - -mean value: 0.6251632913701879 - -MCC on Blind test: 0.07 - -Accuracy on Blind test: 0.55 - -Model_name: SVM -Model func: SVC(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', 
LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', SVC(random_state=42))]) - -key: fit_time -value: [0.00861406 0.00843453 0.00842524 0.00845695 0.00851583 0.00869155 - 0.01020479 0.008883 0.0094862 0.00905514] - -mean value: 0.008876729011535644 - -key: score_time -value: [0.00841975 0.00838161 0.00835943 0.00834155 0.00417018 0.00427651 - 0.0093472 0.00916672 0.00854087 0.00875092] - -mean value: 0.0077754735946655275 - -key: test_mcc -value: [ 0. 0.33333333 0.4472136 0.4472136 nan nan - -0.4472136 0.70710678 0.61237244 1. 
] - -mean value: nan - -key: train_mcc -value: [0.9258201 0.89056356 0.9258201 0.89056356 0.92307692 0.9258201 - 0.84615385 0.80829038 0.89271208 0.89227454] - -mean value: 0.8921095178279635 - -key: test_accuracy -value: [0.5 0.66666667 0.66666667 0.66666667 nan nan - 0.33333333 0.83333333 0.8 1. ] - -mean value: nan - -key: train_accuracy -value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846 - 0.92307692 0.90384615 0.94339623 0.94339623] - -mean value: 0.9444484760522497 - -key: test_fscore -value: [0.4 0.66666667 0.75 0.5 nan nan - 0.5 0.8 0.66666667 1. ] - -mean value: nan - -key: train_fscore -value: [0.96 0.93877551 0.96 0.93877551 0.96153846 0.96 - 0.92307692 0.90566038 0.94117647 0.93877551] - -mean value: 0.9427778763174356 - -key: test_precision -value: [0.5 0.66666667 0.6 1. nan nan - 0.4 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 0.96153846 1. - 0.92307692 0.88888889 1. 1. ] - -mean value: 0.9773504273504273 - -key: test_recall -value: [0.33333333 0.66666667 1. 0.33333333 nan nan - 0.66666667 0.66666667 0.5 1. ] - -mean value: nan - -key: train_recall -value: [0.92307692 0.88461538 0.92307692 0.88461538 0.96153846 0.92307692 - 0.92307692 0.92307692 0.88888889 0.88461538] - -mean value: 0.911965811965812 - -key: test_roc_auc -value: [0.5 0.66666667 0.66666667 0.66666667 nan nan - 0.33333333 0.83333333 0.75 1. ] - -mean value: nan - -key: train_roc_auc -value: [0.96153846 0.94230769 0.96153846 0.94230769 0.96153846 0.96153846 - 0.92307692 0.90384615 0.94444444 0.94230769] - -mean value: 0.9444444444444444 - -key: test_jcc -value: [0.25 0.5 0.6 0.33333333 nan nan - 0.33333333 0.66666667 0.5 1. 
] - -mean value: nan - -key: train_jcc -value: [0.92307692 0.88461538 0.92307692 0.88461538 0.92592593 0.92307692 - 0.85714286 0.82758621 0.88888889 0.88461538] - -mean value: 0.8922620801931147 - -MCC on Blind test: -0.07 - -Accuracy on Blind test: 0.45 - -Model_name: MLP -Model func: MLPClassifier(max_iter=500, random_state=42) -List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - 
return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), 
('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', MLPClassifier(max_iter=500, random_state=42))]) - -key: fit_time -value: [0.257622 0.25290346 0.26910257 0.36610937 0.23078704 0.28138971 - 0.24945307 0.25755453 0.2490387 0.25744033] - -mean value: 0.2671400785446167 - -key: score_time -value: [0.01196766 0.01187682 0.01183176 0.01191616 0.0065763 0.00656652 - 0.01190734 0.01188445 0.0118742 0.01191521] - -mean value: 0.010831642150878906 - -key: test_mcc -value: [0.33333333 0.33333333 0.4472136 0.70710678 nan nan - 0. 1. 0.61237244 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.66666667 0.66666667 0.66666667 0.83333333 nan nan - 0.5 1. 0.8 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.66666667 0.66666667 0.75 0.8 nan nan - 0.4 1. 0.66666667 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.66666667 0.66666667 0.6 1. nan nan - 0.5 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.66666667 0.66666667 1. 0.66666667 nan nan - 0.33333333 1. 0.5 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.66666667 0.66666667 0.66666667 0.83333333 nan nan - 0.5 1. 0.75 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
- -mean value: 1.0 - -key: test_jcc -value: [0.5 0.5 0.6 0.66666667 nan nan - 0.25 1. 0.5 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.07 - -Accuracy on Blind test: 0.52 - -Model_name: Decision Tree -Model func: DecisionTreeClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, - colsample_bynode=None, colsample_bytree=None, - enable_categorical=False, gamma=None, gpu_id=None, - importance_type=None, interaction_constraints=None, - learning_rate=None, max_delta_step=None, max_depth=None, - min_child_weight=None, missing=nan, monotone_constraints=None, - n_estimators=100, n_jobs=None, num_parallel_tree=None, - predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, - scale_pos_weight=None, subsample=None, tree_method=None, - use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', 
AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: 
UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = 
self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', DecisionTreeClassifier(random_state=42))]) - -key: fit_time -value: [0.01336122 0.01300812 0.00985575 0.0098536 0.00926661 0.00887156 - 0.00901055 0.00941229 0.00927663 0.00903344] - -mean value: 0.010094976425170899 - -key: score_time -value: [0.0113616 0.0098033 0.00913811 0.00857115 0.00427389 0.00418425 - 0.00839829 0.00844026 0.00838804 0.00840545] - -mean value: 0.008096432685852051 - -key: test_mcc -value: [0.33333333 1. 0.70710678 1. nan nan - 0.70710678 0.70710678 0.61237244 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [0.66666667 1. 0.83333333 1. nan nan - 0.83333333 0.83333333 0.8 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [0.66666667 1. 0.8 1. nan nan - 0.85714286 0.8 0.66666667 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [0.66666667 1. 1. 1. nan nan - 0.75 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [0.66666667 1. 0.66666667 1. nan nan - 1. 0.66666667 0.5 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [0.66666667 1. 0.83333333 1. nan nan - 0.83333333 0.83333333 0.75 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [0.5 1. 0.66666667 1. 
nan nan - 0.75 0.66666667 0.5 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -MCC on Blind test: 0.73 - -Accuracy on Blind test: 0.88 - Model_name: Extra Trees Model func: ExtraTreesClassifier(random_state=42) List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, @@ -18018,20 +13101,20 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreesClassifier(random_state=42))]) key: fit_time -value: [0.07744193 0.0774622 0.07790446 0.07826352 0.07741332 0.07785106 - 0.07745719 0.07764459 0.07778835 0.07841229] +value: [0.09140396 0.11081243 0.1061058 0.1074965 0.10002065 0.08645439 + 0.08668065 0.08711362 0.08664465 0.08567667] -mean value: 0.07776389122009278 +mean value: 0.09484093189239502 key: score_time -value: [0.01661301 0.0167923 0.01716065 0.01694202 0.00452185 0.00454688 - 0.0172863 0.01668048 0.016675 0.01684666] +value: [0.01851869 0.01910639 0.00513697 0.01879001 0.02097702 0.01896 + 0.01881933 0.01879549 0.01810431 0.01854086] -mean value: 0.014406514167785645 +mean value: 0.01757490634918213 key: test_mcc -value: [0. 0.33333333 0.70710678 0.33333333 nan nan - 0.4472136 0.70710678 0.61237244 1. ] +value: [0.81649658 0.6 nan 0.6 0.81649658 0.81649658 + 0.40824829 0.40824829 0.40824829 0.81649658] mean value: nan @@ -18041,8 +13124,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_accuracy -value: [0.5 0.66666667 0.83333333 0.66666667 nan nan - 0.66666667 0.83333333 0.8 1. ] +value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9] mean value: nan @@ -18052,8 +13134,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.4 0.66666667 0.85714286 0.66666667 nan nan - 0.75 0.8 0.66666667 1. ] +value: [0.88888889 0.8 nan 0.8 0.90909091 0.88888889 + 0.66666667 0.72727273 0.72727273 0.88888889] mean value: nan @@ -18063,8 +13145,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.5 0.66666667 0.75 0.66666667 nan nan - 0.6 1. 1. 1. ] +value: [1. 0.8 nan 0.8 0.83333333 1. + 0.75 0.66666667 0.66666667 1. ] mean value: nan @@ -18074,8 +13156,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.33333333 0.66666667 1. 0.66666667 nan nan - 1. 0.66666667 0.5 1. ] +value: [0.8 0.8 nan 0.8 1. 0.8 0.6 0.8 0.8 0.8] mean value: nan @@ -18085,8 +13166,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.5 0.66666667 0.83333333 0.66666667 nan nan - 0.66666667 0.83333333 0.75 1. ] +value: [0.9 0.8 nan 0.8 0.9 0.9 0.7 0.7 0.7 0.9] mean value: nan @@ -18096,8 +13176,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.25 0.5 0.75 0.5 nan nan - 0.6 0.66666667 0.5 1. ] +value: [0.8 0.66666667 nan 0.66666667 0.83333333 0.8 + 0.5 0.57142857 0.57142857 0.8 ] mean value: nan @@ -18105,7 +13185,14 @@ key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: + +MCC on Blind test: 0.49 + +Accuracy on Blind test: 0.78 + +Model_name: Extra Tree +Model func: ExtraTreeClassifier(random_state=42) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -18153,62 +13240,7 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( - -MCC on Blind test: 0.21 - -Accuracy on Blind test: 0.57 - -Model_name: Extra Tree -Model func: ExtraTreeClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, @@ -18237,20 +13269,20 @@ Running model pipeline: 
Pipeline(steps=[('prep', ('model', ExtraTreeClassifier(random_state=42))]) key: fit_time -value: [0.00849891 0.00832915 0.0084703 0.00833631 0.00853562 0.00831246 - 0.00834322 0.00833392 0.00853539 0.00829196] +value: [0.00981331 0.00926542 0.00961494 0.00931954 0.00935292 0.00946355 + 0.00947499 0.00947022 0.00934291 0.00956202] -mean value: 0.008398723602294923 +mean value: 0.009467983245849609 key: score_time -value: [0.00839496 0.00838709 0.00878024 0.0090816 0.00428891 0.00421572 - 0.00838637 0.00845718 0.00835276 0.00842619] +value: [0.00965023 0.00959539 0.00480247 0.00923729 0.0094893 0.00876379 + 0.00931597 0.00915647 0.00923562 0.00858641] -mean value: 0.007677102088928222 +mean value: 0.008783292770385743 key: test_mcc -value: [0. 0.33333333 0.4472136 0. nan nan - 0.4472136 0. 0.61237244 0.61237244] +value: [0.65465367 0. nan 0.81649658 0.81649658 0.81649658 + 0.2 0.5 0.6 0.81649658] mean value: nan @@ -18260,8 +13292,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.5 0.66666667 0.66666667 0.5 nan nan - 0.66666667 0.5 0.8 0.8 ] +value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9] mean value: nan @@ -18271,8 +13302,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.4 0.66666667 0.75 0.57142857 nan nan - 0.5 0.4 0.66666667 0.85714286] +value: [0.83333333 0.61538462 nan 0.88888889 0.90909091 0.90909091 + 0.6 0.76923077 0.8 0.88888889] mean value: nan @@ -18282,8 +13313,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.5 0.66666667 0.6 0.5 nan nan - 1. 0.5 1. 0.75 ] +value: [0.71428571 0.5 nan 1. 0.83333333 0.83333333 + 0.6 0.625 0.8 1. ] mean value: nan @@ -18293,8 +13324,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.33333333 0.66666667 1. 0.66666667 nan nan - 0.33333333 0.33333333 0.5 1. ] +value: [1. 0.8 nan 0.8 1. 1. 0.6 1. 0.8 0.8] mean value: nan @@ -18304,8 +13334,7 @@ value: [1. 1. 1. 1. 1. 1. 
1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.5 0.66666667 0.66666667 0.5 nan nan - 0.66666667 0.5 0.75 0.75 ] +value: [0.8 0.5 nan 0.9 0.9 0.9 0.6 0.7 0.8 0.9] mean value: nan @@ -18315,8 +13344,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.25 0.5 0.6 0.4 nan nan - 0.33333333 0.25 0.5 0.75 ] +value: [0.71428571 0.44444444 nan 0.8 0.83333333 0.83333333 + 0.42857143 0.625 0.66666667 0.8 ] mean value: nan @@ -18325,7 +13354,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 -MCC on Blind test: 0.06 +MCC on Blind test: 0.03 Accuracy on Blind test: 0.5 @@ -18391,58 +13420,6 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. - warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. @@ -18499,54 +13476,10 @@ ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. 
To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while 
self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. 
+ warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. @@ -18573,20 +13506,20 @@ Pipeline(steps=[('prep', RandomForestClassifier(n_estimators=1000, random_state=42))]) key: fit_time -value: [0.9603796 0.96432114 0.96725059 0.96787596 0.9631319 0.97312737 - 0.96518373 0.96293473 0.96418524 0.96683455] +value: [1.050071 1.07899761 1.01138568 1.02257371 1.02732587 1.01924753 + 1.05872726 1.03712177 1.03975534 1.04403877] -mean value: 0.9655224800109863 +mean value: 1.0389244556427002 key: score_time -value: [0.14033937 0.08650279 0.0865438 0.08715081 0.0047214 0.0044682 - 0.08628559 0.08696723 0.08689618 0.08648324] +value: [0.08884025 0.0888741 0.00447512 0.09609246 0.08854914 0.09480143 + 0.10091877 0.09175038 0.0905838 0.08938766] -mean value: 0.07563586235046386 +mean value: 0.0834273099899292 key: test_mcc -value: [0.70710678 0.70710678 1. 
0.70710678 nan nan - 0.70710678 0.70710678 0.16666667 0.66666667] +value: [0.81649658 0.6 nan 0.81649658 1. 0.81649658 + 0.65465367 0.40824829 0.40824829 0.81649658] mean value: nan @@ -18596,8 +13529,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.83333333 0.83333333 1. 0.83333333 nan nan - 0.83333333 0.83333333 0.6 0.8 ] +value: [0.9 0.8 nan 0.9 1. 0.9 0.8 0.7 0.7 0.9] mean value: nan @@ -18607,8 +13539,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.8 0.85714286 1. 0.8 nan nan - 0.85714286 0.8 0.5 0.8 ] +value: [0.90909091 0.8 nan 0.88888889 1. 0.88888889 + 0.75 0.72727273 0.72727273 0.88888889] mean value: nan @@ -18618,7 +13550,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [1. 0.75 1. 1. nan nan 0.75 1. 0.5 1. ] +value: [0.83333333 0.8 nan 1. 1. 1. + 1. 0.66666667 0.66666667 1. ] mean value: nan @@ -18628,8 +13561,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.66666667 1. 1. 0.66666667 nan nan - 1. 0.66666667 0.5 0.66666667] +value: [1. 0.8 nan 0.8 1. 0.8 0.6 0.8 0.8 0.8] mean value: nan @@ -18639,8 +13571,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.83333333 0.83333333 1. 0.83333333 nan nan - 0.83333333 0.83333333 0.58333333 0.83333333] +value: [0.9 0.8 nan 0.9 1. 0.9 0.8 0.7 0.7 0.9] mean value: nan @@ -18650,8 +13581,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.66666667 0.75 1. 0.66666667 nan nan - 0.75 0.66666667 0.33333333 0.66666667] +value: [0.83333333 0.66666667 nan 0.8 1. 0.8 + 0.6 0.57142857 0.57142857 0.8 ] mean value: nan @@ -18660,9 +13591,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.35 +MCC on Blind test: 0.55 -Accuracy on Blind test: 0.68 +Accuracy on Blind test: 0.8 Model_name: Random Forest2 Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5, @@ -18700,70 +13631,76 @@ Running model pipeline: Pipeline(steps=[('prep', oob_score=True, random_state=42))]) key: fit_time -value: [0.83453751 0.86814737 0.89418793 0.84960437 0.85452509 0.79625249 - 0.84640026 0.8471365 0.86017728 0.86971331] +value: [0.86844516 0.87865114 0.92193866 0.84994102 0.85754681 0.82041979 + 0.87674642 0.89583445 0.9129231 0.9018712 ] -mean value: 0.8520682096481323 +mean value: 0.87843177318573 key: score_time -value: [0.18317986 0.18850303 0.23107004 0.17553663 0.00471902 0.00476313 - 0.23304439 0.20270252 0.2285862 0.14570665] +value: [0.17786026 0.21679783 0.00455117 0.22086644 0.21914244 0.20099974 + 0.23634052 0.21746397 0.24935365 0.14481735] -mean value: 0.1597811460494995 +mean value: 0.1888193368911743 key: test_mcc -value: [0. 0.70710678 1. 0.70710678 nan nan - 0.70710678 1. 0.16666667 0.66666667] +value: [0.81649658 0.6 nan 0.6 1. 1. + 0.40824829 0.40824829 0.6 0.81649658] mean value: nan key: train_mcc -value: [0.96225045 1. 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [0.97801929 0.95555556 0.91201231 0.93356387 0.97801929 0.95555556 + 0.95555556 0.95555556 0.97801929 0.93356387] -mean value: 0.9962250448649377 +mean value: 0.9535420155810868 key: test_accuracy -value: [0.5 0.83333333 1. 0.83333333 nan nan - 0.83333333 1. 0.6 0.8 ] +value: [0.9 0.8 nan 0.8 1. 1. 0.7 0.7 0.8 0.9] mean value: nan key: train_accuracy -value: [0.98076923 1. 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778 + 0.97777778 0.97777778 0.98888889 0.96666667] -mean value: 0.9980769230769231 +mean value: 0.9766666666666667 key: test_fscore -value: [0.4 0.85714286 1. 0.8 nan nan - 0.85714286 1. 0.5 0.8 ] +value: [0.90909091 0.8 nan 0.8 1. 1. 
+ 0.66666667 0.72727273 0.8 0.88888889] mean value: nan key: train_fscore -value: [0.98039216 1. 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [0.98876404 0.97777778 0.95454545 0.96629213 0.98901099 0.97777778 + 0.97777778 0.97777778 0.98876404 0.96629213] -mean value: 0.9980392156862745 +mean value: 0.9764779914218117 key: test_precision -value: [0.5 0.75 1. 1. nan nan 0.75 1. 0.5 1. ] +value: [0.83333333 0.8 nan 0.8 1. 1. + 0.75 0.66666667 0.8 1. ] mean value: nan key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [1. 0.97777778 0.97674419 0.97727273 0.97826087 0.97777778 + 0.97777778 0.97777778 1. 0.97727273] -mean value: 1.0 +mean value: 0.9820661621268294 key: test_recall -value: [0.33333333 1. 1. 0.66666667 nan nan - 1. 1. 0.5 0.66666667] +value: [1. 0.8 nan 0.8 1. 1. 0.6 0.8 0.8 0.8] mean value: nan key: train_recall +value: [0.97777778 0.97777778 0.93333333 0.95555556 1. 0.97777778 + 0.97777778 0.97777778 0.97777778 0.95555556] + +mean value: 0.9711111111111111 + +key: test_roc_auc value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. warn( /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: @@ -18814,86 +13751,31 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -[0.96153846 1. 1. 1. 1. 1. - 1. 1. 1. 1. ] - -mean value: 0.9961538461538462 - -key: test_roc_auc -value: [0.5 0.83333333 1. 0.83333333 nan nan - 0.83333333 1. 0.58333333 0.83333333] +[0.9 0.8 nan 0.8 1. 1. 0.7 0.7 0.8 0.9] mean value: nan key: train_roc_auc -value: [0.98076923 1. 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [0.98888889 0.97777778 0.95555556 0.96666667 0.98888889 0.97777778 + 0.97777778 0.97777778 0.98888889 0.96666667] -mean value: 0.9980769230769231 +mean value: 0.9766666666666667 key: test_jcc -value: [0.25 0.75 1. 0.66666667 nan nan - 0.75 1. 0.33333333 0.66666667] +value: [0.83333333 0.66666667 nan 0.66666667 1. 1. + 0.5 0.57142857 0.66666667 0.8 ] mean value: nan key: train_jcc -value: [0.96153846 1. 1. 1. 
1. 1. - 1. 1. 1. 1. ] +value: [0.97777778 0.95652174 0.91304348 0.93478261 0.97826087 0.95652174 + 0.95652174 0.95652174 0.97777778 0.93478261] -mean value: 0.9961538461538462 +mean value: 0.9542512077294686 -MCC on Blind test: 0.31 +MCC on Blind test: 0.55 -Accuracy on Blind test: 0.65 +Accuracy on Blind test: 0.8 Model_name: Naive Bayes Model func: BernoulliNB() @@ -18926,104 +13808,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.02309561 0.00960851 0.00958014 0.00926995 0.00950646 0.01018548 - 0.00907755 0.00863814 0.00857759 0.00869775] +value: [0.02026916 0.00899649 0.00874615 0.00913286 0.00929546 0.00914478 + 0.00965142 0.00860023 0.00842547 0.00850058] -mean value: 0.010623717308044433 +mean value: 0.010076260566711426 key: score_time -value: [0.01342392 0.00879192 0.01012683 0.00904608 0.00474715 0.00480556 - 0.00867152 0.00860524 0.0085516 0.00846505] +value: [0.00897002 0.00989938 0.0045855 0.00872135 0.00897264 0.00869513 + 0.00925016 0.00857472 0.00852871 0.00845027] -mean value: 0.00852348804473877 +mean value: 0.008464789390563965 key: test_mcc -value: [ 0. 0. 0.33333333 0. nan nan - 0.70710678 0.70710678 -0.40824829 0.61237244] +value: [0.65465367 0.21821789 nan 0. 0.40824829 0.65465367 + 0.21821789 0. 
0.40824829 0.40824829] mean value: nan key: train_mcc -value: [0.69436507 0.71151247 0.70064905 0.77849894 0.77151675 0.81312325 - 0.80829038 0.77151675 0.71778392 0.77540056] +value: [0.57906602 0.73624773 0.57906602 0.60540551 0.60238451 0.56056066 + 0.64700558 0.67082039 0.62609903 0.64700558] -mean value: 0.7542657149596319 +mean value: 0.6253661066190971 key: test_accuracy -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.83333333 0.83333333 0.4 0.8 ] +value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7] mean value: nan key: train_accuracy -value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615 - 0.90384615 0.88461538 0.8490566 0.88679245] +value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778 + 0.82222222 0.83333333 0.81111111 0.82222222] -mean value: 0.8735849056603774 +mean value: 0.8111111111111111 key: test_fscore -value: [0.4 0.4 0.66666667 0.4 nan nan - 0.85714286 0.8 0. 0.85714286] +value: [0.75 0.66666667 nan 0.54545455 0.72727273 0.75 + 0.5 0.44444444 0.66666667 0.66666667] mean value: nan key: train_fscore -value: [0.84 0.82608696 0.83333333 0.875 0.88888889 0.89795918 - 0.90196078 0.88 0.83333333 0.88 ] +value: [0.7816092 0.86046512 0.7816092 0.78571429 0.79069767 0.76190476 + 0.81395349 0.82352941 0.8 0.81395349] -mean value: 0.865656248006449 +mean value: 0.8013436617630212 key: test_precision -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.75 1. 0. 0.75 ] +value: [1. 0.57142857 nan 0.5 0.66666667 1. + 0.66666667 0.5 0.75 0.75 ] mean value: nan key: train_precision -value: [0.875 0.95 0.90909091 0.95454545 0.85714286 0.95652174 - 0.92 0.91666667 0.95238095 0.91666667] +value: [0.80952381 0.90243902 0.80952381 0.84615385 0.82926829 0.82051282 + 0.85365854 0.875 0.85 0.85365854] -mean value: 0.9208015245623942 +mean value: 0.8449738675958188 key: test_recall -value: [0.33333333 0.33333333 0.66666667 0.33333333 nan nan - 1. 0.66666667 0. 1. 
] +value: [0.6 0.8 nan 0.6 0.8 0.6 0.4 0.4 0.6 0.6] mean value: nan key: train_recall -value: [0.80769231 0.73076923 0.76923077 0.80769231 0.92307692 0.84615385 - 0.88461538 0.84615385 0.74074074 0.84615385] +value: [0.75555556 0.82222222 0.75555556 0.73333333 0.75555556 0.71111111 + 0.77777778 0.77777778 0.75555556 0.77777778] -mean value: 0.8202279202279202 +mean value: 0.7622222222222222 key: test_roc_auc -value: [0.5 0.5 0.66666667 0.5 nan nan - 0.83333333 0.83333333 0.33333333 0.75 ] +value: [0.8 0.6 nan 0.5 0.7 0.8 0.6 0.5 0.7 0.7] mean value: nan key: train_roc_auc -value: [0.84615385 0.84615385 0.84615385 0.88461538 0.88461538 0.90384615 - 0.90384615 0.88461538 0.8511396 0.88603989] +value: [0.78888889 0.86666667 0.78888889 0.8 0.8 0.77777778 + 0.82222222 0.83333333 0.81111111 0.82222222] -mean value: 0.8737179487179488 +mean value: 0.8111111111111111 key: test_jcc -value: [0.25 0.25 0.5 0.25 nan nan - 0.75 0.66666667 0. 0.75 ] +value: [0.6 0.5 nan 0.375 0.57142857 0.6 + 0.33333333 0.28571429 0.5 0.5 ] mean value: nan key: train_jcc -value: [0.72413793 0.7037037 0.71428571 0.77777778 0.8 0.81481481 - 0.82142857 0.78571429 0.71428571 0.78571429] +value: [0.64150943 0.75510204 0.64150943 0.64705882 0.65384615 0.61538462 + 0.68627451 0.7 0.66666667 0.68627451] -mean value: 0.764186279875935 +mean value: 0.6693626187775545 -MCC on Blind test: -0.03 +MCC on Blind test: 0.18 -Accuracy on Blind test: 0.5 +Accuracy on Blind test: 0.65 Model_name: XGBoost Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, @@ -19083,54 +13962,6 @@ Traceback (most recent call last): raise ValueError(msg) ValueError: Found unknown categories ['XDR'] in column 5 during transform - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - warnings.warn( [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, @@ -19165,20 +13996,20 @@ 
Running model pipeline: Pipeline(steps=[('prep', validate_parameters=None, verbosity=0))]) key: fit_time -value: [0.04081035 0.03964376 0.03409052 0.03344679 0.036654 0.0408752 - 0.04086256 0.03915644 0.03814292 0.04359317] +value: [0.1605041 0.03366351 0.0360558 0.05030107 0.04023385 0.03848863 + 0.09582305 0.07147694 0.0326159 0.06000638] -mean value: 0.03872756958007813 +mean value: 0.06191692352294922 key: score_time -value: [0.01037478 0.01039386 0.01099706 0.01184702 0.0050633 0.00477147 - 0.01168633 0.01027298 0.01030707 0.01144981] +value: [0.01092339 0.01055479 0.00469685 0.01059723 0.01124191 0.01426959 + 0.01316428 0.01329756 0.01405334 0.0105629 ] -mean value: 0.009716367721557618 +mean value: 0.011336183547973633 key: test_mcc -value: [1. 1. 0.70710678 1. nan nan - 0.70710678 1. 1. 1. ] +value: [1. 0.81649658 nan 0.81649658 1. 0.81649658 + 0.81649658 0.6 0.5 1. ] mean value: nan @@ -19188,8 +14019,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [1. 1. 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] +value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.8 0.7 1. ] mean value: nan @@ -19199,8 +14029,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [1. 1. 0.8 1. nan nan - 0.85714286 1. 1. 1. ] +value: [1. 0.90909091 nan 0.88888889 1. 0.90909091 + 0.88888889 0.8 0.76923077 1. ] mean value: nan @@ -19210,7 +14040,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ] +value: [1. 0.83333333 nan 1. 1. 0.83333333 + 1. 0.8 0.625 1. ] mean value: nan @@ -19220,8 +14051,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 0.66666667 1. nan nan - 1. 1. 1. 1. ] +value: [1. 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ] mean value: nan @@ -19231,8 +14061,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [1. 1. 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] +value: [1. 0.9 nan 0.9 1. 
0.9 0.9 0.8 0.7 1. ] mean value: nan @@ -19242,8 +14071,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [1. 1. 0.66666667 1. nan nan - 0.75 1. 1. 1. ] +value: [1. 0.83333333 nan 0.8 1. 0.83333333 + 0.8 0.66666667 0.625 1. ] mean value: nan @@ -19326,54 +14155,6 @@ KeyError: 'predict' During handling of the above exception, another exception occurred: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = 
self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score scores = scorer(estimator, X_test, y_test) @@ -19413,54 +14194,6 @@ Traceback (most recent call last): raise ValueError(msg) ValueError: Found unknown categories ['XDR'] in column 5 during transform - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = 
transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) 
-ValueError: Found unknown categories ['Other'] in column 5 during transform - warnings.warn( Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', @@ -19479,103 +14212,100 @@ Pipeline(steps=[('prep', ('model', LinearDiscriminantAnalysis())]) key: fit_time -value: [0.03194928 0.03370738 0.03454018 0.0320971 0.03061795 0.03505969 - 0.03510022 0.03437901 0.04472756 0.04004741] +value: [0.04766369 0.03517723 0.03765726 0.03865457 0.03713489 0.0370903 + 0.03802204 0.03782034 0.03993273 0.03703785] -mean value: 0.03522257804870606 +mean value: 0.03861908912658692 key: score_time -value: [0.02213168 0.0220046 0.02188015 0.0226388 0.00919867 0.00595093 - 0.02261281 0.022228 0.02384186 0.02255106] +value: [0.02126861 0.02220416 0.01126933 0.02398229 0.02256036 0.02334976 + 0.02145195 0.02393079 0.02469134 0.02361798] -mean value: 0.019503855705261232 +mean value: 0.021832656860351563 key: test_mcc -value: [0.70710678 0.70710678 0. 1. nan nan - 1. 0.33333333 1. 0.61237244] +value: [0.40824829 0.81649658 nan 0.40824829 1. 0.65465367 + 0.81649658 0.65465367 0.21821789 0.6 ] mean value: nan key: train_mcc -value: [0.96225045 1. 1. 1. 1. 0.92307692 +value: [1. 0.97801929 1. 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.9885327371726299 +mean value: 0.9978019293843652 key: test_accuracy -value: [0.83333333 0.83333333 0.5 1. nan nan - 1. 0.66666667 1. 0.8 ] +value: [0.7 0.9 nan 0.7 1. 0.8 0.9 0.8 0.6 0.8] mean value: nan key: train_accuracy -value: [0.98076923 1. 1. 1. 1. 0.96153846 +value: [1. 0.98888889 1. 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.9942307692307693 +mean value: 0.9988888888888889 key: test_fscore -value: [0.85714286 0.85714286 0.57142857 1. nan nan - 1. 0.66666667 1. 0.85714286] +value: [0.72727273 0.88888889 nan 0.72727273 1. 0.83333333 + 0.90909091 0.83333333 0.66666667 0.8 ] mean value: nan key: train_fscore -value: [0.98113208 1. 1. 1. 1. 0.96153846 +value: [1. 0.98901099 1. 1. 1. 1. 1. 1. 1. 1. 
] -mean value: 0.994267053701016 +mean value: 0.9989010989010989 key: test_precision -value: [0.75 0.75 0.5 1. nan nan - 1. 0.66666667 1. 0.75 ] +value: [0.66666667 1. nan 0.66666667 1. 0.71428571 + 0.83333333 0.71428571 0.57142857 0.8 ] mean value: nan key: train_precision -value: [0.96296296 1. 1. 1. 1. 0.96153846 +value: [1. 0.97826087 1. 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.9924501424501424 +mean value: 0.9978260869565218 key: test_recall -value: [1. 1. 0.66666667 1. nan nan - 1. 0.66666667 1. 1. ] +value: [0.8 0.8 nan 0.8 1. 1. 1. 1. 0.8 0.8] mean value: nan key: train_recall -value: [1. 1. 1. 1. 1. 0.96153846 - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9961538461538462 +mean value: 1.0 key: test_roc_auc -value: [0.83333333 0.83333333 0.5 1. nan nan - 1. 0.66666667 1. 0.75 ] +value: [0.7 0.9 nan 0.7 1. 0.8 0.9 0.8 0.6 0.8] mean value: nan key: train_roc_auc -value: [0.98076923 1. 1. 1. 1. 0.96153846 +value: [1. 0.98888889 1. 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.9942307692307693 +mean value: 0.9988888888888889 key: test_jcc -value: [0.75 0.75 0.4 1. nan nan 1. 0.5 1. 0.75] +value: [0.57142857 0.8 nan 0.57142857 1. 0.71428571 + 0.83333333 0.71428571 0.5 0.66666667] mean value: nan key: train_jcc -value: [0.96296296 1. 1. 1. 1. 0.92592593 +value: [1. 0.97826087 1. 1. 1. 1. 1. 1. 1. 1. 
] -mean value: 0.9888888888888889 +mean value: 0.9978260869565218 -MCC on Blind test: 0.19 +MCC on Blind test: -0.1 -Accuracy on Blind test: 0.62 +Accuracy on Blind test: 0.48 Model_name: Multinomial Model func: MultinomialNB() @@ -19608,54 +14338,70 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MultinomialNB())]) key: fit_time -value: [0.0098958 0.00960755 0.008389 0.00844193 0.00868154 0.00891495 - 0.00903392 0.00872874 0.00954485 0.00864506] +value: [0.01478887 0.00884414 0.00876284 0.00883818 0.00909948 0.00853968 + 0.00871682 0.00872874 0.00913668 0.00861716] -mean value: 0.008988332748413087 +mean value: 0.009407258033752442 key: score_time -value: [0.00995326 0.00929236 0.00838757 0.00841975 0.00438452 0.00442958 - 0.00843525 0.00857377 0.00896025 0.00875926] +value: [0.008955 0.00877619 0.00435042 0.00857329 0.00864887 0.0086658 + 0.00855088 0.00852728 0.00936174 0.00867295] -mean value: 0.007959556579589844 +mean value: 0.00830824375152588 key: test_mcc -value: [0. 0.33333333 0.4472136 0. nan nan - 0. 1. 0.61237244 1. ] +value: [0.21821789 0.40824829 nan 0.2 0.81649658 1. + 0.21821789 0. 0.40824829 0.21821789] mean value: nan key: train_mcc -value: [0.70064905 0.57735027 0.73131034 0.77151675 0.6172134 0.73131034 - 0.6172134 0.65433031 0.70042867 0.77540056] +value: [0.64508188 0.53452248 0.55776344 0.55776344 0.53452248 0.51161666 + 0.57906602 0.60238451 0.62609903 0.53452248] -mean value: 0.6876723088831835 +mean value: 0.5683342428673076 key: test_accuracy -value: [0.5 0.66666667 0.66666667 0.5 nan nan - 0.5 1. 0.8 1. ] +value: [0.6 0.7 nan 0.6 0.9 1. 
0.6 0.5 0.7 0.6] mean value: nan key: train_accuracy -value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462 - 0.80769231 0.82692308 0.8490566 0.88679245] +value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556 + 0.78888889 0.8 0.81111111 0.76666667] -mean value: 0.8428156748911466 +mean value: 0.7833333333333333 key: test_fscore -value: [0.4 0.66666667 0.75 0.4 nan nan - 0.57142857 1. 0.66666667 1. ] +value: [0.66666667 0.72727273 nan 0.6 0.90909091 1. + 0.5 0.44444444 0.72727273 0.5 ] mean value: nan key: train_fscore -value: [0.83333333 0.78431373 0.8627451 0.88 0.8 0.8627451 - 0.8 0.82352941 0.84615385 0.88 ] +value: [0.81818182 0.75862069 0.76744186 0.76744186 0.75862069 0.75 + 0.7816092 0.79069767 0.8 0.75862069] -mean value: 0.8372820512820512 +mean value: 0.7751234477898471 key: test_precision +value: [0.57142857 0.66666667 nan 0.6 0.83333333 1. + 0.66666667 0.5 0.66666667 0.66666667] + +mean value: nan + +key: train_precision +value: [0.8372093 0.78571429 0.80487805 0.80487805 0.78571429 0.76744186 + 0.80952381 0.82926829 0.85 0.78571429] + +mean value: 0.8060342219701266 + +key: test_recall +value: [0.8 0.8 nan 0.6 1. 1. 0.4 0.4 0.8 0.4] + +mean value: nan + +key: train_recall value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call @@ -19664,6 +14410,403 @@ KeyError: 'predict' During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[0.8 0.73333333 0.73333333 0.73333333 0.73333333 0.73333333 + 0.75555556 0.75555556 0.75555556 0.73333333] + +mean value: 0.7466666666666666 + +key: test_roc_auc +value: [0.6 0.7 nan 0.6 0.9 1. 0.6 0.5 0.7 0.6] + +mean value: nan + +key: train_roc_auc +value: [0.82222222 0.76666667 0.77777778 0.77777778 0.76666667 0.75555556 + 0.78888889 0.8 0.81111111 0.76666667] + +mean value: 0.7833333333333333 + +key: test_jcc +value: [0.5 0.57142857 nan 0.42857143 0.83333333 1. 
+ 0.33333333 0.28571429 0.57142857 0.33333333] + +mean value: nan + +key: train_jcc +value: [0.69230769 0.61111111 0.62264151 0.62264151 0.61111111 0.6 + 0.64150943 0.65384615 0.66666667 0.61111111] + +mean value: 0.6332946298984035 + +MCC on Blind test: 0.05 + +Accuracy on Blind test: 0.57 + +Model_name: Passive Aggresive +Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', 
AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', + PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) + +key: fit_time +value: [0.01021981 0.01365471 0.01286817 0.01448369 0.01383781 0.01418757 + 0.01293206 0.01413035 0.0132041 0.01391315] + +mean value: 0.013343143463134765 + +key: score_time +value: [0.00872874 0.01148295 0.00617766 0.01167202 0.01157427 0.01153183 + 0.01140785 0.01148415 0.01151109 0.01141477] + +mean value: 0.010698533058166504 + +key: test_mcc +value: [0.40824829 0.5 nan 0.40824829 0.81649658 0.81649658 + 0.65465367 0.81649658 0.21821789 1. ] + +mean value: nan + +key: train_mcc +value: [0.93356387 0.67202151 0.88910845 0.97801929 0.97801929 0.95650071 + 0.95650071 0.81649658 0.95650071 0.87447463] + +mean value: 0.9011205769973302 + +key: test_accuracy +value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. 
] + +mean value: nan + +key: train_accuracy +value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778 + 0.97777778 0.9 0.97777778 0.93333333] + +mean value: 0.9466666666666667 + +key: test_fscore +value: [0.72727273 0.76923077 nan 0.66666667 0.90909091 0.88888889 + 0.75 0.88888889 0.66666667 1. ] + +mean value: nan + +key: train_fscore +value: [0.96629213 0.8411215 0.94382022 0.98876404 0.98901099 0.97727273 + 0.97727273 0.88888889 0.97826087 0.92857143] + +mean value: 0.9479275530403464 + +key: test_precision +value: [0.66666667 0.625 nan 0.75 0.83333333 1. + 1. 1. 0.57142857 1. ] + +mean value: nan + +key: train_precision +value: [0.97727273 0.72580645 0.95454545 1. 0.97826087 1. + 1. 1. 0.95744681 1. ] + +mean value: 0.9593332311506941 + +key: test_recall +value: [0.8 1. nan 0.6 1. 0.8 0.6 0.8 0.8 1. ] + +mean value: nan + +key: train_recall +value: [0.95555556 1. 0.93333333 0.97777778 1. 0.95555556 + 0.95555556 0.8 1. 0.86666667] + +mean value: 0.9444444444444444 + +key: test_roc_auc +value: [0.7 0.7 nan 0.7 0.9 0.9 0.8 0.9 0.6 1. ] + +mean value: nan + +key: train_roc_auc +value: [0.96666667 0.81111111 0.94444444 0.98888889 0.98888889 0.97777778 + 0.97777778 0.9 0.97777778 0.93333333] + +mean value: 0.9466666666666668 + +key: test_jcc +value: [0.57142857 0.625 nan 0.5 0.83333333 0.8 + 0.6 0.8 0.5 1. ] + +mean value: nan + +key: train_jcc +value: [0.93478261 0.72580645 0.89361702 0.97777778 0.97826087 0.95555556 + 0.95555556 0.8 0.95744681 0.86666667] + +mean value: 0.9045469315216562 + +MCC on Blind test: 0.42 + +Accuracy on Blind test: 0.75 + +Model_name: Stochastic GDescent +Model func: SGDClassifier(n_jobs=10, random_state=42) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), 
('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', SGDClassifier(n_jobs=10, random_state=42))]) + +key: fit_time +value: [0.01397181 0.01273942 0.01281476 0.01423192 0.0125308 0.01231194 + 0.01235962 0.01259017 0.01245999 0.01275468] + +mean value: 0.012876510620117188 + +key: score_time +value: [0.01007605 0.01145029 0.00625587 0.01167154 0.01142955 0.01144981 + 0.01150846 0.01144075 0.01146603 0.01146483] + +mean value: 0.010821318626403809 + +key: test_mcc +value: [ 1. 0.40824829 nan -0.33333333 0.81649658 1. + 0.65465367 0.81649658 0.21821789 1. ] + +mean value: nan + +key: train_mcc +value: [0.91111111 0.93356387 0.93541435 0.33333333 0.95650071 0.91111111 + 0.72486118 0.91473203 1. 0.97801929] + +mean value: 0.8598646994997842 + +key: test_accuracy +value: [1. 0.7 nan 0.4 0.9 1. 0.8 0.9 0.6 1. ] + +mean value: nan + +key: train_accuracy +value: [0.95555556 0.96666667 0.96666667 0.6 0.97777778 0.95555556 + 0.84444444 0.95555556 1. 0.98888889] + +mean value: 0.9211111111111111 + +key: test_fscore +value: [1. 0.72727273 nan 0. 0.90909091 1. + 0.75 0.90909091 0.66666667 1. ] + +mean value: nan + +key: train_fscore +value: [0.95555556 0.96629213 0.96551724 0.33333333 0.97826087 0.95555556 + 0.81578947 0.95348837 1. 0.98876404] + +mean value: 0.8912556580941488 + +key: test_precision +value: [1. 0.66666667 nan 0. 0.83333333 1. + 1. 0.83333333 0.57142857 1. ] + +mean value: nan + +key: train_precision +value: [0.95555556 0.97727273 1. 1. 0.95744681 0.95555556 + 1. 1. 1. 1. ] + +mean value: 0.9845830646894477 + +key: test_recall +value: [1. 0.8 nan 0. 1. 1. 0.6 1. 0.8 1. 
] + +mean value: nan + +key: train_recall +value: [0.95555556 0.95555556 0.93333333 0.2 1. 0.95555556 + 0.68888889 0.91111111 1. 0.97777778] + +mean value: 0.8577777777777778 + +key: test_roc_auc +value: [1. 0.7 nan 0.4 0.9 1. 0.8 0.9 0.6 1. ] + +mean value: nan + +key: train_roc_auc +value: [0.95555556 0.96666667 0.96666667 0.6 0.97777778 0.95555556 + 0.84444444 0.95555556 1. 0.98888889] + +mean value: 0.9211111111111112 + +key: test_jcc +value: [1. 0.57142857 nan 0. 0.83333333 1. + 0.6 0.83333333 0.5 1. ] + +mean value: nan + +key: train_jcc +value: [0.91489362 0.93478261 0.93333333 0.2 0.95744681 0.91489362 + 0.68888889 0.91111111 1. 0.97777778] + +mean value: 0.8433127762359954 + +MCC on Blind test: 0.36 + +Accuracy on Blind test: 0.72 + +Model_name: AdaBoost Classifier +Model func: AdaBoostClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + 
num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score scores = scorer(estimator, X_test, y_test) @@ -19749,61 +14892,117 @@ Traceback (most recent call last): X_int, X_mask = self._transform( File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform +ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -[0.5 0.66666667 0.6 0.5 nan nan - 0.5 1. 1. 1. ] +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', AdaBoostClassifier(random_state=42))]) + +key: fit_time +value: [0.09748411 0.08118486 0.08655453 0.08261776 0.08569789 0.08832264 + 0.08770275 0.0874753 0.09288096 0.08781147] + +mean value: 0.08777322769165039 + +key: score_time +value: [0.01493359 0.01460385 0.0050025 0.01509309 0.01566195 0.01640439 + 0.01560545 0.01591015 0.01580977 0.01584244] + +mean value: 0.01448671817779541 + +key: test_mcc +value: [0.81649658 0.6 nan 0.81649658 1. 0.81649658 + 0.81649658 0.40824829 0.5 1. ] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.9 0.8 nan 0.9 1. 0.9 0.9 0.7 0.7 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.90909091 0.8 nan 0.88888889 1. 0.90909091 + 0.88888889 0.72727273 0.76923077 1. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.83333333 0.8 nan 1. 1. 0.83333333 + 1. 0.66666667 0.625 1. ] mean value: nan key: train_precision -value: [0.90909091 0.8 0.88 0.91666667 0.83333333 0.88 - 0.83333333 0.84 0.88 0.91666667] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.8689090909090909 +mean value: 1.0 key: test_recall -value: [0.33333333 0.66666667 1. 0.33333333 nan nan - 0.66666667 1. 0.5 1. ] +value: [1. 0.8 nan 0.8 1. 1. 0.8 0.8 1. 1. ] mean value: nan key: train_recall -value: [0.76923077 0.76923077 0.84615385 0.84615385 0.76923077 0.84615385 - 0.76923077 0.80769231 0.81481481 0.84615385] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
-mean value: 0.8084045584045584 +mean value: 1.0 key: test_roc_auc -value: [0.5 0.66666667 0.66666667 0.5 nan nan - 0.5 1. 0.75 1. ] +value: [0.9 0.8 nan 0.9 1. 0.9 0.9 0.7 0.7 1. ] mean value: nan key: train_roc_auc -value: [0.84615385 0.78846154 0.86538462 0.88461538 0.80769231 0.86538462 - 0.80769231 0.82692308 0.8497151 0.88603989] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.8428062678062679 +mean value: 1.0 key: test_jcc -value: [0.25 0.5 0.6 0.25 nan nan 0.4 1. 0.5 1. ] +value: [0.83333333 0.66666667 nan 0.8 1. 0.83333333 + 0.8 0.57142857 0.625 1. ] mean value: nan key: train_jcc -value: [0.71428571 0.64516129 0.75862069 0.78571429 0.66666667 0.75862069 - 0.66666667 0.7 0.73333333 0.78571429] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.7214783622013877 +mean value: 1.0 -MCC on Blind test: 0.0 +MCC on Blind test: 0.78 -Accuracy on Blind test: 0.5 +Accuracy on Blind test: 0.9 -Model_name: Passive Aggresive -Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) +Model_name: Bagging Classifier +Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, @@ -19831,59 +15030,56 
@@ Running model pipeline: Pipeline(steps=[('prep', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], dtype='object'))])), ('model', - PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) + BaggingClassifier(n_jobs=10, oob_score=True, + random_state=42))]) key: fit_time -value: [0.00921822 0.01226902 0.01219916 0.01230478 0.01273918 0.01236653 - 0.01281667 0.01253247 0.01347136 0.0127697 ] +value: [0.03620768 0.03868723 0.02701831 0.0291822 0.0329349 0.03723025 + 0.05369878 0.03093934 0.03043032 0.04312468] -mean value: 0.012268710136413574 +mean value: 0.03594536781311035 key: score_time -value: [0.00918436 0.01127267 0.01138401 0.01152658 0.00610924 0.00630403 - 0.01162601 0.01155329 0.01138902 0.01136518] +value: [0.01804686 0.01641774 0.00487351 0.02259183 0.02211642 0.03795409 + 0.03202295 0.02688098 0.03348565 0.03866339] -mean value: 0.01017143726348877 +mean value: 0.025305342674255372 key: test_mcc -value: [0.33333333 0.70710678 0.4472136 0.70710678 nan nan - 0.4472136 1. 1. 0.61237244] +value: [1. 0.81649658 nan 0.81649658 1. 0.81649658 + 0.81649658 0.40824829 0.65465367 1. ] mean value: nan key: train_mcc -value: [1. 0.89056356 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9890563556561721 +mean value: 1.0 key: test_accuracy -value: [0.66666667 0.83333333 0.66666667 0.83333333 nan nan - 0.66666667 1. 1. 0.8 ] +value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.7 0.8 1. ] mean value: nan key: train_accuracy -value: [1. 0.94230769 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9942307692307693 +mean value: 1.0 key: test_fscore -value: [0.66666667 0.8 0.75 0.8 nan nan - 0.5 1. 1. 0.85714286] +value: [1. 0.90909091 nan 0.88888889 1. 0.90909091 + 0.88888889 0.72727273 0.83333333 1. ] mean value: nan key: train_fscore -value: [1. 0.93877551 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
-mean value: 0.9938775510204082 +mean value: 1.0 key: test_precision -value: [0.66666667 1. 0.6 1. nan nan - 1. 1. 1. 0.75 ] +value: [1. 0.83333333 nan 1. 1. 0.83333333 + 1. 0.66666667 0.71428571 1. ] mean value: nan @@ -19893,47 +15089,42 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.66666667 0.66666667 1. 0.66666667 nan nan - 0.33333333 1. 1. 1. ] +value: [1. 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ] mean value: nan key: train_recall -value: [1. 0.88461538 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9884615384615385 +mean value: 1.0 key: test_roc_auc -value: [0.66666667 0.83333333 0.66666667 0.83333333 nan nan - 0.66666667 1. 1. 0.75 ] +value: [1. 0.9 nan 0.9 1. 0.9 0.9 0.7 0.8 1. ] mean value: nan key: train_roc_auc -value: [1. 0.94230769 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9942307692307693 +mean value: 1.0 key: test_jcc -value: [0.5 0.66666667 0.6 0.66666667 nan nan - 0.33333333 1. 1. 0.75 ] +value: [1. 0.83333333 nan 0.8 1. 0.83333333 + 0.8 0.57142857 0.71428571 1. ] mean value: nan key: train_jcc -value: [1. 0.88461538 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9884615384615385 +mean value: 1.0 -MCC on Blind test: 0.17 +MCC on Blind test: 0.95 -Accuracy on Blind test: 0.52 +Accuracy on Blind test: 0.98 -Model_name: Stochastic GDescent -Model func: SGDClassifier(n_jobs=10, random_state=42) +Model_name: Gaussian Process +Model func: GaussianProcessClassifier(random_state=42) List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call @@ -20027,7 +15218,7 @@ Traceback (most recent call last): X_int, X_mask = self._transform( File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform +ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, @@ -20042,676 +15233,6 @@ ValueError: Found unknown categories ['Other'] in column 5 during transform reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact', use_label_encoder=False, validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', 
QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', SGDClassifier(n_jobs=10, random_state=42))]) - -key: fit_time -value: [0.01245952 0.01190948 0.01206875 0.01187015 0.01200151 0.01223326 - 0.01400733 0.01236725 0.01193714 0.01187801] - -mean value: 0.012273240089416503 - -key: score_time -value: [0.0105114 0.01141214 0.01137328 0.01140285 0.00604272 0.0061481 - 0.01152825 0.01161933 0.01146436 0.0113945 ] - -mean value: 0.010289692878723144 - -key: test_mcc -value: [0.70710678 0.70710678 0.4472136 0.70710678 nan nan - 0.4472136 0.70710678 1. 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. - 0.54772256 0.79056942 1. 0.92704716] - -mean value: 0.926533913727155 - -key: test_accuracy -value: [0.83333333 0.83333333 0.66666667 0.83333333 nan nan - 0.66666667 0.83333333 1. 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. - 0.73076923 0.88461538 1. 0.96226415] - -mean value: 0.9577648766328012 - -key: test_fscore -value: [0.85714286 0.85714286 0.75 0.8 nan nan - 0.5 0.8 1. 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. - 0.63157895 0.86956522 1. 0.96 ] - -mean value: 0.9461144164759725 - -key: test_precision -value: [0.75 0.75 0.6 1. 
nan nan 1. 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [1. 1. 1. 0.66666667 nan nan - 0.33333333 0.66666667 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. - 0.46153846 0.76923077 1. 0.92307692] - -mean value: 0.9153846153846154 - -key: test_roc_auc -value: [0.83333333 0.83333333 0.66666667 0.83333333 nan nan - 0.66666667 0.83333333 1. 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. - 0.73076923 0.88461538 1. 0.96153846] - -mean value: 0.9576923076923077 - -key: test_jcc -value: [0.75 0.75 0.6 0.66666667 nan nan - 0.33333333 0.66666667 1. 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. - 0.46153846 0.76923077 1. 0.92307692] - -mean value: 0.9153846153846154 - -MCC on Blind test: 0.24 - -Accuracy on Blind test: 0.57 - -Model_name: AdaBoost Classifier -Model func: AdaBoostClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', 
learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... 
- 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', AdaBoostClassifier(random_state=42))]) - -key: fit_time -value: [0.08268571 0.07317615 0.07111573 0.07205319 0.07330489 0.07189631 - 0.07267833 0.07267451 0.07413673 0.07375813] - -mean value: 0.07374796867370606 - -key: score_time -value: [0.01459098 0.01475048 0.01495194 0.0147922 0.00463033 0.00474453 - 0.01556635 0.01505351 0.01488471 0.01495194] - -mean value: 0.012891697883605956 - -key: test_mcc -value: [1. 1. 0.70710678 1. nan nan - 0.70710678 1. 1. 1. ] - -mean value: nan - -key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_accuracy -value: [1. 1. 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_fscore -value: [1. 1. 0.8 1. nan nan - 0.85714286 1. 1. 1. ] - -mean value: nan - -key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_precision -value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [1. 1. 0.66666667 1. nan nan - 1. 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_roc_auc -value: [1. 1. 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_jcc -value: [1. 1. 0.66666667 1. nan nan - 0.75 1. 1. 1. ] - -mean value: nan - -key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
- -mean value: 1.0 - -MCC on Blind test: 0.73 - -Accuracy on Blind test: 0.88 - -Model_name: Bagging Classifier -Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient 
Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', - ColumnTransformer(remainder='passthrough', - transformers=[('num', MinMaxScaler(), - Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', - 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', - 'mcsm_na_affinity', 'rsa', - ... - 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', - 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], - dtype='object', length=167)), - ('cat', OneHotEncoder(), - Index(['ss_class', 'aa_prop_change', 'electrostatics_change', - 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], - dtype='object'))])), - ('model', - BaggingClassifier(n_jobs=10, oob_score=True, - random_state=42))]) - -key: fit_time -value: [0.03236103 0.03303695 0.03448248 0.02975869 0.04348493 0.04276633 - 0.04121518 0.0335412 0.02573681 0.03820777] - -mean value: 0.035459136962890624 - -key: score_time -value: [0.02469873 0.02373099 0.02471375 0.0250423 0.00463319 0.00474143 - 0.01605988 0.02687597 0.01719236 0.02195811] - -mean value: 0.018964672088623048 - -key: test_mcc -value: [1. 1. 0.70710678 0.70710678 nan nan - 0.70710678 1. 1. 1. ] - -mean value: nan - -key: train_mcc -value: [1. 0.96225045 0.96225045 1. 1. 1. - 1. 1. 1. 1. ] - -mean value: 0.9924500897298753 - -key: test_accuracy -value: [1. 1. 0.83333333 0.83333333 nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: train_accuracy -value: [1. 0.98076923 0.98076923 1. 1. 1. - 1. 1. 1. 1. ] - -mean value: 0.9961538461538462 - -key: test_fscore -value: [1. 1. 0.8 0.8 nan nan - 0.85714286 1. 1. 1. ] - -mean value: nan - -key: train_fscore -value: [1. 0.98039216 0.98039216 1. 1. 1. - 1. 1. 1. 1. 
] - -mean value: 0.996078431372549 - -key: test_precision -value: [1. 1. 1. 1. nan nan 0.75 1. 1. 1. ] - -mean value: nan - -key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] - -mean value: 1.0 - -key: test_recall -value: [1. 1. 0.66666667 0.66666667 nan nan - 1. 1. 1. 1. ] - -mean value: nan - -key: train_recall -value: [1. 0.96153846 0.96153846 1. 1. 1. - 1. 1. 1. 1. ] - -mean value: 0.9923076923076923 - -key: test_roc_auc -value: [1. 1. 0.83333333 0.83333333 nan nan - 0.83333333 1. 1. 1. ] - -mean value: nan - -key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. - _warn_prf(average, modifier, msg_start, len(result)) -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -train_roc_auc -value: [1. 0.98076923 0.98076923 1. 1. 1. - 1. 1. 1. 1. ] - -mean value: 0.9961538461538462 - -key: test_jcc -value: [1. 1. 0.66666667 0.66666667 nan nan - 0.75 1. 1. 1. ] - -mean value: nan - -key: train_jcc -value: [1. 0.96153846 0.96153846 1. 1. 1. - 1. 1. 1. 1. 
] - -mean value: 0.9923076923076923 - -MCC on Blind test: 0.95 - -Accuracy on Blind test: 0.98 - -Model_name: Gaussian Process -Model func: GaussianProcessClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, - n_estimators=1000, n_jobs=10, oob_score=True, - random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, - colsample_bynode=1, colsample_bytree=1, enable_categorical=False, - gamma=0, gpu_id=-1, importance_type=None, - interaction_constraints='', learning_rate=0.300000012, - max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, - monotone_constraints='()', n_estimators=100, n_jobs=12, - num_parallel_tree=1, predictor='auto', random_state=42, - reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, - tree_method='exact', use_label_encoder=False, - validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] Running model pipeline: Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), @@ -20729,20 +15250,20 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianProcessClassifier(random_state=42))]) key: fit_time -value: [0.01034498 0.01023507 0.01016378 0.010077 0.01010346 0.01009202 - 0.01004457 0.01008701 0.01006556 0.01012492] +value: [0.01397204 0.02746344 0.01704359 0.01630187 0.01670456 0.01774883 + 0.01657462 0.01662517 0.03054595 0.01816249] -mean value: 0.010133838653564453 +mean value: 0.019114255905151367 key: score_time -value: [0.00860214 0.0086844 0.00934911 0.00860739 0.00424981 0.00425982 - 0.00847292 0.00851202 0.00853586 0.008564 ] +value: [0.01164699 0.01156616 0.00662804 0.01197863 0.01218772 0.01199245 + 0.01217413 0.01204348 0.02131319 0.02032018] -mean value: 0.007783746719360352 +mean value: 0.01318509578704834 key: test_mcc -value: [-0.4472136 0.33333333 0.33333333 0. nan nan - 0. 0.4472136 0.16666667 0.66666667] +value: [0.81649658 0.6 nan 0.2 0.65465367 0.81649658 + 0.2 0.5 0. 0.81649658] mean value: nan @@ -20752,8 +15273,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.33333333 0.66666667 0.66666667 0.5 nan nan - 0.5 0.66666667 0.6 0.8 ] +value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9] mean value: nan @@ -20763,8 +15283,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0. 0.66666667 0.66666667 0. nan nan - 0.57142857 0.5 0.5 0.8 ] +value: [0.88888889 0.8 nan 0.6 0.83333333 0.88888889 + 0.6 0.76923077 0.61538462 0.88888889] mean value: nan @@ -20774,8 +15294,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0. 0.66666667 0.66666667 0. nan nan - 0.5 1. 0.5 1. 
] +value: [1. 0.8 nan 0.6 0.71428571 1. + 0.6 0.625 0.5 1. ] mean value: nan @@ -20785,8 +15305,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0. 0.66666667 0.66666667 0. nan nan - 0.66666667 0.33333333 0.5 0.66666667] +value: [0.8 0.8 nan 0.6 1. 0.8 0.6 1. 0.8 0.8] mean value: nan @@ -20796,8 +15315,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.33333333 0.66666667 0.66666667 0.5 nan nan - 0.5 0.66666667 0.58333333 0.83333333] +value: [0.9 0.8 nan 0.6 0.8 0.9 0.6 0.7 0.5 0.9] mean value: nan @@ -20807,8 +15325,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0. 0.5 0.5 0. nan nan - 0.4 0.33333333 0.33333333 0.66666667] +value: [0.8 0.66666667 nan 0.42857143 0.71428571 0.8 + 0.42857143 0.625 0.44444444 0.8 ] mean value: nan @@ -20817,9 +15335,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 -MCC on Blind test: 0.0 +MCC on Blind test: 0.01 -Accuracy on Blind test: 0.5 +Accuracy on Blind test: 0.52 Model_name: Gradient Boosting Model func: GradientBoostingClassifier(random_state=42) @@ -20835,221 +15353,7 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact', use_label_encoder=False, validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', 
RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in 
dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") 
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while 
self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['XDR'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") 
-/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear - warnings.warn("Variables are collinear") -Pipeline(steps=[('prep', +Running model pipeline: Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -21066,20 +15370,92 @@ Pipeline(steps=[('prep', ('model', GradientBoostingClassifier(random_state=42))]) key: fit_time -value: [0.13452578 0.13629317 0.09958744 0.12072825 0.12380791 0.09092355 - 0.10838723 0.11978316 0.1374228 0.13621712] +value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' -mean value: 0.12076764106750489 +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") 
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. + _warn_prf(average, modifier, msg_start, len(result)) +[0.20379448 0.20308447 0.20512509 0.20450664 0.19911671 0.19699526 + 0.18761802 0.18994236 0.19499803 0.18119597] + +mean value: 0.19663770198822023 key: score_time -value: [0.00908279 0.00892663 0.00909948 0.00916409 0.00457883 0.00456905 - 0.00913429 0.00951099 0.00891829 0.00896835] +value: [0.00957632 0.01017761 0.00509214 0.01043415 0.01005363 0.00927019 + 0.01004076 0.00947762 0.01011539 0.01008415] -mean value: 0.008195281028747559 +mean value: 0.009432196617126465 key: test_mcc -value: [0.70710678 0.70710678 0.70710678 1. nan nan - 0.70710678 1. 1. 1. ] +value: [0.81649658 0.81649658 nan 0.6 1. 0.81649658 + 0.81649658 0.40824829 0.65465367 1. ] mean value: nan @@ -21089,8 +15465,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.83333333 0.83333333 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] +value: [0.9 0.9 nan 0.8 1. 0.9 0.9 0.7 0.8 1. ] mean value: nan @@ -21100,8 +15475,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_fscore -value: [0.8 0.85714286 0.8 1. nan nan - 0.85714286 1. 1. 1. ] +value: [0.88888889 0.90909091 nan 0.8 1. 0.90909091 + 0.88888889 0.72727273 0.83333333 1. ] mean value: nan @@ -21111,7 +15486,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [1. 0.75 1. 1. nan nan 0.75 1. 1. 1. ] +value: [1. 0.83333333 nan 0.8 1. 0.83333333 + 1. 0.66666667 0.71428571 1. ] mean value: nan @@ -21121,8 +15497,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.66666667 1. 0.66666667 1. nan nan - 1. 1. 1. 1. ] +value: [0.8 1. nan 0.8 1. 1. 0.8 0.8 1. 1. ] mean value: nan @@ -21132,8 +15507,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.83333333 0.83333333 0.83333333 1. nan nan - 0.83333333 1. 1. 1. ] +value: [0.9 0.9 nan 0.8 1. 0.9 0.9 0.7 0.8 1. ] mean value: nan @@ -21143,8 +15517,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.66666667 0.75 0.66666667 1. nan nan - 0.75 1. 1. 1. ] +value: [0.8 0.83333333 nan 0.66666667 1. 0.83333333 + 0.8 0.57142857 0.71428571 1. ] mean value: nan @@ -21153,9 +15527,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.89 +MCC on Blind test: 0.84 -Accuracy on Blind test: 0.95 +Accuracy on Blind test: 0.92 Model_name: QDA Model func: QuadraticDiscriminantAnalysis() @@ -21188,20 +15562,20 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', QuadraticDiscriminantAnalysis())]) key: fit_time -value: [0.00955558 0.00891328 0.00895953 0.00941253 0.00888014 0.00890732 - 0.00881815 0.00965381 0.00900769 0.01232696] +value: [0.01176 0.01463223 0.01451039 0.01862359 0.0145371 0.01461315 + 0.01455307 0.0146296 0.01553369 0.01768708] -mean value: 0.00944349765777588 +mean value: 0.015107989311218262 key: score_time -value: [0.00867105 0.00853944 0.00854969 0.00861311 0.00435209 0.00431705 - 0.0086236 0.00882101 0.0087564 0.01142502] +value: [0.01168466 0.01204658 0.00626183 0.01207185 0.01193285 0.01204848 + 0.01547551 0.01594973 0.0170939 0.01871085] -mean value: 0.008066844940185548 +mean value: 0.013327622413635254 key: test_mcc -value: [ 0.70710678 -0.4472136 0.70710678 0.33333333 nan nan - -0.4472136 0. 0.61237244 0.61237244] +value: [0.81649658 0.81649658 nan 0.5 1. 0.81649658 + 0.65465367 0.5 0.81649658 0.65465367] mean value: nan @@ -21211,8 +15585,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.83333333 0.33333333 0.83333333 0.66666667 nan nan - 0.33333333 0.5 0.8 0.8 ] +value: [0.9 0.9 nan 0.7 1. 0.9 0.8 0.7 0.9 0.8] mean value: nan @@ -21222,8 +15595,8 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.8 0. 0.85714286 0.66666667 nan nan - 0.5 0.57142857 0.66666667 0.85714286] +value: [0.88888889 0.88888889 nan 0.57142857 1. 0.88888889 + 0.75 0.57142857 0.88888889 0.75 ] mean value: nan @@ -21233,8 +15606,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [1. 0. 0.75 0.66666667 nan nan - 0.4 0.5 1. 0.75 ] +value: [ 1. 1. nan 1. 1. 1. 1. 1. 1. 1.] mean value: nan @@ -21244,8 +15616,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_recall -value: [0.66666667 0. 1. 0.66666667 nan nan - 0.66666667 0.66666667 0.5 1. ] +value: [0.8 0.8 nan 0.4 1. 0.8 0.6 0.4 0.8 0.6] mean value: nan @@ -21255,8 +15626,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.83333333 0.33333333 0.83333333 0.66666667 nan nan - 0.33333333 0.5 0.75 0.75 ] +value: [0.9 0.9 nan 0.7 1. 0.9 0.8 0.7 0.9 0.8] mean value: nan @@ -21266,8 +15636,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.66666667 0. 0.75 0.5 nan nan - 0.33333333 0.4 0.5 0.75 ] +value: [0.8 0.8 nan 0.4 1. 0.8 0.6 0.4 0.8 0.6] mean value: nan @@ -21276,12 +15645,13 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 -MCC on Blind test: 0.07 +MCC on Blind test: 0.0 -Accuracy on Blind test: 0.52 +Accuracy on Blind test: 0.65 Model_name: Ridge Classifier -Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Model func: RidgeClassifier(random_state=42) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call return cache[method] @@ -21329,56 +15699,7 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call - return cache[method] -KeyError: 'predict' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score - scores = scorer(estimator, X_test, y_test) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ - score = scorer._score(cached_call, estimator, *args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score - y_pred = method_caller(estimator, "predict", X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call - result = getattr(estimator, method)(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict - Xt = transform.transform(Xt) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform - Xs = self._fit_transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform - return Parallel(n_jobs=self.n_jobs)( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ - while self.dispatch_one_batch(iterator): - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch - self._dispatch(tasks) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch - job = self._backend.apply_async(batch, callback=cb) - File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async - result = ImmediateResult(func) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ - self.results = batch() - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in - return [func(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ - return self.function(*args, **kwargs) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one - res = transformer.transform(X) - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform - X_int, X_mask = self._transform( - File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform - raise ValueError(msg) -ValueError: Found unknown categories ['Other'] in column 5 during transform - - warnings.warn( -RidgeClassifier(random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +[('Logistic 
Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, enable_categorical=False, @@ -21407,64 +15728,66 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', RidgeClassifier(random_state=42))]) key: fit_time -value: [0.0125978 0.01234961 0.01238513 0.01238847 0.01241469 0.01236916 - 0.02667212 0.01334715 0.01305318 0.0137136 ] +value: [0.02705026 0.01304579 0.01301503 0.03505015 0.03305626 0.02976966 + 0.03926349 0.02756858 0.0333178 0.03212309] -mean value: 0.014129090309143066 +mean value: 0.028326010704040526 key: score_time -value: [0.01133299 0.01127529 0.01127625 0.01128983 0.00598621 0.00598669 - 0.01129127 0.01196098 0.0119524 0.01206422] +value: [0.01200557 0.01179862 0.00627947 0.02354193 0.02322149 0.02598286 + 0.02078986 0.02019954 0.02314901 0.02163339] -mean value: 0.01044161319732666 +mean value: 0.018860173225402833 key: test_mcc -value: [0.33333333 1. 0.4472136 1. nan nan - 0.70710678 1. 1. 1. ] +value: [0.40824829 0.65465367 nan 0.2 0.81649658 1. + 0.40824829 0.81649658 0.21821789 1. ] mean value: nan key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
+value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 + 0.97801929 0.97801929 1. 0.97801929] -mean value: 1.0 +mean value: 0.9802173644592863 key: test_accuracy -value: [0.66666667 1. 0.66666667 1. nan nan - 0.83333333 1. 1. 1. ] +value: [0.7 0.8 nan 0.6 0.9 1. 0.7 0.9 0.6 1. ] mean value: nan key: train_accuracy -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 1. 0.98888889] -mean value: 1.0 +mean value: 0.99 key: test_fscore -value: [0.66666667 1. 0.75 1. nan nan - 0.8 1. 1. 1. ] +value: [0.72727273 0.83333333 nan 0.6 0.90909091 1. + 0.66666667 0.90909091 0.66666667 1. ] mean value: nan key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 + 0.98901099 0.98901099 1. 0.98901099] -mean value: 1.0 +mean value: 0.9901098901098901 key: test_precision -value: [0.66666667 1. 0.6 1. nan nan - 1. 1. 1. 1. ] +value: [0.66666667 0.71428571 nan 0.6 0.83333333 1. + 0.75 0.83333333 0.57142857 1. ] mean value: nan key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 1. 0.97826087] -mean value: 1.0 +mean value: 0.9804347826086957 key: test_recall -value: [0.66666667 1. 1. 1. nan nan - 0.66666667 1. 1. 1. ] +value: [0.8 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ] mean value: nan @@ -21474,30 +15797,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.66666667 1. 0.66666667 1. nan nan - 0.83333333 1. 1. 1. ] +value: [0.7 0.8 nan 0.6 0.9 1. 0.7 0.9 0.6 1. ] mean value: nan key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 1. 0.98888889] -mean value: 1.0 +mean value: 0.99 key: test_jcc -value: [0.5 1. 0.6 1. nan nan - 0.66666667 1. 1. 1. 
] +value: [0.57142857 0.71428571 nan 0.42857143 0.83333333 1. + 0.5 0.83333333 0.5 1. ] mean value: nan key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 1. 0.97826087] -mean value: 1.0 +mean value: 0.9804347826086957 -MCC on Blind test: 0.25 +MCC on Blind test: 0.37 -Accuracy on Blind test: 0.62 +Accuracy on Blind test: 0.72 Model_name: Ridge ClassifierCV Model func: RidgeClassifierCV(cv=10) @@ -21561,6 +15885,16 @@ Traceback (most recent call last): ValueError: Found unknown categories ['XDR'] in column 5 during transform warnings.warn( +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:156: SettingWithCopyWarning: +A value is trying to be set on a copy of a slice from a DataFrame + +See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:159: SettingWithCopyWarning: +A value is trying to be set on a copy of a slice from a DataFrame + +See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call @@ -21609,16 +15943,54 @@ Traceback (most recent call last): ValueError: Found unknown categories ['Other'] in column 5 during transform warnings.warn( -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:168: SettingWithCopyWarning: -A value is trying to be set on a copy of a slice from a DataFrame +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' -See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:171: SettingWithCopyWarning: -A value is trying to be set on a copy of a slice from a DataFrame +During handling of the above exception, another exception occurred: -See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True) +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), @@ -21636,64 +16008,66 @@ Pipeline(steps=[('prep', ('model', RidgeClassifierCV(cv=10))]) key: fit_time -value: [0.08033776 0.07908654 0.07868695 0.07865906 0.07928276 0.08904433 - 0.09892082 0.08086753 0.07872057 0.07906723] +value: [0.11854625 0.19753456 0.18856549 0.10754013 0.16717076 0.19964838 + 0.11414194 0.19587135 0.26366663 0.21352744] -mean value: 0.08226735591888427 +mean value: 0.17662129402160645 key: score_time -value: [0.01172042 0.01198864 0.01163435 0.01175618 0.00623584 0.00627875 - 0.01186275 0.01175117 0.01167297 0.01161075] +value: [0.02023578 0.02330852 0.01283813 0.01211834 0.02970982 0.02259326 + 0.01234365 0.02518892 0.02464271 0.02279258] -mean value: 0.010651183128356934 +mean value: 0.020577168464660643 key: test_mcc -value: [0. 1. 0.4472136 0.70710678 nan nan - 0.33333333 1. 1. 1. ] +value: [0.40824829 0.65465367 nan 0.2 0.81649658 0.65465367 + 0.40824829 0.81649658 0.21821789 1. ] mean value: nan key: train_mcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 1. + 0.97801929 0.97801929 1. 0.97801929] -mean value: 1.0 +mean value: 0.9824154350749212 key: test_accuracy -value: [0.5 1. 0.66666667 0.83333333 nan nan - 0.66666667 1. 1. 1. ] +value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ] mean value: nan key: train_accuracy -value: [1. 1. 1. 1. 1. 
1. 1. 1. 1. 1.] +value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1. + 0.98888889 0.98888889 1. 0.98888889] -mean value: 1.0 +mean value: 0.9911111111111112 key: test_fscore -value: [0.57142857 1. 0.75 0.8 nan nan - 0.66666667 1. 1. 1. ] +value: [0.72727273 0.83333333 nan 0.6 0.90909091 0.83333333 + 0.66666667 0.90909091 0.66666667 1. ] mean value: nan key: train_fscore -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 1. + 0.98901099 0.98901099 1. 0.98901099] -mean value: 1.0 +mean value: 0.9912087912087912 key: test_precision -value: [0.5 1. 0.6 1. nan nan - 0.66666667 1. 1. 1. ] +value: [0.66666667 0.71428571 nan 0.6 0.83333333 0.71428571 + 0.75 0.83333333 0.57142857 1. ] mean value: nan key: train_precision -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1. + 0.97826087 0.97826087 1. 0.97826087] -mean value: 1.0 +mean value: 0.9826086956521739 key: test_recall -value: [0.66666667 1. 1. 0.66666667 nan nan - 0.66666667 1. 1. 1. ] +value: [0.8 1. nan 0.6 1. 1. 0.6 1. 0.8 1. ] mean value: nan @@ -21703,30 +16077,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.5 1. 0.66666667 0.83333333 nan nan - 0.66666667 1. 1. 1. ] +value: [0.7 0.8 nan 0.6 0.9 0.8 0.7 0.9 0.6 1. ] mean value: nan key: train_roc_auc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 1. + 0.98888889 0.98888889 1. 0.98888889] -mean value: 1.0 +mean value: 0.991111111111111 key: test_jcc -value: [0.4 1. 0.6 0.66666667 nan nan - 0.5 1. 1. 1. ] +value: [0.57142857 0.71428571 nan 0.42857143 0.83333333 0.71428571 + 0.5 0.83333333 0.5 1. ] mean value: nan key: train_jcc -value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] +value: [0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 1. + 0.97826087 0.97826087 1. 
0.97826087] -mean value: 1.0 +mean value: 0.9826086956521739 -MCC on Blind test: 0.25 +MCC on Blind test: 0.37 -Accuracy on Blind test: 0.62 +Accuracy on Blind test: 0.72 Model_name: Logistic Regression Model func: LogisticRegression(random_state=42) @@ -21759,613 +16134,520 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegression(random_state=42))]) key: fit_time -value: [0.02765584 0.02437687 0.02343535 0.02426672 0.02423263 0.0224123 - 0.02587819 0.02598929 0.02278066 0.02541733] +value: [0.02091384 0.02012491 0.02000952 0.02053618 0.02216005 0.02067947 + 0.03914547 0.03124046 0.02153111 0.02034235] -mean value: 0.02464451789855957 +mean value: 0.023668336868286132 key: score_time -value: [0.01162863 0.01157403 0.01157665 0.01159024 0.01161504 0.01156235 - 0.01163483 0.0118084 0.011554 0.0116539 ] +value: [0.01170325 0.01167917 0.01161551 0.01167727 0.01176381 0.00625944 + 0.00652528 0.01202559 0.01162553 0.011724 ] -mean value: 0.011619806289672852 +mean value: 0.010659885406494141 key: test_mcc -value: [0.65465367 0.6 0.21821789 0.40824829 0.81649658 0.65465367 - 0.81649658 0.65465367 0.40824829 0.40824829] +value: [ 0.33333333 0.70710678 0. -0.70710678 0. nan + nan 1. 1. 0.61237244] -mean value: 0.5639916935606966 +mean value: nan key: train_mcc -value: [0.91201231 0.93356387 0.84465303 0.97801929 0.93356387 0.91111111 - 0.91201231 0.91201231 0.93356387 0.97801929] +value: [0.9258201 0.92307692 0.9258201 1. 1. 0.96225045 + 0.9258201 0.96225045 0.96291111 0.96296296] -mean value: 0.9248531267284181 +mean value: 0.9550912190805847 key: test_accuracy -value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7] +value: [0.66666667 0.83333333 0.5 0.16666667 0.5 nan + nan 1. 1. 0.8 ] -mean value: 0.77 +mean value: nan key: train_accuracy -value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556 - 0.95555556 0.95555556 0.96666667 0.98888889] +value: [0.96153846 0.96153846 0.96153846 1. 1. 
0.98076923 + 0.96153846 0.98076923 0.98113208 0.98113208] -mean value: 0.9622222222222223 +mean value: 0.9769956458635704 key: test_fscore -value: [0.83333333 0.8 0.5 0.72727273 0.90909091 0.75 - 0.90909091 0.83333333 0.66666667 0.66666667] +value: [0.66666667 0.85714286 0.66666667 0.28571429 0.4 nan + nan 1. 1. 0.85714286] -mean value: 0.7595454545454545 +mean value: nan key: train_fscore -value: [0.95652174 0.96703297 0.92134831 0.98901099 0.96703297 0.95555556 - 0.95652174 0.95652174 0.96703297 0.98901099] +value: [0.96 0.96153846 0.96296296 1. 1. 0.98113208 + 0.96296296 0.98113208 0.98181818 0.98113208] -mean value: 0.962558996667448 +mean value: 0.9772678795697664 key: test_precision -value: [0.71428571 0.8 0.66666667 0.66666667 0.83333333 1. - 0.83333333 0.71428571 0.75 0.75 ] +value: [0.66666667 0.75 0.5 0.25 0.5 nan + nan 1. 1. 0.75 ] -mean value: 0.7728571428571429 +mean value: nan key: train_precision -value: [0.93617021 0.95652174 0.93181818 0.97826087 0.95652174 0.95555556 - 0.93617021 0.93617021 0.95652174 0.97826087] +value: [1. 0.96153846 0.92857143 1. 1. 0.96296296 + 0.92857143 0.96296296 0.96428571 0.96296296] -mean value: 0.9521971332193349 +mean value: 0.9671855921855922 +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( key: test_recall -value: [1. 0.8 0.4 0.8 1. 0.6 1. 1. 0.6 0.6] +value: [0.66666667 1. 1. 0.33333333 0.33333333 nan + nan 1. 1. 1. ] -mean value: 0.78 +mean value: nan key: train_recall -value: [0.97777778 0.97777778 0.91111111 1. 0.97777778 0.95555556 - 0.97777778 0.97777778 0.97777778 1. ] +value: [0.92307692 0.96153846 1. 1. 1. 1. + 1. 1. 1. 1. ] -mean value: 0.9733333333333333 +mean value: 0.9884615384615385 key: test_roc_auc -value: [0.8 0.8 0.6 0.7 0.9 0.8 0.9 0.8 0.7 0.7] +value: [0.66666667 0.83333333 0.5 0.16666667 0.5 nan + nan 1. 1. 0.75 ] -mean value: 0.77 +mean value: nan -key: train_roc_auc /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. +key: train_roc_auc +value: [0.96153846 0.96153846 0.96153846 1. 1. 
0.98076923 + 0.96153846 0.98076923 0.98076923 0.98148148] -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
- -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( - -value: [0.95555556 0.96666667 0.92222222 0.98888889 0.96666667 0.95555556 - 0.95555556 0.95555556 0.96666667 0.98888889] - -mean value: 0.9622222222222223 +mean value: 0.976994301994302 key: test_jcc -value: [0.71428571 0.66666667 0.33333333 0.57142857 0.83333333 0.6 - 0.83333333 0.71428571 0.5 0.5 ] +value: [0.5 0.75 0.5 0.16666667 0.25 nan + nan 1. 1. 0.75 ] -mean value: 0.6266666666666667 +mean value: nan key: train_jcc -value: [0.91666667 0.93617021 0.85416667 0.97826087 0.93617021 0.91489362 - 0.91666667 0.91666667 0.93617021 0.97826087] +value: [0.92307692 0.92592593 0.92857143 1. 1. 0.96296296 + 0.92857143 0.96296296 0.96428571 0.96296296] -mean value: 0.928409266111625 +mean value: 0.9559320309320309 -MCC on Blind test: 0.37 +MCC on Blind test: 0.21 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.65 Model_name: Logistic RegressionCV Model func: LogisticRegressionCV(random_state=42) @@ -22398,105 +16680,202 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LogisticRegressionCV(random_state=42))]) key: fit_time -value: [0.57545948 0.71273303 0.62566566 0.53029251 0.58704925 0.67743158 - 0.58735538 0.63677311 0.5939045 0.59990978] +value: [0.32553601 0.34272623 0.32583499 0.36345124 0.3234396 0.33710957 + 0.35224915 0.35124803 0.39501452 0.36207604] -mean value: 0.6126574277877808 +mean value: 0.347868537902832 key: score_time -value: [0.01562166 0.01615906 0.01187754 0.01739883 0.01621699 0.01188135 - 0.01306725 0.01308417 0.01306224 0.01307702] +value: [0.01237249 0.01205635 0.01210785 0.01214385 0.01202536 0.00631595 + 0.00638008 0.01196527 0.01431847 0.01206231] -mean value: 0.014144611358642579 +mean value: 
0.011174798011779785 key: test_mcc -value: [0.65465367 0.81649658 0.81649658 0.40824829 0.6 0.81649658 - 0.65465367 0.81649658 0.81649658 0.81649658] +value: [0. 0.70710678 0. 0. 0. nan + nan 1. 1. 0.61237244] -mean value: 0.7216535117446173 +mean value: nan key: train_mcc -value: [1. 1. 0.95555556 1. 1. 0.95555556 +value: [0.88527041 1. 0.84866842 1. 1. 0.88527041 1. 1. 1. 1. ] -mean value: 0.9911111111111112 +mean value: 0.9619209250306358 key: test_accuracy -value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9] +value: [0.5 0.83333333 0.5 0.5 0.5 nan + nan 1. 1. 0.8 ] -mean value: 0.85 +mean value: nan key: train_accuracy -value: [1. 1. 0.97777778 1. 1. 0.97777778 +value: [0.94230769 1. 0.92307692 1. 1. 0.94230769 1. 1. 1. 1. ] -mean value: 0.9955555555555555 +mean value: 0.9807692307692307 key: test_fscore -value: [0.83333333 0.90909091 0.90909091 0.72727273 0.8 0.88888889 - 0.83333333 0.90909091 0.88888889 0.90909091] +value: [0.4 0.85714286 0.66666667 0.57142857 0.4 nan + nan 1. 1. 0.85714286] -mean value: 0.8608080808080808 +mean value: nan key: train_fscore -value: [1. 1. 0.97777778 1. 1. 0.97777778 +value: [0.94117647 1. 0.92592593 1. 1. 0.94339623 1. 1. 1. 1. ] -mean value: 0.9955555555555555 +mean value: 0.9810498622929256 key: test_precision -value: [0.71428571 0.83333333 0.83333333 0.66666667 0.8 1. - 0.71428571 0.83333333 1. 0.83333333] +value: [0.5 0.75 0.5 0.5 0.5 nan nan 1. 1. 0.75] -mean value: 0.8228571428571428 +mean value: nan key: train_precision -value: [1. 1. 0.97777778 1. 1. 0.97777778 +value: [0.96 1. 0.89285714 1. 1. 0.92592593 1. 1. 1. 1. ] -mean value: 0.9955555555555555 +mean value: 0.9778783068783069 key: test_recall -value: [1. 1. 1. 0.8 0.8 0.8 1. 1. 0.8 1. ] +value: [0.33333333 1. 1. 0.66666667 0.33333333 nan + nan 1. 1. 1. ] -mean value: 0.92 +mean value: nan key: train_recall -value: [1. 1. 0.97777778 1. 1. 0.97777778 +value: [0.92307692 1. 0.96153846 1. 1. 0.96153846 1. 1. 1. 1. 
] -mean value: 0.9955555555555555 +mean value: 0.9846153846153847 key: test_roc_auc -value: [0.8 0.9 0.9 0.7 0.8 0.9 0.8 0.9 0.9 0.9] +value: [0.5 0.83333333 0.5 0.5 0.5 nan + nan 1. 1. 0.75 ] -mean value: 0.8500000000000001 +mean value: nan key: train_roc_auc -value: [1. 1. 0.97777778 1. 1. 0.97777778 +value: [0.94230769 1. 0.92307692 1. 1. 0.94230769 1. 1. 1. 1. ] -mean value: 0.9955555555555555 +mean value: 0.9807692307692308 key: test_jcc -value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 0.8 - 0.71428571 0.83333333 0.8 0.83333333] +value: [0.25 0.75 0.5 0.4 0.25 nan nan 1. 1. 0.75] -mean value: 0.76 +mean value: nan key: train_jcc -value: [1. 1. 0.95652174 1. 1. 0.95652174 +value: [0.88888889 1. 0.86206897 1. 1. 0.89285714 1. 1. 1. 1. ] -mean value: 0.991304347826087 +mean value: 0.9643814997263274 -MCC on Blind test: 0.36 +MCC on Blind test: 0.21 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.65 Model_name: Gaussian NB Model func: GaussianNB() -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive 
Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, @@ -22525,99 +16904,102 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianNB())]) key: fit_time -value: [0.01235056 0.01039577 0.00935602 0.00850844 0.00858593 0.00864339 - 0.00865698 0.00895977 0.0085566 0.00893736] +value: [0.01288819 0.01199651 0.01104617 0.00844717 0.00849009 0.00838327 + 0.00898004 0.00881791 0.00832319 0.0085094 ] -mean value: 0.009295082092285157 +mean value: 0.009588193893432618 key: score_time -value: [0.01156569 0.00890326 0.00879598 0.00853562 0.00855756 0.00884724 - 0.0086143 0.00869632 0.0087235 0.00869799] +value: [0.01248598 0.01183987 0.00980401 0.00890875 0.0086236 0.00427198 + 0.0042758 0.00965548 0.00923252 0.00854301] -mean value: 0.008993744850158691 +mean value: 0.008764100074768067 key: test_mcc -value: [ 0. 0.5 0.21821789 0.21821789 -0.21821789 0.21821789 - 0.2 0.5 -0.5 0. ] +value: [0. 0. 0.4472136 0. 0. 
nan + nan 0.33333333 0.61237244 0.66666667] -mean value: 0.11364357804719848 +mean value: nan key: train_mcc -value: [0.56568542 0.64168895 0.57642872 0.53452248 0.48420012 0.58456547 - 0.69509522 0.77854709 0.53031442 0.8675239 ] +value: [0.70064905 0.65824263 0.81312325 0.66666667 0.76923077 0.5990423 + 0.58333333 0.71151247 0.73357097 0.70527596] -mean value: 0.6258571808190334 +mean value: 0.6940647388614967 key: test_accuracy -value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5] +value: [0.5 0.5 0.66666667 0.5 0.5 nan + nan 0.66666667 0.8 0.8 ] -mean value: 0.5499999999999999 +mean value: nan key: train_accuracy -value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778 - 0.84444444 0.88888889 0.75555556 0.93333333] +value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154 + 0.76923077 0.84615385 0.8490566 0.8490566 ] -mean value: 0.8011111111111111 +mean value: 0.8371190130624093 key: test_fscore -value: [0.61538462 0.76923077 0.66666667 0.66666667 0.5 0.66666667 - 0.6 0.76923077 0.46153846 0.54545455] +value: [0.4 0.57142857 0.75 0.57142857 0.57142857 nan + nan 0.66666667 0.66666667 0.8 ] -mean value: 0.6260839160839161 +mean value: nan key: train_fscore -value: [0.8 0.83168317 0.80392157 0.7826087 0.76470588 0.80769231 - 0.83333333 0.88636364 0.78431373 0.93478261] +value: [0.83333333 0.81632653 0.89795918 0.76190476 0.88461538 0.81355932 + 0.71428571 0.82608696 0.82608696 0.83333333] -mean value: 0.8229404926524524 +mean value: 0.8207491476835619 key: test_precision -value: [0.5 0.625 0.57142857 0.57142857 0.42857143 0.57142857 - 0.6 0.625 0.375 0.5 ] +value: [0.5 0.5 0.6 0.5 0.5 nan + nan 0.66666667 1. 1. ] -mean value: 0.5367857142857143 +mean value: nan key: train_precision -value: [0.7 0.75 0.71929825 0.64285714 0.68421053 0.71186441 - 0.8974359 0.90697674 0.70175439 0.91489362] +value: [0.90909091 0.86956522 0.95652174 1. 0.88461538 0.72727273 + 0.9375 0.95 1. 
0.90909091] -mean value: 0.7629290966174761 +mean value: 0.9143656886591669 key: test_recall -value: [0.8 1. 0.8 0.8 0.6 0.8 0.6 1. 0.6 0.6] +value: [0.33333333 0.66666667 1. 0.66666667 0.66666667 nan + nan 0.66666667 0.5 0.66666667] -mean value: 0.76 +mean value: nan key: train_recall -value: [0.93333333 0.93333333 0.91111111 1. 0.86666667 0.93333333 - 0.77777778 0.86666667 0.88888889 0.95555556] +value: [0.76923077 0.76923077 0.84615385 0.61538462 0.88461538 0.92307692 + 0.57692308 0.73076923 0.7037037 0.76923077] -mean value: 0.9066666666666667 +mean value: 0.7588319088319089 key: test_roc_auc -value: [0.5 0.7 0.6 0.6 0.4 0.6 0.6 0.7 0.3 0.5] +value: [0.5 0.5 0.66666667 0.5 0.5 nan + nan 0.66666667 0.75 0.83333333] -mean value: 0.55 +mean value: nan key: train_roc_auc -value: [0.76666667 0.81111111 0.77777778 0.72222222 0.73333333 0.77777778 - 0.84444444 0.88888889 0.75555556 0.93333333] +value: [0.84615385 0.82692308 0.90384615 0.80769231 0.88461538 0.78846154 + 0.76923077 0.84615385 0.85185185 0.84757835] -mean value: 0.8011111111111111 +mean value: 0.8372507122507122 key: test_jcc -value: [0.44444444 0.625 0.5 0.5 0.33333333 0.5 - 0.42857143 0.625 0.3 0.375 ] +value: [0.25 0.4 0.6 0.4 0.4 nan + nan 0.5 0.5 0.66666667] -mean value: 0.4631349206349206 +mean value: nan key: train_jcc -value: [0.66666667 0.71186441 0.67213115 0.64285714 0.61904762 0.67741935 - 0.71428571 0.79591837 0.64516129 0.87755102] +value: [0.71428571 0.68965517 0.81481481 0.61538462 0.79310345 0.68571429 + 0.55555556 0.7037037 0.7037037 0.71428571] -mean value: 0.7022902730094179 +mean value: 0.6990206728137762 -MCC on Blind test: 0.31 +MCC on Blind test: 0.23 Accuracy on Blind test: 0.65 @@ -22635,7 +17017,199 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, scale_pos_weight=None, subsample=None, tree_method=None, use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', 
LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: Pipeline(steps=[('prep', +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', ColumnTransformer(remainder='passthrough', transformers=[('num', MinMaxScaler(), Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', @@ -22652,101 +17226,104 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.00903225 0.00919151 0.00904417 0.00956297 0.00893402 0.00883603 - 0.00865221 0.00857472 0.00858474 0.00852299] +value: [0.00965214 0.00892282 0.00858331 0.00853586 0.0085423 0.00859547 + 0.0098629 0.00870371 0.00837135 0.00841379] -mean value: 0.008893561363220216 +mean value: 0.008818364143371582 key: score_time -value: [0.00895262 0.00891352 0.00872064 0.00939298 0.00859714 0.00868654 - 0.00841737 0.00842023 
0.00843048 0.00861788] +value: [0.00977111 0.00953913 0.00881982 0.00860023 0.00865912 0.00424123 + 0.00450349 0.00848484 0.008569 0.00859237] -mean value: 0.008714938163757324 +mean value: 0.007978034019470216 key: test_mcc -value: [0.40824829 0.6 0. 0.21821789 0.40824829 0.65465367 - 0.6 0.65465367 0.5 0. ] +value: [ 0.4472136 -0.33333333 0. -0.33333333 0. nan + nan -0.33333333 0.66666667 0.16666667] -mean value: 0.4044021812579673 +mean value: nan key: train_mcc -value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344 - 0.69162666 0.58137767 0.71269665 0.70004007] +value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894 + 0.65433031 0.74466871 0.74106548 0.73609205] -mean value: 0.6433013479547345 +mean value: 0.7536126885007749 key: test_accuracy -value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5] +value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.8 0.6 ] -mean value: 0.69 +mean value: nan key: train_accuracy -value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778 - 0.84444444 0.78888889 0.85555556 0.84444444] +value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538 + 0.82692308 0.86538462 0.86792453 0.86792453] -mean value: 0.82 +mean value: 0.8735849056603774 key: test_fscore -value: [0.72727273 0.8 0.44444444 0.66666667 0.66666667 0.75 - 0.8 0.83333333 0.57142857 0.28571429] +value: [0.5 0.33333333 0.66666667 0.33333333 0.4 nan + nan 0.33333333 0.8 0.66666667] -mean value: 0.6545526695526696 +mean value: nan key: train_fscore -value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186 - 0.8372093 0.77647059 0.85057471 0.82926829] +value: [0.88 0.7826087 0.92307692 0.89361702 0.90196078 0.875 + 0.83018868 0.85106383 0.8627451 0.8627451 ] -mean value: 0.8113008237276017 +mean value: 0.8663006129430366 key: test_precision -value: [0.66666667 0.8 0.5 0.57142857 0.75 1. - 0.8 0.71428571 1. 0.5 ] +value: [1. 
0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.66666667 0.66666667] -mean value: 0.7302380952380952 +mean value: nan key: train_precision -value: [0.84444444 0.825 0.84615385 0.84090909 0.85365854 0.80487805 - 0.87804878 0.825 0.88095238 0.91891892] +value: [0.91666667 0.9 0.92307692 1. 0.92 0.95454545 + 0.81481481 0.95238095 0.91666667 0.88 ] -mean value: 0.851796404723234 +mean value: 0.9178151478151478 key: test_recall -value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1. 0.4 0.2] +value: [0.33333333 0.33333333 1. 0.33333333 0.33333333 nan + nan 0.33333333 1. 0.66666667] -mean value: 0.64 +mean value: nan key: train_recall -value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333 - 0.8 0.73333333 0.82222222 0.75555556] +value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231 + 0.84615385 0.76923077 0.81481481 0.84615385] -mean value: 0.7755555555555556 +mean value: 0.8237891737891738 key: test_roc_auc -value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5] +value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.83333333 0.58333333] -mean value: 0.6900000000000001 +mean value: nan key: train_roc_auc -value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778 - 0.84444444 0.78888889 0.85555556 0.84444444] +value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538 + 0.82692308 0.86538462 0.86894587 0.86752137] -mean value: 0.82 +mean value: 0.8736467236467237 key: test_jcc -value: [0.57142857 0.66666667 0.28571429 0.5 0.5 0.6 - 0.66666667 0.71428571 0.4 0.16666667] +value: [0.33333333 0.2 0.5 0.2 0.25 nan + nan 0.2 0.66666667 0.5 ] -mean value: 0.5071428571428571 +mean value: nan key: train_jcc -value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151 - 0.72 0.63461538 0.74 0.70833333] +value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778 + 0.70967742 0.74074074 0.75862069 0.75862069] -mean value: 0.683584663763909 +mean value: 0.7660272482018867 -MCC on Blind test: 
0.12 +MCC on Blind test: -0.03 -Accuracy on Blind test: 0.6 +Accuracy on Blind test: 0.5 Model_name: K-Nearest Neighbors Model func: KNeighborsClassifier() @@ -22779,101 +17356,200 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', KNeighborsClassifier())]) key: fit_time -value: [0.0083065 0.00893617 0.0082252 0.00910974 0.00897431 0.00820327 - 0.00906706 0.00823522 0.00894833 0.00931168] +value: [0.00931239 0.0100131 0.00874043 0.00838232 0.009269 0.00925255 + 0.00977898 0.00942922 0.00934243 0.00922894] -mean value: 0.008731746673583984 +mean value: 0.009274935722351075 key: score_time -value: [0.00974131 0.00983167 0.00941205 0.00987244 0.00959492 0.00921226 - 0.00989819 0.01411557 0.01460791 0.01599026] +value: [0.01010799 0.00981784 0.00952053 0.00997114 0.0101397 0.00483942 + 0.00481582 0.01042175 0.01021457 0.01005983] -mean value: 0.011227655410766601 +mean value: 0.008990859985351563 key: test_mcc -value: [ 0.33333333 0.40824829 -0.21821789 0. 0.2 0.40824829 - 0.40824829 0. 0.33333333 0. ] +value: [ 0. 0. 0. 
-0.70710678 -0.33333333 nan + nan 0.70710678 0.66666667 0.16666667] -mean value: 0.18731936478222633 +mean value: nan key: train_mcc -value: [0.60540551 0.57906602 0.53452248 0.69162666 0.68888889 0.57906602 - 0.51571581 0.53452248 0.46712826 0.51314236] +value: [0.54494926 0.4259217 0.54006172 0.62279916 0.66628253 0.58080232 + 0.54006172 0.65433031 0.28612567 0.58766552] -mean value: 0.5709084504401615 +mean value: 0.5448999906190637 key: test_accuracy -value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5] +value: [0.5 0.5 0.5 0.16666667 0.33333333 nan + nan 0.83333333 0.8 0.6 ] -mean value: 0.58 +mean value: nan key: train_accuracy -value: [0.8 0.78888889 0.76666667 0.84444444 0.84444444 0.78888889 - 0.75555556 0.76666667 0.73333333 0.75555556] +value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154 + 0.76923077 0.82692308 0.64150943 0.79245283] -mean value: 0.7844444444444445 +mean value: 0.7703193033381712 key: test_fscore -value: [0.71428571 0.72727273 0.25 0.61538462 0.6 0.66666667 - 0.72727273 0.54545455 0.33333333 0.28571429] +value: [0.4 0.4 0.66666667 0. 0.33333333 nan + nan 0.85714286 0.8 0.66666667] -mean value: 0.5465384615384615 +mean value: nan key: train_fscore -value: [0.8125 0.79569892 0.75862069 0.85106383 0.84444444 0.79569892 - 0.73809524 0.75862069 0.72727273 0.76595745] +value: [0.75 0.69387755 0.76 0.79166667 0.80851064 0.7755102 + 0.77777778 0.82352941 0.62745098 0.7755102 ] -mean value: 0.7847972915180865 +mean value: 0.7583833434082853 -key: test_precision -value: [0.55555556 0.66666667 0.33333333 0.5 0.6 0.75 - 0.66666667 0.5 1. 0.5 ] +key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' -mean value: 0.6072222222222222 +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +test_precision +value: [0.5 0.5 0.5 0. 0.33333333 nan + nan 0.75 0.66666667 0.66666667] + +mean value: nan key: train_precision -value: [0.76470588 0.77083333 0.78571429 0.81632653 0.84444444 0.77083333 - 0.79487179 0.78571429 0.74418605 0.73469388] +value: [0.81818182 0.73913043 0.79166667 0.86363636 0.9047619 0.82608696 + 0.75 0.84 0.66666667 0.82608696] -mean value: 0.7812323814439311 +mean value: 0.8026217767739506 key: test_recall -value: [1. 0.8 0.2 0.8 0.6 0.6 0.8 0.6 0.2 0.2] +value: [0.33333333 0.33333333 1. 0. 0.33333333 nan + nan 1. 1. 
0.66666667] -mean value: 0.58 +mean value: nan key: train_recall -value: [0.86666667 0.82222222 0.73333333 0.88888889 0.84444444 0.82222222 - 0.68888889 0.73333333 0.71111111 0.8 ] +value: [0.69230769 0.65384615 0.73076923 0.73076923 0.73076923 0.73076923 + 0.80769231 0.80769231 0.59259259 0.73076923] -mean value: 0.7911111111111111 +mean value: 0.7207977207977208 key: test_roc_auc -value: [0.6 0.7 0.4 0.5 0.6 0.7 0.7 0.5 0.6 0.5] +value: [0.5 0.5 0.5 0.16666667 0.33333333 nan + nan 0.83333333 0.83333333 0.58333333] -mean value: 0.5800000000000001 +mean value: nan key: train_roc_auc -value: [0.8 0.78888889 0.76666667 0.84444444 0.84444444 0.78888889 - 0.75555556 0.76666667 0.73333333 0.75555556] +value: [0.76923077 0.71153846 0.76923077 0.80769231 0.82692308 0.78846154 + 0.76923077 0.82692308 0.64245014 0.79131054] -mean value: 0.7844444444444445 +mean value: 0.7702991452991453 key: test_jcc -value: [0.55555556 0.57142857 0.14285714 0.44444444 0.42857143 0.5 - 0.57142857 0.375 0.2 0.16666667] +value: [0.25 0.25 0.5 0. 
0.2 nan + nan 0.75 0.66666667 0.5 ] -mean value: 0.3955952380952381 +mean value: nan key: train_jcc -value: [0.68421053 0.66071429 0.61111111 0.74074074 0.73076923 0.66071429 - 0.58490566 0.61111111 0.57142857 0.62068966] +value: [0.6 0.53125 0.61290323 0.65517241 0.67857143 0.63333333 + 0.63636364 0.7 0.45714286 0.63333333] -mean value: 0.6476395178454898 +mean value: 0.6138070228344144 -MCC on Blind test: 0.08 +MCC on Blind test: 0.32 -Accuracy on Blind test: 0.57 +Accuracy on Blind test: 0.68 Model_name: SVM Model func: SVC(random_state=42) @@ -22906,101 +17582,5630 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SVC(random_state=42))]) key: fit_time -value: [0.01041508 0.0102284 0.00914741 0.00940919 0.00931716 0.00927925 - 0.00927663 0.00954556 0.00942039 0.0091517 ] +value: [0.00890541 0.00955153 0.00889421 0.00859499 0.00959396 0.00865912 + 0.01014996 0.00967169 0.01003861 0.01146197] -mean value: 0.009519076347351075 +mean value: 0.00955214500427246 key: score_time -value: [0.00887251 0.00886989 0.00857925 0.0089817 0.00945759 0.00883317 - 0.00878358 0.00884509 0.00862408 0.00881624] +value: [0.00898552 0.00915551 0.00873256 0.00884151 0.00935507 0.00436473 + 0.00465631 0.0094142 0.00948119 0.01040006] -mean value: 0.008866310119628906 +mean value: 0.008338665962219239 key: test_mcc -value: [0.81649658 0.6 0.21821789 0.21821789 0.6 0.65465367 - 0.40824829 0.65465367 0.2 0.5 ] +value: [-0.4472136 -0.33333333 0. -0.70710678 0. nan + nan 1. 1. 
0.66666667] -mean value: 0.4870487993279528 +mean value: nan key: train_mcc -value: [0.8675239 0.86666667 0.85485041 0.86666667 0.84632727 0.85485041 - 0.79036782 0.84632727 0.82548988 0.81649658] +value: [0.89056356 0.84866842 0.92307692 0.89056356 0.96225045 0.9258201 + 0.84866842 0.96225045 0.92724773 0.88730475] -mean value: 0.8435566879416903 +mean value: 0.9066414371100951 key: test_accuracy -value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7] +value: [0.33333333 0.33333333 0.5 0.16666667 0.5 nan + nan 1. 1. 0.8 ] -mean value: 0.73 +mean value: nan key: train_accuracy -value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222 - 0.88888889 0.92222222 0.91111111 0.9 ] +value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846 + 0.92307692 0.98076923 0.96226415 0.94339623] -mean value: 0.9188888888888889 +mean value: 0.9521044992743106 key: test_fscore -value: [0.90909091 0.8 0.5 0.66666667 0.8 0.75 - 0.66666667 0.83333333 0.6 0.57142857] +value: [0. 0.33333333 0.66666667 0.28571429 0.4 nan + nan 1. 1. 0.8 ] -mean value: 0.7097186147186147 +mean value: nan key: train_fscore -value: [0.93181818 0.93333333 0.91566265 0.93333333 0.91954023 0.91566265 - 0.87804878 0.91954023 0.90697674 0.88888889] +value: [0.93877551 0.92 0.96153846 0.93877551 0.98113208 0.96 + 0.92592593 0.98039216 0.96153846 0.94117647] -mean value: 0.9142805023022523 +mean value: 0.9509254572333691 key: test_precision -value: [0.83333333 0.8 0.66666667 0.57142857 0.8 1. - 0.75 0.71428571 0.6 1. ] +value: [0. 0.33333333 0.5 0.25 0.5 nan + nan 1. 1. 1. ] -mean value: 0.7735714285714286 +mean value: nan key: train_precision -value: [0.95348837 0.93333333 1. 0.93333333 0.95238095 1. - 0.97297297 0.95238095 0.95121951 1. ] +value: [1. 0.95833333 0.96153846 1. 0.96296296 1. + 0.89285714 1. 1. 0.96 ] -mean value: 0.964910942868969 +mean value: 0.9735691900691901 key: test_recall -value: [1. 0.8 0.4 0.8 0.8 0.6 0.6 1. 0.6 0.4] +value: [0. 0.33333333 1. 
0.33333333 0.33333333 nan + nan 1. 1. 0.66666667] + +mean value: nan + +key: train_recall +value: [0.88461538 0.88461538 0.96153846 0.88461538 1. 0.92307692 + 0.96153846 0.96153846 0.92592593 0.92307692] + +mean value: 0.931054131054131 + +key: test_roc_auc +value: [0.33333333 0.33333333 0.5 0.16666667 0.5 nan + nan 1. 1. 0.83333333] + +mean value: nan + +key: train_roc_auc +value: [0.94230769 0.92307692 0.96153846 0.94230769 0.98076923 0.96153846 + 0.92307692 0.98076923 0.96296296 0.94301994] + +mean value: 0.9521367521367522 + +key: test_jcc +value: [0. 0.2 0.5 0.16666667 0.25 nan + nan 1. 1. 0.66666667] + +mean value: nan + +key: train_jcc +value: [0.88461538 0.85185185 0.92592593 0.88461538 0.96296296 0.92307692 + 0.86206897 0.96153846 0.92592593 0.88888889] + +mean value: 0.9071470674918951 + +MCC on Blind test: 0.11 + +Accuracy on Blind test: 0.57 + +Model_name: MLP +Model func: MLPClassifier(max_iter=500, random_state=42) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), 
('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', MLPClassifier(max_iter=500, random_state=42))]) + +key: fit_time +value: [0.27838421 0.40681887 0.27178621 0.42920971 0.31241632 0.35591602 + 0.32682848 0.27644706 0.28436017 0.2995646 ] + +mean value: 0.3241731643676758 + +key: score_time +value: [0.01218176 0.01219702 0.01212978 0.01238561 0.01210904 0.00661492 + 0.00656652 0.01193714 0.01193643 0.01183224] + +mean value: 0.010989046096801758 + +key: test_mcc +value: [0.33333333 0.70710678 0. 0. 0. nan + nan 0.70710678 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.66666667 0.83333333 0.5 0.5 0.5 nan + nan 0.83333333 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.66666667 0.85714286 0.66666667 0.57142857 0.4 nan + nan 0.85714286 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.66666667 0.75 0.5 0.5 0.5 nan + nan 0.75 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 1. 1. 0.66666667 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.66666667 0.83333333 0.5 0.5 0.5 nan + nan 0.83333333 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
+ +mean value: 1.0 + +key: test_jcc +value: [0.5 0.75 0.5 0.4 0.25 nan nan 0.75 1. 0.75] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: -0.03 + +Accuracy on Blind test: 0.5 + +Model_name: Decision Tree +Model func: DecisionTreeClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), 
('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: 
UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = 
self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', DecisionTreeClassifier(random_state=42))]) + +key: fit_time +value: [0.01573658 0.01213527 0.00921369 0.00900602 0.00878453 0.00891757 + 0.0091784 0.00918722 0.00907421 0.00912213] + +mean value: 0.010035562515258788 + +key: score_time +value: [0.01291966 0.00884986 0.00869918 0.00836587 0.00835061 0.0041151 + 0.00418639 0.00848269 0.00831079 0.00835586] + +mean value: 0.008063602447509765 + +key: test_mcc +value: [0.70710678 0.70710678 0.33333333 1. 0.70710678 nan + nan 0.70710678 1. 1. ] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.83333333 0.83333333 0.66666667 1. 0.83333333 nan + nan 0.83333333 1. 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.8 0.85714286 0.66666667 1. 0.8 nan + nan 0.8 1. 1. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [1. 0.75 0.66666667 1. 1. nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 1. 0.66666667 1. 0.66666667 nan + nan 0.66666667 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.83333333 0.83333333 0.66666667 1. 0.83333333 nan + nan 0.83333333 1. 1. ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.66666667 0.75 0.5 1. 
0.66666667 nan + nan 0.66666667 1. 1. ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.75 + +Accuracy on Blind test: 0.88 + +Model_name: Extra Trees +Model func: ExtraTreesClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, 
oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', ExtraTreesClassifier(random_state=42))]) + +key: fit_time +value: [0.07804847 0.07725072 0.07686806 0.07656288 0.07687521 0.07669759 + 0.07711124 0.0770638 0.07734013 0.07700109] + +mean value: 0.07708191871643066 + +key: score_time +value: [0.01668477 0.01657367 0.01659799 0.01660562 0.01659155 0.00435138 + 0.0042994 0.01672506 0.01664042 0.01662922] + +mean value: 0.014169907569885254 + +key: test_mcc +value: [0. 0. 0.4472136 0. 0. nan + nan 1. 0.61237244 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.5 0.5 0.66666667 0.5 0.5 nan + nan 1. 0.8 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.4 0.57142857 0.75 0.4 0.4 nan + nan 1. 0.66666667 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
+ +mean value: 1.0 + +key: test_precision +value: [0.5 0.5 0.6 0.5 0.5 nan nan 1. 1. 0.75] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.33333333 0.66666667 1. 0.33333333 0.33333333 nan + nan 1. 0.5 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.5 0.5 0.66666667 0.5 0.5 nan + nan 1. 0.75 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.25 0.4 0.6 0.25 0.25 nan nan 1. 0.5 0.75] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.26 + +Accuracy on Blind test: 0.65 + +Model_name: Extra Tree +Model func: ExtraTreeClassifier(random_state=42) /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( + +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + 
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', ExtraTreeClassifier(random_state=42))]) + +key: fit_time +value: [0.00831699 0.00808382 0.0081141 0.00813127 0.00810838 0.00812197 + 0.00812531 0.00818348 0.00840473 0.00810528] + +mean value: 0.00816953182220459 + +key: score_time +value: [0.00827551 0.00835371 0.00831056 0.00835538 0.00824785 0.00409269 + 0.00411844 0.00829554 0.00831127 0.00829291] + +mean value: 0.007465386390686035 + +key: test_mcc +value: [ 0. 0. -0.33333333 -0.33333333 -0.33333333 nan + nan -0.33333333 0.16666667 0.66666667] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan + nan 0.33333333 0.6 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.57142857 0.4 0.33333333 0.33333333 0.33333333 nan + nan 0.33333333 0.5 0.8 ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan + nan 0.33333333 0.5 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 0.33333333 0.33333333 0.33333333 0.33333333 nan + nan 0.33333333 0.5 0.66666667] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.5 0.5 0.33333333 0.33333333 0.33333333 nan + nan 0.33333333 0.58333333 0.83333333] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 
1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.4 0.25 0.2 0.2 0.2 nan + nan 0.2 0.33333333 0.66666667] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.26 + +Accuracy on Blind test: 0.65 + +Model_name: Random Forest +Model func: RandomForestClassifier(n_estimators=1000, random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost 
Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: 
UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = 
self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. 
To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. 
+ warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', + RandomForestClassifier(n_estimators=1000, random_state=42))]) + +key: fit_time +value: [1.0098381 0.95413804 0.96767163 0.95117521 0.95051098 0.95307279 + 0.95376849 0.95404077 0.95342731 0.95893645] + +mean value: 0.9606579780578614 + +key: score_time +value: [0.08670974 0.08692455 0.08682108 0.08670044 0.08678889 0.00440526 + 0.00438118 0.0927279 0.08725643 0.08669782] + +mean value: 0.07094132900238037 + +key: test_mcc +value: [ 0. 0.4472136 0.4472136 -0.33333333 0. nan + nan 1. 0.61237244 1. ] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.5 0.66666667 0.66666667 0.33333333 0.5 nan + nan 1. 0.8 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.4 0.75 0.75 0.33333333 0.4 nan + nan 1. 0.66666667 1. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 
1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.5 0.6 0.6 0.33333333 0.5 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.33333333 1. 1. 0.33333333 0.33333333 nan + nan 1. 0.5 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.5 0.66666667 0.66666667 0.33333333 0.5 nan + nan 1. 0.75 1. ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.25 0.6 0.6 0.2 0.25 nan nan 1. 0.5 1. ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.43 + +Accuracy on Blind test: 0.75 + +Model_name: Random Forest2 +Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, 
interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', + RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, + oob_score=True, random_state=42))]) + +key: fit_time +value: [0.77283669 0.89461184 0.82807112 0.81345439 0.84339142 0.90077519 + 0.80800915 0.86690879 0.86262417 0.93805766] + +mean value: 0.8528740406036377 + +key: score_time +value: [0.18392515 0.20116615 0.21894264 0.19048834 0.13761568 0.00455546 + 0.0051651 0.17677927 0.19888759 0.21970034] + +mean value: 0.15372257232666015 + +key: test_mcc +value: [ 0. 0.4472136 0.70710678 -0.33333333 0. nan + nan 1. 0.61237244 1. ] + +mean value: nan + +key: train_mcc +value: [1. 0.96225045 1. 0.96225045 1. 0.96225045 + 0.9258201 1. 1. 0.96296296] + +mean value: 0.9775534408683644 + +key: test_accuracy +value: [0.5 0.66666667 0.83333333 0.33333333 0.5 nan + nan 1. 0.8 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 0.98076923 1. 0.98076923 1. 0.98076923 + 0.96153846 1. 1. 0.98113208] + +mean value: 0.9884978229317852 + +key: test_fscore +value: [0.4 0.75 0.85714286 0.33333333 0.4 nan + nan 1. 0.66666667 1. ] + +mean value: nan + +key: train_fscore +value: [1. 0.98113208 1. 0.98039216 1. 0.98113208 + 0.96296296 1. 1. 0.98113208] + +mean value: 0.9886751346240803 + +key: test_precision +value: [0.5 0.6 0.75 0.33333333 0.5 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 0.96296296 1. 1. 1. 0.96296296 + 0.92857143 1. 1. 
0.96296296] + +mean value: 0.9817460317460317 + +key: test_recall +value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. + warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: 
UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = 
self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[0.33333333 1. 1. 0.33333333 0.33333333 nan + nan 1. 0.5 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 0.96153846 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9961538461538462 + +key: test_roc_auc +value: [0.5 0.66666667 0.83333333 0.33333333 0.5 nan + nan 1. 0.75 1. ] + +mean value: nan + +key: train_roc_auc +value: [1. 0.98076923 1. 0.98076923 1. 0.98076923 + 0.96153846 1. 1. 0.98148148] + +mean value: 0.9885327635327635 + +key: test_jcc +value: [0.25 0.6 0.75 0.2 0.25 nan nan 1. 0.5 1. ] + +mean value: nan + +key: train_jcc +value: [1. 0.96296296 1. 0.96153846 1. 0.96296296 + 0.92857143 1. 1. 
0.96296296] + +mean value: 0.9778998778998779 + +MCC on Blind test: 0.55 + +Accuracy on Blind test: 0.8 + +Model_name: Naive Bayes +Model func: BernoulliNB() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient 
Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', BernoulliNB())]) + +key: fit_time +value: [0.020298 0.00831819 0.00845861 0.0083611 0.00830889 0.00845575 + 0.00973797 0.00877762 0.00922585 0.00842929] + +mean value: 0.009837126731872559 + +key: score_time +value: [0.01680541 0.00865436 0.00859785 0.00842381 0.00902247 0.00420737 + 0.00420499 0.00842237 0.00900936 0.00894046] + +mean value: 0.00862884521484375 + +key: test_mcc +value: [ 0.4472136 -0.33333333 0. -0.33333333 0. 
nan + nan -0.33333333 0.66666667 0.16666667] + +mean value: nan + +key: train_mcc +value: [0.77151675 0.63245553 0.84615385 0.82305489 0.80829038 0.77849894 + 0.65433031 0.74466871 0.74106548 0.73609205] + +mean value: 0.7536126885007749 + +key: test_accuracy +value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.8 0.6 ] + +mean value: nan + +key: train_accuracy +value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538 + 0.82692308 0.86538462 0.86792453 0.86792453] + +mean value: 0.8735849056603774 + +key: test_fscore +value: [0.5 0.33333333 0.66666667 0.33333333 0.4 nan + nan 0.33333333 0.8 0.66666667] + +mean value: nan + +key: train_fscore +value: [0.88 0.7826087 0.92307692 0.89361702 0.90196078 0.875 + 0.83018868 0.85106383 0.8627451 0.8627451 ] + +mean value: 0.8663006129430366 + +key: test_precision +value: [1. 0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.66666667 0.66666667] + +mean value: nan + +key: train_precision +value: [0.91666667 0.9 0.92307692 1. 0.92 0.95454545 + 0.81481481 0.95238095 0.91666667 0.88 ] + +mean value: 0.9178151478151478 + +key: test_recall +value: [0.33333333 0.33333333 1. 0.33333333 0.33333333 nan + nan 0.33333333 1. 
0.66666667] + +mean value: nan + +key: train_recall +value: [0.84615385 0.69230769 0.92307692 0.80769231 0.88461538 0.80769231 + 0.84615385 0.76923077 0.81481481 0.84615385] + +mean value: 0.8237891737891738 + +key: test_roc_auc +value: [0.66666667 0.33333333 0.5 0.33333333 0.5 nan + nan 0.33333333 0.83333333 0.58333333] + +mean value: nan + +key: train_roc_auc +value: [0.88461538 0.80769231 0.92307692 0.90384615 0.90384615 0.88461538 + 0.82692308 0.86538462 0.86894587 0.86752137] + +mean value: 0.8736467236467237 + +key: test_jcc +value: [0.33333333 0.2 0.5 0.2 0.25 nan + nan 0.2 0.66666667 0.5 ] + +mean value: nan + +key: train_jcc +value: [0.78571429 0.64285714 0.85714286 0.80769231 0.82142857 0.77777778 + 0.70967742 0.74074074 0.75862069 0.75862069] + +mean value: 0.7660272482018867 + +MCC on Blind test: -0.03 + +Accuracy on Blind test: 0.5 + +Model_name: XGBoost +Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), 
('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000... 
+ interaction_constraints=None, learning_rate=None, + max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, + monotone_constraints=None, n_estimators=100, + n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, + reg_lambda=None, scale_pos_weight=None, + subsample=None, tree_method=None, + use_label_encoder=False, + validate_parameters=None, verbosity=0))]) + +key: fit_time +value: [0.2208004 0.03246665 0.02982497 0.03053617 0.03095222 0.05028415 + 0.04468656 0.03867793 0.03164172 0.07298398] + +mean value: 0.05828547477722168 + +key: score_time +value: [0.01097751 0.01042724 0.01009774 0.01005578 0.01014471 0.0048039 + 0.00504088 0.01020074 0.00997043 0.0110755 ] + +mean value: 0.009279441833496094 + +key: test_mcc +value: [1. 1. 0.33333333 0.70710678 0.70710678 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [1. 1. 0.66666667 0.83333333 0.83333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [1. 1. 0.66666667 0.8 0.85714286 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [1. 1. 0.66666667 1. 0.75 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [1. 1. 0.66666667 0.66666667 1. nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [1. 1. 0.66666667 0.83333333 0.83333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [1. 1. 0.5 0.66666667 0.75 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 
1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.89 + +Accuracy on Blind test: 0.95 + +Model_name: LDA +Model func: LinearDiscriminantAnalysis() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 
1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', LinearDiscriminantAnalysis())]) + +key: fit_time +value: [0.02003193 0.03504539 0.02579474 0.01418924 0.01421022 0.01418233 + 0.0250864 0.03580475 0.03477359 0.03485084] + +mean value: 0.02539694309234619 + +key: score_time +value: [0.02190375 0.0215292 0.01140451 0.01143599 0.01142645 0.00573635 + 0.00591302 0.02012944 0.02000165 0.02008748] + +mean value: 0.014956784248352051 + +key: test_mcc +value: [0.4472136 0.70710678 0.33333333 0. 0. nan + nan 0.70710678 0.40824829 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 0.92307692 0.96225045 1. 1. 0.96225045 + 1. 1. 1. 1. ] + +mean value: 0.9847577820375676 + +key: test_accuracy +value: [0.66666667 0.83333333 0.66666667 0.5 0.5 nan + nan 0.83333333 0.6 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 0.96153846 0.98076923 1. 1. 0.98076923 + 1. 1. 1. 1. ] + +mean value: 0.9923076923076923 + +key: test_fscore +value: [0.75 0.85714286 0.66666667 0.4 0.4 nan + nan 0.85714286 0.66666667 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 0.96153846 0.98113208 1. 1. 0.98113208 + 1. 1. 1. 1. ] + +mean value: 0.9923802612481858 + +key: test_precision +value: [0.6 0.75 0.66666667 0.5 0.5 nan + nan 0.75 0.5 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 0.96153846 0.96296296 1. 1. 0.96296296 + 1. 1. 1. 1. ] + +mean value: 0.9887464387464387 + +key: test_recall +value: [1. 1. 0.66666667 0.33333333 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 0.96153846 1. 1. 1. 1. + 1. 1. 1. 1. 
] + +mean value: 0.9961538461538462 + +key: test_roc_auc +value: [0.66666667 0.83333333 0.66666667 0.5 0.5 nan + nan 0.83333333 0.66666667 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 0.96153846 0.98076923 1. 1. 0.98076923 + 1. 1. 1. 1. ] + +mean value: 0.9923076923076923 + +key: test_jcc +value: [0.6 0.75 0.5 0.25 0.25 nan nan 0.75 0.5 0.75] + +mean value: nan + +key: train_jcc +value: [1. 0.92592593 0.96296296 1. 1. 0.96296296 + 1. 1. 1. 1. ] + +mean value: 0.9851851851851852 + +MCC on Blind test: 0.15 + +Accuracy on Blind test: 0.6 + +Model_name: Multinomial +Model func: MultinomialNB() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', 
LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', MultinomialNB())]) + +key: fit_time +value: [0.02043128 0.0085032 0.0083077 0.008286 0.0082469 0.00821781 + 0.00827694 0.00811672 0.0081346 0.00810599] + +mean value: 0.009462714195251465 + +key: score_time +value: [0.00872779 0.00860095 0.0085876 0.0083971 0.00843334 0.00414538 + 0.00438237 0.00828457 0.00826526 0.00826359] + +mean value: 0.007608795166015625 + +key: test_mcc +value: [-0.4472136 -0.4472136 0. -0.70710678 -0.33333333 nan + nan 1. 
0.66666667 0.66666667] + +mean value: nan + +key: train_mcc +value: [0.6172134 0.58080232 0.73131034 0.57735027 0.6172134 0.65433031 + 0.54006172 0.69230769 0.58487934 0.50927299] + +mean value: 0.6104741777466153 + +key: test_accuracy +value: [0.33333333 0.33333333 0.5 0.16666667 0.33333333 nan + nan 1. 0.8 0.8 ] + +mean value: nan + +key: train_accuracy +value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308 + 0.76923077 0.84615385 0.79245283 0.75471698] + +mean value: 0.8047169811320755 + +key: test_fscore +value: [0. 0.5 0.66666667 0.28571429 0.33333333 nan + nan 1. 0.8 0.8 ] + +mean value: nan + +key: train_fscore +value: [0.8 0.7755102 0.86792453 0.78431373 0.8 0.82352941 + 0.77777778 0.84615385 0.8 0.74509804] + +mean value: 0.8020307532785732 + +key: test_precision +value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[0. 0.4 0.5 0.25 0.33333333 nan + nan 1. 0.66666667 1. ] + +mean value: nan + +key: train_precision +value: [0.83333333 0.82608696 0.85185185 0.8 0.83333333 0.84 + 0.75 0.84615385 0.78571429 0.76 ] + +mean value: 0.812647360690839 + +key: test_recall +value: [0. 0.66666667 1. 0.33333333 0.33333333 nan + nan 1. 1. 0.66666667] + +mean value: nan + +key: train_recall +value: [0.76923077 0.73076923 0.88461538 0.76923077 0.76923077 0.80769231 + 0.80769231 0.84615385 0.81481481 0.73076923] + +mean value: 0.793019943019943 + +key: test_roc_auc +value: [0.33333333 0.33333333 0.5 0.16666667 0.33333333 nan + nan 1. 
0.83333333 0.83333333] + +mean value: nan + +key: train_roc_auc +value: [0.80769231 0.78846154 0.86538462 0.78846154 0.80769231 0.82692308 + 0.76923077 0.84615385 0.79202279 0.7542735 ] + +mean value: 0.8046296296296296 + +key: test_jcc +value: [0. 0.33333333 0.5 0.16666667 0.2 nan + nan 1. 0.66666667 0.66666667] + +mean value: nan + +key: train_jcc +value: [0.66666667 0.63333333 0.76666667 0.64516129 0.66666667 0.7 + 0.63636364 0.73333333 0.66666667 0.59375 ] + +mean value: 0.6708608260019551 + +MCC on Blind test: 0.41 + +Accuracy on Blind test: 0.72 + +Model_name: Passive Aggresive +Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + 
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', + PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) + +key: fit_time +value: [0.00999832 0.01279449 0.01210523 0.01255631 0.01224685 0.01186943 + 0.0117228 0.01643276 0.01289439 0.01178789] + +mean value: 0.012440848350524902 + +key: score_time +value: [0.0086329 0.01154256 0.01120043 0.01119494 0.01112103 0.00591636 + 0.00598788 0.01161695 0.01126814 0.01119208] + +mean value: 0.009967327117919922 + +key: test_mcc +value: [ 0.33333333 0.33333333 0. -0.33333333 0. nan + nan 1. 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 0.82305489 0.9258201 0.96225045 1. 
0.9258201 + 0.72760688 0.9258201 0.96291111 0.85922733] + +mean value: 0.9112510953164517 + +key: test_accuracy +value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan + nan 1. 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 0.90384615 0.96153846 0.98076923 1. 0.96153846 + 0.84615385 0.96153846 0.98113208 0.9245283 ] + +mean value: 0.9521044992743106 + +key: test_fscore +value: [0.66666667 0.66666667 0.66666667 0.33333333 0.4 nan + nan 1. 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 0.89361702 0.96296296 0.98039216 1. 0.96 + 0.86666667 0.96296296 0.98181818 0.92857143] + +mean value: 0.9536991381121543 + +key: test_precision +value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 1. 0.92857143 1. 1. 1. + 0.76470588 0.92857143 0.96428571 0.86666667] + +mean value: 0.945280112044818 + +key: test_recall +value: [0.66666667 0.66666667 1. 0.33333333 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 0.80769231 1. 0.96153846 1. 0.92307692 + 1. 1. 1. 1. ] + +mean value: 0.9692307692307692 + +key: test_roc_auc +value: [0.66666667 0.66666667 0.5 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 0.90384615 0.96153846 0.98076923 1. 0.96153846 + 0.84615385 0.96153846 0.98076923 0.92592593] + +mean value: 0.9522079772079772 + +key: test_jcc +value: [0.5 0.5 0.5 0.2 0.25 nan nan 1. 1. 0.75] + +mean value: nan + +key: train_jcc +value: [1. 0.80769231 0.92857143 0.96153846 1. 
0.92307692 + 0.76470588 0.92857143 0.96428571 0.86666667] + +mean value: 0.9145108812755872 + +MCC on Blind test: 0.01 + +Accuracy on Blind test: 0.52 + +Model_name: Stochastic GDescent +Model func: SGDClassifier(n_jobs=10, random_state=42) +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. + _warn_prf(average, modifier, msg_start, len(result)) +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories 
['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in 
dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', 
RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', SGDClassifier(n_jobs=10, random_state=42))]) + +key: fit_time +value: [0.01173449 0.0120728 0.01188064 0.01165843 0.01149154 0.01192474 + 0.01300406 0.01200342 0.01213932 0.01185942] + +mean value: 0.011976885795593261 + +key: score_time +value: [0.0111804 0.01164484 0.01130462 0.0112021 0.01118112 0.0060792 + 0.00607777 0.01164412 0.01155257 0.01169229] + +mean value: 0.010355901718139649 + +key: test_mcc +value: [ 0. 0.4472136 0. -0.33333333 0. nan + nan 1. 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [0.82305489 0.71151247 1. 0.69693205 1. 0.82305489 + 1. 0.85634884 1. 0.92724773] + +mean value: 0.8838150881832578 + +key: test_accuracy +value: [0.5 0.66666667 0.5 0.33333333 0.5 nan + nan 1. 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [0.90384615 0.84615385 1. 0.82692308 1. 0.90384615 + 1. 0.92307692 1. 0.96226415] + +mean value: 0.936611030478955 + +key: test_fscore +value: [0. 0.5 0.66666667 0.33333333 0.4 nan + nan 1. 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [0.89361702 0.82608696 1. 0.79069767 1. 0.89361702 + 1. 0.92857143 1. 0.96296296] + +mean value: 0.9295553065027927 + +key: test_precision +value: [0. 1. 0.5 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 0.95 1. 1. 1. 1. + 1. 0.86666667 1. 0.92857143] + +mean value: 0.9745238095238096 + +key: test_recall +value: [0. 0.33333333 1. 0.33333333 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [0.80769231 0.73076923 1. 0.65384615 1. 0.80769231 + 1. 1. 1. 1. 
] + +mean value: 0.9 + +key: test_roc_auc +value: [0.5 0.66666667 0.5 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [0.90384615 0.84615385 1. 0.82692308 1. 0.90384615 + 1. 0.92307692 1. 0.96296296] + +mean value: 0.9366809116809117 + +key: test_jcc +value: [0. 0.33333333 0.5 0.2 0.25 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_jcc +value: [0.80769231 0.7037037 1. 0.65384615 1. 0.80769231 + 1. 0.86666667 1. 0.92857143] + +mean value: 0.8768172568172569 + +MCC on Blind test: 0.18 + +Accuracy on Blind test: 0.6 + +Model_name: AdaBoost Classifier +Model func: AdaBoostClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, 
verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", 
line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + 
raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', AdaBoostClassifier(random_state=42))]) + +key: fit_time +value: [0.08145452 0.07150626 0.07048178 0.07051468 0.07052875 0.07111621 + 0.07133055 0.07053328 0.07560372 0.0774231 ] + +mean value: 0.07304928302764893 + +key: score_time +value: [0.01477504 0.01447749 0.01440597 0.01421499 0.01418257 0.00448847 + 0.00439095 0.01417637 0.0155952 0.01548576] + +mean value: 0.012619280815124511 + +key: test_mcc +value: [0.4472136 1. 0.33333333 0.70710678 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.66666667 1. 0.66666667 0.83333333 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.5 1. 0.66666667 0.8 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [1. 1. 0.66666667 1. 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.33333333 1. 0.66666667 0.66666667 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.66666667 1. 0.66666667 0.83333333 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.33333333 1. 0.5 0.66666667 0.5 nan + nan 1. 1. 1. 
] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.95 + +Accuracy on Blind test: 0.98 + +Model_name: Bagging Classifier +Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), 
('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', + BaggingClassifier(n_jobs=10, oob_score=True, + random_state=42))]) + +key: fit_time +value: [0.03153086 0.02778888 0.03016424 0.03284216 0.04811215 0.02667236 + 0.03099704 0.03483891 0.03757524 0.02810836] + +mean value: 0.03286302089691162 + +key: score_time +value: [0.02119184 0.0220542 0.02267241 0.03023434 0.02344704 0.00497055 + 0.01071739 0.03668547 0.02092147 0.01624227] + +mean value: 0.0209136962890625 + +key: test_mcc +value: [0.70710678 0.70710678 0.33333333 0.70710678 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_mcc +value: [0.96225045 1. 1. 1. 1. 1. + 0.96225045 1. 0.96296296 1. ] + +mean value: 0.9887463860261716 + +key: test_accuracy +value: [0.83333333 0.83333333 0.66666667 0.83333333 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_accuracy +value: [0.98076923 1. 1. 1. 1. 1. + 0.98076923 1. 0.98113208 1. ] + +mean value: 0.994267053701016 + +key: test_fscore +value: [0.8 0.85714286 0.66666667 0.8 0.66666667 nan + nan 1. 1. 1. 
] + +mean value: nan + +key: train_fscore +value: [0.98039216 1. 1. 1. 1. 1. + 0.98039216 1. 0.98113208 1. ] + +mean value: 0.9941916389197188 + +key: test_precision +value: [1. 0.75 0.66666667 1. 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 1. 0.66666667 0.66666667 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [0.96153846 1. 1. 1. 1. 1. + 0.96153846 1. 0.96296296 1. ] + +mean value: 0.9886039886039886 + +key: test_roc_auc +value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories 
['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in 
dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +[0.83333333 0.83333333 0.66666667 0.83333333 0.66666667 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_roc_auc +value: [0.98076923 1. 1. 1. 1. 1. + 0.98076923 1. 0.98148148 1. ] + +mean value: 0.9943019943019943 + +key: test_jcc +value: [0.66666667 0.75 0.5 0.66666667 0.5 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_jcc +value: [0.96153846 1. 1. 1. 1. 1. + 0.96153846 1. 0.96296296 1. 
] + +mean value: 0.9886039886039886 + +MCC on Blind test: 0.89 + +Accuracy on Blind test: 0.95 + +Model_name: Gaussian Process +Model func: GaussianProcessClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', GaussianProcessClassifier(random_state=42))]) + +key: fit_time +value: [0.01145601 0.01071548 0.01049471 0.01030135 0.01034331 0.01053452 + 0.01219153 0.0107038 0.01105452 0.01032877] + +mean value: 0.01081240177154541 + +key: score_time +value: [0.00883389 0.00875664 0.00957775 0.00913954 0.00889802 0.00447464 + 0.0049305 0.00890326 0.0089941 0.00859141] + +mean value: 0.008109974861145019 + +key: test_mcc +value: [-0.33333333 0. 0. 0. 0. nan + nan -0.33333333 0.61237244 0.16666667] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.33333333 0.5 0.5 0.5 0.5 nan + nan 0.33333333 0.8 0.6 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.33333333 0.57142857 0.66666667 0.4 0.4 nan + nan 0.33333333 0.66666667 0.66666667] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.33333333 0.5 0.5 0.5 0.5 nan + nan 0.33333333 1. 
0.66666667] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.33333333 0.66666667 1. 0.33333333 0.33333333 nan + nan 0.33333333 0.5 0.66666667] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.33333333 0.5 0.5 0.5 0.5 nan + nan 0.33333333 0.75 0.58333333] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.2 0.4 0.5 0.25 0.25 nan nan 0.2 0.5 0.5 ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.1 + +Accuracy on Blind test: 0.55 + +Model_name: Gradient Boosting +Model func: GradientBoostingClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + 
num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") 
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform 
+ return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") 
+/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. 
+ _warn_prf(average, modifier, msg_start, len(result)) +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear + warnings.warn("Variables are collinear") +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', GradientBoostingClassifier(random_state=42))]) + +key: fit_time +value: [0.11957264 0.14153457 0.10522366 0.10514927 0.10289383 0.12513161 + 0.12388921 0.12803459 0.12305784 0.10899067] + +mean value: 0.11834778785705566 + +key: score_time +value: [0.00914145 0.00893044 0.00910687 0.00897527 0.00912714 0.00470376 + 0.004632 0.00898361 0.00916672 0.00901628] + +mean value: 0.008178353309631348 + +key: test_mcc +value: [1. 1. 0.33333333 0.70710678 0.70710678 nan + nan 0.70710678 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [1. 1. 0.66666667 0.83333333 0.83333333 nan + nan 0.83333333 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 
1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [1. 1. 0.66666667 0.85714286 0.8 nan + nan 0.8 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [1. 1. 0.66666667 0.75 1. nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [1. 1. 0.66666667 1. 0.66666667 nan + nan 0.66666667 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [1. 1. 0.66666667 0.83333333 0.83333333 nan + nan 0.83333333 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [1. 1. 0.5 0.75 0.66666667 nan + nan 0.66666667 1. 0.75 ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.63 + +Accuracy on Blind test: 0.8 + +Model_name: QDA +Model func: QuadraticDiscriminantAnalysis() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, 
colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', QuadraticDiscriminantAnalysis())]) + +key: fit_time +value: [0.00893831 0.00888896 0.00908279 0.00880885 0.00884104 0.00885582 + 0.00883555 0.00878549 0.00888848 0.01262331] + +mean value: 0.009254860877990722 + +key: score_time +value: [0.00863051 0.00949979 0.00870037 0.00860357 0.00859833 0.00480151 + 0.00421071 0.00872087 0.00892401 0.01160169] + +mean value: 0.00822913646697998 + +key: test_mcc +value: [ 0.33333333 0. -0.33333333 0.70710678 0. nan + nan 0. 0.40824829 -0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.66666667 0.5 0.33333333 0.83333333 0.5 nan + nan 0.5 0.6 0.2 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.66666667 0.57142857 0.33333333 0.85714286 0.4 nan + nan 0. 0.66666667 0. ] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.66666667 0.5 0.33333333 0.75 0.5 nan + nan 0. 0.5 0. ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 0.66666667 0.33333333 1. 0.33333333 nan + nan 0. 1. 0. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.66666667 0.5 0.33333333 0.83333333 0.5 nan + nan 0.5 0.66666667 0.25 ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
+ +mean value: 1.0 + +key: test_jcc +value: [0.5 0.4 0.2 0.75 0.25 nan nan 0. 0.5 0. ] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: -0.14 + +Accuracy on Blind test: 0.48 + +Model_name: Ridge Classifier +Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +RidgeClassifier(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, 
n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', RidgeClassifier(random_state=42))]) + +key: fit_time +value: [0.01256394 0.01242661 0.01233029 0.01234102 0.01236892 0.01238871 + 0.01237059 0.01262426 0.01242614 0.0123601 ] + +mean value: 0.012420058250427246 + +key: score_time +value: [0.01140499 0.01143169 0.01140809 0.01140833 0.01138973 0.00616193 + 0.0060811 0.01142287 0.01136637 0.01145077] + +mean value: 0.010352587699890137 + +key: test_mcc +value: [0.33333333 0.70710678 0.70710678 0. 0. nan + nan 1. 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_accuracy +value: [0.66666667 0.83333333 0.83333333 0.5 0.5 nan + nan 1. 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_fscore +value: [0.66666667 0.85714286 0.85714286 0.57142857 0.4 nan + nan 1. 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_precision +value: [0.66666667 0.75 0.75 0.5 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_recall +value: [0.66666667 1. 1. 0.66666667 0.33333333 nan + nan 1. 1. 1. ] + +mean value: nan + +key: train_recall +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_roc_auc +value: [0.66666667 0.83333333 0.83333333 0.5 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +key: test_jcc +value: [0.5 0.75 0.75 0.4 0.25 nan nan 1. 1. 
0.75] + +mean value: nan + +key: train_jcc +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 + +MCC on Blind test: 0.15 + +Accuracy on Blind test: 0.6 + +Model_name: Ridge ClassifierCV +Model func: RidgeClassifierCV(cv=10) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, + colsample_bynode=1, colsample_bytree=1, enable_categorical=False, + gamma=0, gpu_id=-1, importance_type=None, + interaction_constraints='', learning_rate=0.300000012, + max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, + monotone_constraints='()', n_estimators=100, n_jobs=12, + num_parallel_tree=1, predictor='auto', random_state=42, + reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, + tree_method='exact', use_label_encoder=False, + validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', 
GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['Other'] in column 5 during transform + + warnings.warn( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py:776: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 74, in _cached_call + return cache[method] +KeyError: 'predict' + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, in _score + scores = scorer(estimator, X_test, y_test) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 106, in __call__ + score = scorer._score(cached_call, estimator, *args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 261, in _score + y_pred = method_caller(estimator, "predict", X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 76, in _cached_call + result = getattr(estimator, method)(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 457, in predict + Xt = transform.transform(Xt) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 746, in transform + Xs = self._fit_transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py", line 604, in _fit_transform + return Parallel(n_jobs=self.n_jobs)( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 1046, in __call__ + while self.dispatch_one_batch(iterator): + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 861, in dispatch_one_batch + self._dispatch(tasks) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 779, in _dispatch + job = self._backend.apply_async(batch, callback=cb) + File 
"/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 208, in apply_async + result = ImmediateResult(func) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 572, in __init__ + self.results = batch() + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__ + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/joblib/parallel.py", line 262, in + return [func(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/utils/fixes.py", line 117, in __call__ + return self.function(*args, **kwargs) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/pipeline.py", line 853, in _transform_one + res = transformer.transform(X) + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 882, in transform + X_int, X_mask = self._transform( + File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py", line 160, in _transform + raise ValueError(msg) +ValueError: Found unknown categories ['XDR'] in column 5 during transform + + warnings.warn( +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:176: SettingWithCopyWarning: +A value is trying to be set on a copy of a slice from a DataFrame + +See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:179: SettingWithCopyWarning: +A value is trying to be set on a copy of a slice from a DataFrame + +See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy + rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = 
True) +Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', RidgeClassifierCV(cv=10))]) + +key: fit_time +value: [0.07766771 0.07815814 0.07832098 0.08276105 0.10114908 0.09942913 + 0.08842373 0.0795567 0.10013795 0.07987666] + +mean value: 0.086548113822937 + +key: score_time +value: [0.01170754 0.01169586 0.01166344 0.01162314 0.01418304 0.00657368 + 0.00637412 0.01173091 0.01183128 0.01019073] + +mean value: 0.010757374763488769 + +key: test_mcc +value: [ 0. 0.70710678 0.70710678 -0.33333333 0. nan + nan 1. 1. 0.61237244] + +mean value: nan + +key: train_mcc +value: [0.88527041 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9885270412757426 + +key: test_accuracy +value: [0.5 0.83333333 0.83333333 0.33333333 0.5 nan + nan 1. 1. 0.8 ] + +mean value: nan + +key: train_accuracy +value: [0.94230769 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9942307692307693 + +key: test_fscore +value: [0.4 0.85714286 0.85714286 0.33333333 0.4 nan + nan 1. 1. 0.85714286] + +mean value: nan + +key: train_fscore +value: [0.94117647 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9941176470588236 + +key: test_precision +value: [0.5 0.75 0.75 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_precision +value: [0.96 1. 1. 1. 1. 1. 1. 1. 1. 1. ] + +mean value: 0.996 + +key: test_recall +value: [0.33333333 1. 1. 0.33333333 0.33333333 nan + nan 1. 1. 1. 
] + +mean value: nan + +key: train_recall +value: [0.92307692 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9923076923076923 + +key: test_roc_auc +value: [0.5 0.83333333 0.83333333 0.33333333 0.5 nan + nan 1. 1. 0.75 ] + +mean value: nan + +key: train_roc_auc +value: [0.94230769 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9942307692307693 + +key: test_jcc +value: [0.25 0.75 0.75 0.2 0.25 nan nan 1. 1. 0.75] + +mean value: nan + +key: train_jcc +value: [0.88888889 1. 1. 1. 1. 1. + 1. 1. 1. 1. ] + +mean value: 0.9888888888888889 + +MCC on Blind test: 0.15 + +Accuracy on Blind test: 0.6 + +Model_name: Logistic Regression +Model func: LogisticRegression(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, 
tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', LogisticRegression(random_state=42))]) + +key: fit_time +value: [0.02820563 0.02576756 0.02641773 0.02557731 0.02693248 0.02358747 + 0.0255456 0.02283812 0.02356815 0.02982664] + +mean value: 0.025826668739318846 + +key: score_time +value: [0.01189876 0.01176596 0.01171589 0.01177764 0.01182795 0.01168847 + 0.01162362 0.01171899 0.01163292 0.01207137] + +mean value: 0.01177215576171875 + +key: test_mcc +value: [0.81649658 0.81649658 0.5 0.21821789 0.81649658 0.21821789 + 0.6 0.81649658 0.81649658 1. 
] + +mean value: 0.6618918685110615 + +key: train_mcc +value: [0.91201231 0.93356387 0.93356387 0.91201231 0.88910845 0.91111111 + 0.88910845 0.88910845 0.95555556 0.88910845] + +mean value: 0.9114252823784718 + +key: test_accuracy +value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ] + +mean value: 0.8200000000000001 + +key: train_accuracy +value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556 + 0.94444444 0.94444444 0.97777778 0.94444444] + +mean value: 0.9555555555555556 + +key: test_fscore +value: [0.90909091 0.90909091 0.57142857 0.66666667 0.90909091 0.66666667 + 0.8 0.90909091 0.88888889 1. ] + +mean value: 0.823001443001443 + +key: train_fscore +value: [0.95454545 0.96703297 0.96703297 0.95652174 0.94505495 0.95555556 + 0.94382022 0.94505495 0.97777778 0.94382022] + +mean value: 0.955621680062325 + +key: test_precision +value: [0.83333333 0.83333333 1. 0.57142857 0.83333333 0.57142857 + 0.8 0.83333333 1. 1. ] + +mean value: 0.8276190476190476 + +key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. 
+ +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +train_precision +value: [0.97674419 0.95652174 0.95652174 0.93617021 0.93478261 0.95555556 + 0.95454545 0.93478261 0.97777778 0.95454545] + +mean value: 0.9537947336888886 + +key: test_recall +value: [1. 1. 0.4 0.8 1. 0.8 0.8 1. 0.8 1. ] + +mean value: 0.86 + +key: train_recall +value: [0.93333333 0.97777778 0.97777778 0.97777778 0.95555556 0.95555556 + 0.93333333 0.95555556 0.97777778 0.93333333] + +mean value: 0.9577777777777778 + +key: test_roc_auc +value: [0.9 0.9 0.7 0.6 0.9 0.6 0.8 0.9 0.9 1. ] + +mean value: 0.8200000000000001 + +key: train_roc_auc +value: [0.95555556 0.96666667 0.96666667 0.95555556 0.94444444 0.95555556 + 0.94444444 0.94444444 0.97777778 0.94444444] + +mean value: 0.9555555555555556 + +key: test_jcc +value: [0.83333333 0.83333333 0.4 0.5 0.83333333 0.5 + 0.66666667 0.83333333 0.8 1. 
] + +mean value: 0.72 + +key: train_jcc +value: [0.91304348 0.93617021 0.93617021 0.91666667 0.89583333 0.91489362 + 0.89361702 0.89583333 0.95652174 0.89361702] + +mean value: 0.9152366635831021 + +MCC on Blind test: 0.42 + +Accuracy on Blind test: 0.75 + +Model_name: Logistic RegressionCV +Model func: LogisticRegressionCV(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', 
AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', LogisticRegressionCV(random_state=42))]) + +key: fit_time +value: [0.80905724 0.7472713 0.62414098 0.64533067 0.73280716 0.58336401 + 0.60610628 0.74971604 0.74895 0.61796379] + +mean value: 0.6864707469940186 + +key: score_time +value: [0.01288104 0.01487541 0.01242852 0.01178455 0.01245666 0.01180792 + 0.01207376 0.01235223 0.01500702 0.01522446] + +mean value: 0.013089156150817871 + +key: test_mcc +value: [0.65465367 0.81649658 0.21821789 0.5 0.81649658 0.21821789 + 0.81649658 0.81649658 0.6 1. ] + +mean value: 0.6457075774890866 + +key: train_mcc +value: [1. 1. 0.95555556 1. 0.95555556 0.95555556 + 0.93356387 1. 1. 1. ] + +mean value: 0.980023053806288 + +key: test_accuracy +value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ] + +mean value: 0.81 + +key: train_accuracy +value: [1. 1. 0.97777778 1. 0.97777778 0.97777778 + 0.96666667 1. 1. 1. 
] + +mean value: 0.99 + +key: test_fscore +value: [0.83333333 0.90909091 0.5 0.76923077 0.90909091 0.66666667 + 0.88888889 0.90909091 0.8 1. ] + +mean value: 0.8185392385392385 + +key: train_fscore +value: [1. 1. 0.97777778 1. 0.97777778 0.97777778 + 0.96629213 1. 1. 1. ] + +mean value: 0.9899625468164794 + +key: test_precision +value: [0.71428571 0.83333333 0.66666667 0.625 0.83333333 0.57142857 + 1. 0.83333333 0.8 1. ] + +mean value: 0.7877380952380952 + +key: train_precision +value: [1. 1. 0.97777778 1. 0.97777778 0.97777778 + 0.97727273 1. 1. 1. ] + +mean value: 0.9910606060606061 + +key: test_recall +value: [1. 1. 0.4 1. 1. 0.8 0.8 1. 0.8 1. ] + +mean value: 0.88 + +key: train_recall +value: [1. 1. 0.97777778 1. 0.97777778 0.97777778 + 0.95555556 1. 1. 1. ] + +mean value: 0.9888888888888889 + +key: test_roc_auc +value: [0.8 0.9 0.6 0.7 0.9 0.6 0.9 0.9 0.8 1. ] + +mean value: 0.81 + +key: train_roc_auc +value: [1. 1. 0.97777778 1. 0.97777778 0.97777778 + 0.96666667 1. 1. 1. ] + +mean value: 0.99 + +key: test_jcc +value: [0.71428571 0.83333333 0.33333333 0.625 0.83333333 0.5 + 0.8 0.83333333 0.66666667 1. ] + +mean value: 0.7139285714285715 + +key: train_jcc +value: [1. 1. 0.95652174 1. 0.95652174 0.95652174 + 0.93478261 1. 1. 1. 
] + +mean value: 0.9804347826086957 + +MCC on Blind test: 0.48 + +Accuracy on Blind test: 0.78 + +Model_name: Gaussian NB +Model func: GaussianNB() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', 
GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... + 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', GaussianNB())]) + +key: fit_time +value: [0.0124836 0.00930858 0.00983167 0.00957084 0.00956893 0.00951123 + 0.00926447 0.0093565 0.00924706 0.00938511] + +mean value: 0.009752798080444335 + +key: score_time +value: [0.01172256 0.00908756 0.00973558 0.00952482 0.00956821 0.0095439 + 0.0094943 0.00932264 0.00934172 0.00925255] + +mean value: 0.009659385681152344 + +key: test_mcc +value: [0.65465367 0.5 0.21821789 0.21821789 0.6 0. 
+ 0.40824829 0.40824829 0.65465367 0.6 ] + +mean value: 0.4262239702815665 + +key: train_mcc +value: [0.77777778 0.57642872 0.56980288 0.75724019 0.55708601 0.67488191 + 0.67488191 0.65025037 0.5500191 0.65487619] + +mean value: 0.6443245048315153 + +key: test_accuracy +value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8] mean value: 0.7 -key: train_recall -value: [0.91111111 0.93333333 0.84444444 0.93333333 0.88888889 0.84444444 - 0.8 0.88888889 0.86666667 0.8 ] +key: train_accuracy +value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333 + 0.83333333 0.82222222 0.76666667 0.82222222] -mean value: 0.8711111111111112 +mean value: 0.8166666666666667 + +key: test_fscore +value: [0.83333333 0.76923077 0.5 0.66666667 0.8 0.66666667 + 0.72727273 0.72727273 0.83333333 0.8 ] + +mean value: 0.7323776223776224 + +key: train_fscore +value: [0.88888889 0.80392157 0.8 0.88172043 0.7961165 0.84536082 + 0.84536082 0.83333333 0.79207921 0.83673469] + +mean value: 0.8323516277094448 + +key: test_precision +value: [0.71428571 0.625 0.66666667 0.57142857 0.8 0.5 + 0.66666667 0.66666667 0.71428571 0.8 ] + +mean value: 0.6725 + +key: train_precision +value: [0.88888889 0.71929825 0.72727273 0.85416667 0.70689655 0.78846154 + 0.78846154 0.78431373 0.71428571 0.77358491] + +mean value: 0.774563050252582 + +key: test_recall +value: [1. 1. 0.4 0.8 0.8 1. 0.8 0.8 1. 
0.8] + +mean value: 0.8400000000000001 + +key: train_recall +value: [0.88888889 0.91111111 0.88888889 0.91111111 0.91111111 0.91111111 + 0.91111111 0.88888889 0.88888889 0.91111111] + +mean value: 0.9022222222222221 key: test_roc_auc -value: [0.9 0.8 0.6 0.6 0.8 0.8 0.7 0.8 0.6 0.7] +value: [0.8 0.7 0.6 0.6 0.8 0.5 0.7 0.7 0.8 0.8] -mean value: 0.73 +mean value: 0.7000000000000001 key: train_roc_auc -value: [0.93333333 0.93333333 0.92222222 0.93333333 0.92222222 0.92222222 - 0.88888889 0.92222222 0.91111111 0.9 ] +value: [0.88888889 0.77777778 0.77777778 0.87777778 0.76666667 0.83333333 + 0.83333333 0.82222222 0.76666667 0.82222222] -mean value: 0.9188888888888889 +mean value: 0.8166666666666667 key: test_jcc -value: [0.83333333 0.66666667 0.33333333 0.5 0.66666667 0.6 - 0.5 0.71428571 0.42857143 0.4 ] +value: [0.71428571 0.625 0.33333333 0.5 0.66666667 0.5 + 0.57142857 0.57142857 0.71428571 0.66666667] -mean value: 0.5642857142857143 +mean value: 0.5863095238095238 key: train_jcc -value: [0.87234043 0.875 0.84444444 0.875 0.85106383 0.84444444 - 0.7826087 0.85106383 0.82978723 0.8 ] +value: [0.8 0.67213115 0.66666667 0.78846154 0.66129032 0.73214286 + 0.73214286 0.71428571 0.6557377 0.71929825] -mean value: 0.8425752903689999 +mean value: 0.714215705435333 -MCC on Blind test: 0.37 +MCC on Blind test: 0.07 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.52 + +Model_name: Naive Bayes +Model func: BernoulliNB() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', 
RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', BernoulliNB())]) + +key: fit_time +value: [0.01002598 0.00972629 0.00943542 0.009835 0.00988364 0.00961804 + 0.00996184 0.00956464 0.00955367 0.00944734] + +mean value: 0.009705185890197754 + +key: score_time +value: [0.0102427 0.00930071 0.00941706 0.00960851 0.00960922 0.00931358 + 0.00965929 0.00924778 0.00924873 0.00928211] + +mean value: 0.009492969512939453 + +key: test_mcc +value: [0.81649658 0.40824829 0. 0.40824829 0.40824829 0.6 + 0.40824829 0.2 0.40824829 0. ] + +mean value: 0.3657738033247041 + +key: train_mcc +value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665 + 0.60238451 0.58969198 0.6681531 0.68957028] + +mean value: 0.6280365978811131 + +key: test_accuracy +value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5] + +mean value: 0.6799999999999999 + +key: train_accuracy +value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556 + 0.8 0.78888889 0.83333333 0.84444444] + +mean value: 0.8122222222222222 + +key: test_fscore +value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8 + 0.72727273 0.6 0.66666667 0.44444444] + +mean value: 0.6493795093795094 + +key: train_fscore +value: [0.8 0.77647059 0.78571429 0.77647059 0.8045977 0.85057471 + 0.79069767 0.7654321 0.82758621 0.84090909] + +mean value: 0.8018452946967656 + +key: test_precision +value: [0.83333333 0.75 0.5 0.66666667 0.75 0.8 + 0.66666667 0.6 0.75 0.5 ] + +mean value: 0.6816666666666666 + +key: train_precision +value: [0.85 0.825 0.84615385 0.825 0.83333333 0.88095238 + 0.82926829 0.86111111 0.85714286 0.86046512] + +mean value: 0.8468426937655525 + +key: test_recall +value: [1. 
0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4] + +mean value: 0.64 + +key: train_recall +value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222 + 0.75555556 0.68888889 0.8 0.82222222] + +mean value: 0.7622222222222222 + +key: test_roc_auc +value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5] + +mean value: 0.68 + +key: train_roc_auc +value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556 + 0.8 0.78888889 0.83333333 0.84444444] + +mean value: 0.8122222222222222 + +key: test_jcc +value: [0.83333333 0.5 0.16666667 0.57142857 0.5 0.66666667 + 0.57142857 0.42857143 0.5 0.28571429] + +mean value: 0.5023809523809524 + +key: train_jcc +value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74 + 0.65384615 0.62 0.70588235 0.7254902 ] + +mean value: 0.6701251885369532 + +MCC on Blind test: 0.18 + +Accuracy on Blind test: 0.65 + +Model_name: K-Nearest Neighbors +Model func: KNeighborsClassifier() +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, + 
min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', KNeighborsClassifier())]) + +key: fit_time +value: [0.00937963 0.0080359 0.00806713 0.00866961 0.00906754 0.00908542 + 0.00891066 0.00935316 0.009197 0.00908399] + +mean value: 0.008885002136230469 + +key: score_time +value: [0.00974846 0.0091846 0.00914931 0.00993633 0.0101018 0.00967264 + 0.01007986 0.01016092 0.01003218 0.00999641] + +mean value: 0.009806251525878907 + +key: test_mcc +value: [ 0.81649658 0.2 0.21821789 -0.21821789 0. 0.40824829 + 0.40824829 0.6 -0.6 0.5 ] + +mean value: 0.2332993161855452 + +key: train_mcc +value: [0.64700558 0.6 0.64700558 0.51111111 0.64508188 0.69162666 + 0.62360956 0.55555556 0.48900965 0.62237591] + +mean value: 0.6032381499326708 + +key: test_accuracy +value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7] + +mean value: 0.61 + +key: train_accuracy +value: [0.82222222 0.8 0.82222222 0.75555556 0.82222222 0.84444444 + 0.81111111 0.77777778 0.74444444 0.81111111] + +mean value: 0.8011111111111111 + +key: test_fscore +value: [0.90909091 0.6 0.5 0.5 0.54545455 0.72727273 + 0.72727273 0.8 0.2 0.57142857] + +mean value: 0.6080519480519481 + +key: train_fscore +value: [0.82978723 0.8 0.82978723 0.75555556 0.82608696 0.85106383 + 0.8045977 0.77777778 0.74157303 0.80898876] + +mean value: 0.8025218086629647 + +key: test_precision +value: [0.83333333 0.6 0.66666667 0.42857143 0.5 0.66666667 + 0.66666667 0.8 0.2 1. ] + +mean value: 0.6361904761904762 + +key: train_precision +value: [0.79591837 0.8 0.79591837 0.75555556 0.80851064 0.81632653 + 0.83333333 0.77777778 0.75 0.81818182] + +mean value: 0.795152238845248 + +key: test_recall +value: [1. 
0.6 0.4 0.6 0.6 0.8 0.8 0.8 0.2 0.4] + +mean value: 0.62 + +key: train_recall +value: [0.86666667 0.8 0.86666667 0.75555556 0.84444444 0.88888889 + 0.77777778 0.77777778 0.73333333 0.8 ] + +mean value: 0.8111111111111111 + +key: test_roc_auc +value: [0.9 0.6 0.6 0.4 0.5 0.7 0.7 0.8 0.2 0.7] + +mean value: 0.61 + +key: train_roc_auc +value: [0.82222222 0.8 0.82222222 0.75555556 0.82222222 0.84444444 + 0.81111111 0.77777778 0.74444444 0.81111111] + +mean value: 0.8011111111111111 + +key: test_jcc +value: [0.83333333 0.42857143 0.33333333 0.33333333 0.375 0.57142857 + 0.57142857 0.66666667 0.11111111 0.4 ] + +mean value: 0.4624206349206349 + +key: train_jcc +value: [0.70909091 0.66666667 0.70909091 0.60714286 0.7037037 0.74074074 + 0.67307692 0.63636364 0.58928571 0.67924528] + +mean value: 0.6714407343180928 + +MCC on Blind test: 0.0 + +Accuracy on Blind test: 0.5 + +Model_name: SVM +Model func: SVC(random_state=42) +List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, + n_estimators=1000, n_jobs=10, oob_score=True, + random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, + colsample_bynode=None, colsample_bytree=None, + enable_categorical=False, gamma=None, gpu_id=None, + importance_type=None, interaction_constraints=None, + learning_rate=None, max_delta_step=None, max_depth=None, 
+ min_child_weight=None, missing=nan, monotone_constraints=None, + n_estimators=100, n_jobs=None, num_parallel_tree=None, + predictor=None, random_state=42, reg_alpha=None, reg_lambda=None, + scale_pos_weight=None, subsample=None, tree_method=None, + use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] +Running model pipeline: Pipeline(steps=[('prep', + ColumnTransformer(remainder='passthrough', + transformers=[('num', MinMaxScaler(), + Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', + 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', + 'mcsm_na_affinity', 'rsa', + ... 
+ 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', + 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'], + dtype='object', length=167)), + ('cat', OneHotEncoder(), + Index(['ss_class', 'aa_prop_change', 'electrostatics_change', + 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'], + dtype='object'))])), + ('model', SVC(random_state=42))]) + +key: fit_time +value: [0.01032925 0.01031327 0.01048875 0.0095973 0.00918651 0.01025271 + 0.01038647 0.00908804 0.00948262 0.01041341] + +mean value: 0.009953832626342774 + +key: score_time +value: [0.00930023 0.0093739 0.00958729 0.00855732 0.00871849 0.00915527 + 0.00932598 0.00866413 0.00870752 0.00956011] + +mean value: 0.009095025062561036 + +key: test_mcc +value: [0.81649658 0.81649658 0.21821789 0.6 0.40824829 0.40824829 + 0.40824829 0.6 0.6 0.65465367] + +mean value: 0.553060959419101 + +key: train_mcc +value: [0.8675239 0.84632727 0.84465303 0.84465303 0.88910845 0.93356387 + 0.8230355 0.86666667 0.84465303 0.78086881] + +mean value: 0.854105354455519 + +key: test_accuracy +value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8] + +mean value: 0.77 + +key: train_accuracy +value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667 + 0.91111111 0.93333333 0.92222222 0.88888889] + +mean value: 0.9266666666666666 + +key: test_fscore +value: [0.90909091 0.90909091 0.5 0.8 0.66666667 0.72727273 + 0.72727273 0.8 0.8 0.75 ] + +mean value: 0.7589393939393939 + +key: train_fscore +value: [0.93181818 0.91954023 0.92134831 0.92307692 0.94382022 0.96629213 + 0.90909091 0.93333333 0.92307692 0.88372093] + +mean value: 0.925511810467119 + +key: test_precision +value: [0.83333333 0.83333333 0.66666667 0.8 0.75 0.66666667 + 0.66666667 0.8 0.8 1. 
] + +mean value: 0.7816666666666667 + +key: train_precision +value: [0.95348837 0.95238095 0.93181818 0.91304348 0.95454545 0.97727273 + 0.93023256 0.93333333 0.91304348 0.92682927] + +mean value: 0.938598780439763 + +key: test_recall +value: [1. 1. 0.4 0.8 0.6 0.8 0.8 0.8 0.8 0.6] + +mean value: 0.76 + +key: train_recall +value: [0.91111111 0.88888889 0.91111111 0.93333333 0.93333333 0.95555556 + 0.88888889 0.93333333 0.93333333 0.84444444] + +mean value: 0.9133333333333333 + +key: test_roc_auc +value: [0.9 0.9 0.6 0.8 0.7 0.7 0.7 0.8 0.8 0.8] + +mean value: 0.77 + +key: train_roc_auc +value: [0.93333333 0.92222222 0.92222222 0.92222222 0.94444444 0.96666667 + 0.91111111 0.93333333 0.92222222 0.88888889] + +mean value: 0.9266666666666666 + +key: test_jcc +value: [0.83333333 0.83333333 0.33333333 0.66666667 0.5 0.57142857 + 0.57142857 0.66666667 0.66666667 0.6 ] + +mean value: 0.6242857142857142 + +key: train_jcc +value: [0.87234043 0.85106383 0.85416667 0.85714286 0.89361702 0.93478261 + 0.83333333 0.875 0.85714286 0.79166667] + +mean value: 0.8620256266243778 + +MCC on Blind test: 0.21 + +Accuracy on Blind test: 0.65 Model_name: MLP Model func: MLPClassifier(max_iter=500, random_state=42) @@ -23033,22 +23238,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MLPClassifier(max_iter=500, random_state=42))]) key: fit_time -value: [0.39881611 0.41650391 0.42689157 0.3460288 0.48951459 0.41404629 - 0.42462683 0.40188336 0.39558363 0.48087025] +value: [0.37708092 0.3637991 0.45402408 0.35582471 0.38116431 0.36729598 + 0.41565084 0.37348127 0.51866865 0.39506054] -mean value: 0.4194765329360962 +mean value: 0.40020503997802737 key: score_time -value: [0.01196933 0.01197171 0.01200271 0.01200986 0.01196694 0.01200986 - 0.01199579 0.01200604 0.01202822 0.01213503] +value: [0.01199031 0.01207948 0.0120008 0.01200986 0.01195621 0.01205134 + 0.01203179 0.01206517 0.01206875 0.0120163 ] -mean value: 0.012009549140930175 +mean value: 0.01202700138092041 key: 
test_mcc -value: [0.81649658 0.81649658 1. 0.40824829 0.81649658 0.6 - 0.40824829 0.5 0.81649658 0.40824829] +value: [0.65465367 0.81649658 0.40824829 0.40824829 0.6 0.65465367 + 0.81649658 0.81649658 0.40824829 1. ] -mean value: 0.6590731195102493 +mean value: 0.6583541955590722 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23056,9 +23261,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.9 0.9 1. 0.7 0.9 0.8 0.7 0.7 0.9 0.7] +value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. ] -mean value: 0.82 +mean value: 0.8200000000000001 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23066,10 +23271,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.90909091 0.90909091 1. 0.72727273 0.88888889 0.8 - 0.72727273 0.76923077 0.88888889 0.66666667] +value: [0.83333333 0.90909091 0.66666667 0.72727273 0.8 0.83333333 + 0.88888889 0.90909091 0.72727273 1. ] -mean value: 0.8286402486402487 +mean value: 0.8294949494949495 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23077,10 +23282,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.83333333 0.83333333 1. 0.66666667 1. 0.8 - 0.66666667 0.625 1. 0.75 ] +value: [0.71428571 0.83333333 0.75 0.66666667 0.8 0.71428571 + 1. 0.83333333 0.66666667 1. ] -mean value: 0.8175 +mean value: 0.7978571428571428 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23088,9 +23293,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 0.8 0.8 0.8 0.8 1. 0.8 0.6] +value: [1. 1. 0.6 0.8 0.8 1. 0.8 1. 0.8 1. ] -mean value: 0.86 +mean value: 0.88 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23098,7 +23303,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.9 0.9 1. 0.7 0.9 0.8 0.7 0.7 0.9 0.7] +value: [0.8 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.7 1. 
] mean value: 0.8200000000000001 @@ -23108,10 +23313,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.83333333 0.83333333 1. 0.57142857 0.8 0.66666667 - 0.57142857 0.625 0.8 0.5 ] +value: [0.71428571 0.83333333 0.5 0.57142857 0.66666667 0.71428571 + 0.8 0.83333333 0.57142857 1. ] -mean value: 0.7201190476190477 +mean value: 0.7204761904761905 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23153,22 +23358,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', DecisionTreeClassifier(random_state=42))]) key: fit_time -value: [0.01629376 0.01320314 0.01022458 0.00981021 0.00979257 0.00974083 - 0.00949192 0.0102129 0.00986719 0.00993228] +value: [0.01760101 0.01309419 0.01132631 0.01077938 0.010427 0.01067376 + 0.01073265 0.01048017 0.01065993 0.01080036] -mean value: 0.010856938362121583 +mean value: 0.011657476425170898 key: score_time -value: [0.0115726 0.00897956 0.0086844 0.00850129 0.00838923 0.00844407 - 0.00838757 0.00920272 0.00845051 0.00858712] +value: [0.01169825 0.00983357 0.00959349 0.00917745 0.00919628 0.00912952 + 0.00917816 0.00921082 0.00917602 0.00917816] -mean value: 0.008919906616210938 +mean value: 0.009537172317504884 key: test_mcc -value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 0.81649658 - 0.81649658 0.81649658 0.6 0.81649658] +value: [0.81649658 0.65465367 1. 0.81649658 0.81649658 1. + 0.40824829 0.81649658 0.65465367 1. ] -mean value: 0.7786626318129786 +mean value: 0.7983541955590722 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23176,9 +23381,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9] +value: [0.9 0.8 1. 0.9 0.9 1. 0.7 0.9 0.8 1. ] -mean value: 0.88 +mean value: 0.89 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23186,10 +23391,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_fscore -value: [0.90909091 0.90909091 0.90909091 0.83333333 0.88888889 0.90909091 - 0.88888889 0.90909091 0.8 0.90909091] +value: [0.90909091 0.83333333 1. 0.90909091 0.88888889 1. + 0.72727273 0.90909091 0.83333333 1. ] -mean value: 0.8865656565656566 +mean value: 0.901010101010101 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23197,10 +23402,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.83333333 0.83333333 0.83333333 0.71428571 1. 0.83333333 - 1. 0.83333333 0.8 0.83333333] +value: [0.83333333 0.71428571 1. 0.83333333 1. 1. + 0.66666667 0.83333333 0.71428571 1. ] -mean value: 0.8514285714285714 +mean value: 0.8595238095238096 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23208,9 +23413,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ] +value: [1. 1. 1. 1. 0.8 1. 0.8 1. 1. 1. ] -mean value: 0.9400000000000001 +mean value: 0.96 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23218,9 +23423,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.9 0.9 0.9 0.8 0.9 0.9 0.9 0.9 0.8 0.9] +value: [0.9 0.8 1. 0.9 0.9 1. 0.7 0.9 0.8 1. ] -mean value: 0.88 +mean value: 0.89 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23228,10 +23433,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.83333333 0.83333333 0.83333333 0.71428571 0.8 0.83333333 - 0.8 0.83333333 0.66666667 0.83333333] +value: [0.83333333 0.71428571 1. 0.83333333 0.8 1. + 0.57142857 0.83333333 0.71428571 1. ] -mean value: 0.7980952380952381 +mean value: 0.8300000000000001 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
@@ -23273,22 +23478,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreesClassifier(random_state=42))]) key: fit_time -value: [0.0893023 0.08990002 0.08999562 0.0806818 0.08832645 0.08767629 - 0.08837485 0.08918786 0.0886693 0.0875802 ] +value: [0.08497882 0.08490729 0.08530784 0.08088517 0.07980752 0.08026528 + 0.08012891 0.07985377 0.0795064 0.07973003] -mean value: 0.08796947002410889 +mean value: 0.08153710365295411 key: score_time -value: [0.01877093 0.01870561 0.01703525 0.01690221 0.01881409 0.01816916 - 0.01854229 0.01852489 0.01844764 0.01862383] +value: [0.01674938 0.01818061 0.0167861 0.01685762 0.01680422 0.01698256 + 0.01682544 0.01685238 0.01678157 0.01684117] -mean value: 0.018253588676452638 +mean value: 0.01696610450744629 key: test_mcc -value: [0.40824829 0.81649658 0.81649658 0.21821789 0.6 0.81649658 - 0.65465367 0.81649658 0.81649658 0.5 ] +value: [0.65465367 0.65465367 0.65465367 0.81649658 0.65465367 0.40824829 + 0.6 0.81649658 0.6 1. ] -mean value: 0.6463602756046463 +mean value: 0.6859856135151223 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23296,9 +23501,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7] +value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ] -mean value: 0.81 +mean value: 0.8300000000000001 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23306,10 +23511,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.72727273 0.90909091 0.90909091 0.66666667 0.8 0.88888889 - 0.75 0.90909091 0.88888889 0.57142857] +value: [0.83333333 0.83333333 0.75 0.90909091 0.75 0.72727273 + 0.8 0.88888889 0.8 1. ] -mean value: 0.802041847041847 +mean value: 0.8291919191919191 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23317,10 +23522,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.66666667 0.83333333 0.83333333 0.57142857 0.8 1. 
- 1. 0.83333333 1. 1. ] +value: [0.71428571 0.71428571 1. 0.83333333 1. 0.66666667 + 0.8 1. 0.8 1. ] -mean value: 0.8538095238095238 +mean value: 0.8528571428571429 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23328,9 +23533,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.8 1. 1. 0.8 0.8 0.8 0.6 1. 0.8 0.4] +value: [1. 1. 0.6 1. 0.6 0.8 0.8 0.8 0.8 1. ] -mean value: 0.8 +mean value: 0.84 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23338,9 +23543,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.7 0.9 0.9 0.6 0.8 0.9 0.8 0.9 0.9 0.7] +value: [0.8 0.8 0.8 0.9 0.8 0.7 0.8 0.9 0.8 1. ] -mean value: 0.81 +mean value: 0.8300000000000001 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23348,19 +23553,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.57142857 0.83333333 0.83333333 0.5 0.66666667 0.8 - 0.6 0.83333333 0.8 0.4 ] +value: [0.71428571 0.71428571 0.6 0.83333333 0.6 0.57142857 + 0.66666667 0.8 0.66666667 1. ] -mean value: 0.6838095238095239 +mean value: 0.7166666666666667 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.36 +MCC on Blind test: 0.49 -Accuracy on Blind test: 0.72 +Accuracy on Blind test: 0.78 Model_name: Extra Tree Model func: ExtraTreeClassifier(random_state=42) @@ -23393,22 +23598,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', ExtraTreeClassifier(random_state=42))]) key: fit_time -value: [0.00986981 0.00918651 0.00949073 0.00941229 0.00946736 0.00948048 - 0.00914884 0.00917602 0.00864077 0.0092082 ] +value: [0.00878644 0.00890398 0.0085156 0.00929928 0.00923872 0.00895572 + 0.00925231 0.00933337 0.00873685 0.00847316] -mean value: 0.009308099746704102 +mean value: 0.008949542045593261 key: score_time -value: [0.00936985 0.00940537 0.00926566 0.00926495 0.00862026 0.00878453 - 0.00922108 0.00850725 0.00934005 0.00925589] +value: [0.00883341 0.00892162 0.00849938 0.00916171 0.00890326 0.00888228 + 0.00898838 0.00917315 0.00845098 0.00846076] -mean value: 0.009103488922119141 +mean value: 0.008827495574951171 key: test_mcc -value: [0.65465367 0.5 1. 0.5 0.6 0. - 0.81649658 0.5 0.6 0.5 ] +value: [0.81649658 0.65465367 0.40824829 0.81649658 0.6 0.21821789 + 0.65465367 0.6 0.40824829 0. ] -mean value: 0.5671150251635704 +mean value: 0.5177014974435125 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23416,9 +23621,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.8 0.7 1. 0.7 0.8 0.5 0.9 0.7 0.8 0.7] +value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5] -mean value: 0.76 +mean value: 0.75 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23426,10 +23631,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.83333333 0.76923077 1. 0.76923077 0.8 0.54545455 - 0.88888889 0.76923077 0.8 0.57142857] +value: [0.90909091 0.83333333 0.72727273 0.90909091 0.8 0.66666667 + 0.83333333 0.8 0.72727273 0.54545455] -mean value: 0.7746797646797647 +mean value: 0.7751515151515151 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
@@ -23437,10 +23642,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.71428571 0.625 1. 0.625 0.8 0.5 - 1. 0.625 0.8 1. ] +value: [0.83333333 0.71428571 0.66666667 0.83333333 0.8 0.57142857 + 0.71428571 0.8 0.66666667 0.5 ] -mean value: 0.7689285714285714 +mean value: 0.71 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23448,9 +23653,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 1. 0.8 0.6 0.8 1. 0.8 0.4] +value: [1. 1. 0.8 1. 0.8 0.8 1. 0.8 0.8 0.6] -mean value: 0.84 +mean value: 0.86 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23458,9 +23663,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 0.7 1. 0.7 0.8 0.5 0.9 0.7 0.8 0.7] +value: [0.9 0.8 0.7 0.9 0.8 0.6 0.8 0.8 0.7 0.5] -mean value: 0.76 +mean value: 0.75 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23468,19 +23673,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.71428571 0.625 1. 0.625 0.66666667 0.375 - 0.8 0.625 0.66666667 0.4 ] +value: [0.83333333 0.71428571 0.57142857 0.83333333 0.66666667 0.5 + 0.71428571 0.66666667 0.57142857 0.375 ] -mean value: 0.6497619047619048 +mean value: 0.6446428571428572 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: -0.04 +MCC on Blind test: 0.03 -Accuracy on Blind test: 0.48 +Accuracy on Blind test: 0.5 Model_name: Random Forest Model func: RandomForestClassifier(n_estimators=1000, random_state=42) @@ -23534,22 +23739,22 @@ Pipeline(steps=[('prep', RandomForestClassifier(n_estimators=1000, random_state=42))]) key: fit_time -value: [1.15932965 1.05107379 1.05955648 1.02203918 1.01323223 1.02794361 - 1.01532245 1.02316165 1.02073669 1.01626611] +value: [1.02410698 1.02078891 1.02329874 1.02577353 1.02832651 1.0975976 + 1.09256458 1.01587534 1.0089438 1.00883484] -mean value: 1.0408661842346192 +mean value: 1.0346110820770265 key: score_time -value: [0.09322572 0.09367871 0.09359598 0.09217787 0.08568668 0.08590388 - 0.09312415 0.09303379 0.08970332 0.09149098] +value: [0.09517384 0.09400702 0.09465933 0.09498215 0.09645772 0.0963223 + 0.09489012 0.09161878 0.09524989 0.09460068] -mean value: 0.091162109375 +mean value: 0.09479618072509766 key: test_mcc -value: [0.6 0.65465367 1. 0.21821789 0.81649658 1. - 0.81649658 0.81649658 0.6 0.5 ] +value: [0.81649658 0.81649658 0.65465367 0.81649658 0.65465367 0.65465367 + 0.81649658 0.81649658 0.6 0.81649658] -mean value: 0.7022361303727148 +mean value: 0.7462940497690288 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23557,9 +23762,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.8 0.8 1. 0.6 0.9 1. 0.9 0.9 0.8 0.7] +value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9] -mean value: 0.84 +mean value: 0.86 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23567,10 +23772,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.8 0.83333333 1. 0.66666667 0.88888889 1. - 0.88888889 0.90909091 0.8 0.57142857] +value: [0.90909091 0.90909091 0.75 0.90909091 0.75 0.83333333 + 0.88888889 0.88888889 0.8 0.88888889] -mean value: 0.8358297258297258 +mean value: 0.8527272727272728 key: train_fscore value: [1. 1. 1. 1. 1. 
1. 1. 1. 1. 1.] @@ -23578,10 +23783,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.8 0.71428571 1. 0.57142857 1. 1. - 1. 0.83333333 0.8 1. ] +value: [0.83333333 0.83333333 1. 0.83333333 1. 0.71428571 + 1. 1. 0.8 1. ] -mean value: 0.871904761904762 +mean value: 0.9014285714285715 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23589,9 +23794,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.8 1. 1. 0.8 0.8 1. 0.8 1. 0.8 0.4] +value: [1. 1. 0.6 1. 0.6 1. 0.8 0.8 0.8 0.8] -mean value: 0.8400000000000001 +mean value: 0.84 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23599,9 +23804,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 0.8 1. 0.6 0.9 1. 0.9 0.9 0.8 0.7] +value: [0.9 0.9 0.8 0.9 0.8 0.8 0.9 0.9 0.8 0.9] -mean value: 0.8400000000000001 +mean value: 0.86 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23609,17 +23814,17 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.66666667 0.71428571 1. 0.5 0.8 1. - 0.8 0.83333333 0.66666667 0.4 ] +value: [0.83333333 0.83333333 0.6 0.83333333 0.6 0.71428571 + 0.8 0.8 0.66666667 0.8 ] -mean value: 0.7380952380952381 +mean value: 0.7480952380952381 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.54 +MCC on Blind test: 0.55 Accuracy on Blind test: 0.8 @@ -23659,103 +23864,103 @@ Running model pipeline: Pipeline(steps=[('prep', oob_score=True, random_state=42))]) key: fit_time -value: [0.84170556 0.83023262 0.82111239 0.94291162 0.86192775 0.89486432 - 0.85397744 0.87075162 0.84488964 0.99061775] +value: [0.84236526 0.86939478 0.84446883 0.89900541 0.81594372 0.85574555 + 0.87060905 0.88659859 0.84814668 0.80270219] -mean value: 0.875299072265625 +mean value: 0.8534980058670044 key: score_time -value: [0.19168687 0.20659614 0.23333478 0.17990088 0.21830344 0.18155169 - 0.20949388 0.21993876 0.20162749 0.21077156] +value: [0.18092632 0.22178912 0.15738559 0.22907305 0.1698842 0.20201349 + 0.20945191 0.231884 0.2336936 0.21163177] -mean value: 0.20532054901123048 +mean value: 0.20477330684661865 key: test_mcc -value: [0.81649658 0.65465367 0.81649658 0.21821789 0.81649658 0.65465367 - 0.81649658 0.81649658 0.65465367 0.5 ] +value: [0.81649658 1. 0.65465367 0.81649658 0.81649658 0.65465367 + 0.40824829 0.81649658 0.6 0.81649658] -mean value: 0.6764661806998554 +mean value: 0.7400038536518447 key: train_mcc -value: [1. 0.97801929 0.97801929 1. 0.97801929 0.97801929 - 0.95650071 1. 0.95650071 1. ] +value: [0.97801929 0.97801929 0.95555556 0.97801929 0.97801929 0.95555556 + 0.95555556 0.95555556 0.93356387 0.95555556] -mean value: 0.9825078604565161 +mean value: 0.9623418824548596 key: test_accuracy -value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7] +value: [0.9 1. 0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9] -mean value: 0.8200000000000001 +mean value: 0.86 key: train_accuracy -value: [1. 0.98888889 0.98888889 1. 0.98888889 0.98888889 - 0.97777778 1. 0.97777778 1. 
] +value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778 + 0.97777778 0.97777778 0.96666667 0.97777778] -mean value: 0.9911111111111112 +mean value: 0.9811111111111112 key: test_fscore -value: [0.88888889 0.83333333 0.88888889 0.66666667 0.88888889 0.75 - 0.88888889 0.90909091 0.75 0.57142857] +value: [0.90909091 1. 0.75 0.90909091 0.88888889 0.83333333 + 0.72727273 0.88888889 0.8 0.88888889] -mean value: 0.8036075036075037 +mean value: 0.8595454545454545 key: train_fscore -value: [1. 0.98901099 0.98876404 1. 0.98876404 0.98876404 - 0.97727273 1. 0.97727273 1. ] +value: [0.98876404 0.98901099 0.97777778 0.98901099 0.98901099 0.97777778 + 0.97777778 0.97777778 0.96629213 0.97777778] -mean value: 0.9909848578387904 +mean value: 0.9810978035697137 key: test_precision -value: [1. 0.71428571 1. 0.57142857 1. 1. - 1. 0.83333333 1. 1. ] +value: [0.83333333 1. 1. 0.83333333 1. 0.71428571 + 0.66666667 1. 0.8 1. ] -mean value: 0.9119047619047619 +mean value: 0.8847619047619047 key: train_precision -value: [1. 0.97826087 1. 1. 1. 1. - 1. 1. 1. 1. ] +value: [1. 0.97826087 0.97777778 0.97826087 0.97826087 0.97777778 + 0.97777778 0.97777778 0.97727273 0.97777778] -mean value: 0.9978260869565218 +mean value: 0.9800944224857269 key: test_recall -value: [0.8 1. 0.8 0.8 0.8 0.6 0.8 1. 0.6 0.4] +value: [1. 1. 0.6 1. 0.8 1. 0.8 0.8 0.8 0.8] -mean value: 0.76 +mean value: 0.86 key: train_recall value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers. warn( -[1. 1. 0.97777778 1. 0.97777778 0.97777778 - 0.95555556 1. 0.95555556 1. ] +[0.97777778 1. 0.97777778 1. 1. 
0.97777778 + 0.97777778 0.97777778 0.95555556 0.97777778] -mean value: 0.9844444444444445 +mean value: 0.9822222222222222 key: test_roc_auc -value: [0.9 0.8 0.9 0.6 0.9 0.8 0.9 0.9 0.8 0.7] +value: [0.9 1. 0.8 0.9 0.9 0.8 0.7 0.9 0.8 0.9] -mean value: 0.8200000000000001 +mean value: 0.86 key: train_roc_auc -value: [1. 0.98888889 0.98888889 1. 0.98888889 0.98888889 - 0.97777778 1. 0.97777778 1. ] +value: [0.98888889 0.98888889 0.97777778 0.98888889 0.98888889 0.97777778 + 0.97777778 0.97777778 0.96666667 0.97777778] -mean value: 0.9911111111111112 +mean value: 0.981111111111111 key: test_jcc -value: [0.8 0.71428571 0.8 0.5 0.8 0.6 - 0.8 0.83333333 0.6 0.4 ] +value: [0.83333333 1. 0.6 0.83333333 0.8 0.71428571 + 0.57142857 0.8 0.66666667 0.8 ] -mean value: 0.6847619047619048 +mean value: 0.7619047619047619 key: train_jcc -value: [1. 0.97826087 0.97777778 1. 0.97777778 0.97777778 - 0.95555556 1. 0.95555556 1. ] +value: [0.97777778 0.97826087 0.95652174 0.97826087 0.97826087 0.95652174 + 0.95652174 0.95652174 0.93478261 0.95652174] -mean value: 0.9822705314009662 +mean value: 0.9629951690821257 -MCC on Blind test: 0.6 +MCC on Blind test: 0.55 -Accuracy on Blind test: 0.82 +Accuracy on Blind test: 0.8 Model_name: Naive Bayes Model func: BernoulliNB() @@ -23788,101 +23993,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', BernoulliNB())]) key: fit_time -value: [0.02315879 0.01028872 0.00969672 0.00906706 0.00864863 0.00874782 - 0.00876069 0.00871038 0.00945473 0.00973272] +value: [0.02122283 0.0088222 0.00864434 0.00867176 0.00868058 0.00866842 + 0.00870466 0.00863767 0.00891924 0.00864077] -mean value: 0.010626626014709473 +mean value: 0.009961247444152832 key: score_time -value: [0.01252508 0.00953984 0.00998116 0.00864005 0.00861979 0.00862026 - 0.0085988 0.00850296 0.00862026 0.00932288] +value: [0.01298022 0.00939631 0.00880861 0.00860405 0.0086031 0.00863481 + 0.00868678 0.00857902 0.00937891 0.00851655] -mean value: 0.00929710865020752 +mean value: 
0.009218835830688476 key: test_mcc -value: [0.40824829 0.6 0. 0.21821789 0.40824829 0.65465367 - 0.6 0.65465367 0.5 0. ] +value: [0.81649658 0.40824829 0. 0.40824829 0.40824829 0.6 + 0.40824829 0.2 0.40824829 0. ] -mean value: 0.4044021812579673 +mean value: 0.3657738033247041 key: train_mcc -value: [0.68888889 0.58137767 0.60540551 0.66683134 0.64700558 0.55776344 - 0.69162666 0.58137767 0.71269665 0.70004007] +value: [0.62609903 0.58137767 0.60540551 0.58137767 0.62360956 0.71269665 + 0.60238451 0.58969198 0.6681531 0.68957028] -mean value: 0.6433013479547345 +mean value: 0.6280365978811131 key: test_accuracy -value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5] +value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5] -mean value: 0.69 +mean value: 0.6799999999999999 key: train_accuracy -value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778 - 0.84444444 0.78888889 0.85555556 0.84444444] +value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556 + 0.8 0.78888889 0.83333333 0.84444444] -mean value: 0.82 +mean value: 0.8122222222222222 key: test_fscore -value: [0.72727273 0.8 0.44444444 0.66666667 0.66666667 0.75 - 0.8 0.83333333 0.57142857 0.28571429] +value: [0.90909091 0.66666667 0.28571429 0.72727273 0.66666667 0.8 + 0.72727273 0.6 0.66666667 0.44444444] -mean value: 0.6545526695526696 +mean value: 0.6493795093795094 key: train_fscore -value: [0.84444444 0.77647059 0.78571429 0.83146067 0.81395349 0.76744186 - 0.8372093 0.77647059 0.85057471 0.82926829] +value: [0.8 0.77647059 0.78571429 0.77647059 0.8045977 0.85057471 + 0.79069767 0.7654321 0.82758621 0.84090909] -mean value: 0.8113008237276017 +mean value: 0.8018452946967656 key: test_precision -value: [0.66666667 0.8 0.5 0.57142857 0.75 1. - 0.8 0.71428571 1. 
0.5 ] +value: [0.83333333 0.75 0.5 0.66666667 0.75 0.8 + 0.66666667 0.6 0.75 0.5 ] -mean value: 0.7302380952380952 +mean value: 0.6816666666666666 key: train_precision -value: [0.84444444 0.825 0.84615385 0.84090909 0.85365854 0.80487805 - 0.87804878 0.825 0.88095238 0.91891892] +value: [0.85 0.825 0.84615385 0.825 0.83333333 0.88095238 + 0.82926829 0.86111111 0.85714286 0.86046512] -mean value: 0.851796404723234 +mean value: 0.8468426937655525 key: test_recall -value: [0.8 0.8 0.4 0.8 0.6 0.6 0.8 1. 0.4 0.2] +value: [1. 0.6 0.2 0.8 0.6 0.8 0.8 0.6 0.6 0.4] mean value: 0.64 key: train_recall -value: [0.84444444 0.73333333 0.73333333 0.82222222 0.77777778 0.73333333 - 0.8 0.73333333 0.82222222 0.75555556] +value: [0.75555556 0.73333333 0.73333333 0.73333333 0.77777778 0.82222222 + 0.75555556 0.68888889 0.8 0.82222222] -mean value: 0.7755555555555556 +mean value: 0.7622222222222222 key: test_roc_auc -value: [0.7 0.8 0.5 0.6 0.7 0.8 0.8 0.8 0.7 0.5] +value: [0.9 0.7 0.5 0.7 0.7 0.8 0.7 0.6 0.7 0.5] -mean value: 0.6900000000000001 +mean value: 0.68 key: train_roc_auc -value: [0.84444444 0.78888889 0.8 0.83333333 0.82222222 0.77777778 - 0.84444444 0.78888889 0.85555556 0.84444444] +value: [0.81111111 0.78888889 0.8 0.78888889 0.81111111 0.85555556 + 0.8 0.78888889 0.83333333 0.84444444] -mean value: 0.82 +mean value: 0.8122222222222222 key: test_jcc -value: [0.57142857 0.66666667 0.28571429 0.5 0.5 0.6 - 0.66666667 0.71428571 0.4 0.16666667] +value: [0.83333333 0.5 0.16666667 0.57142857 0.5 0.66666667 + 0.57142857 0.42857143 0.5 0.28571429] -mean value: 0.5071428571428571 +mean value: 0.5023809523809524 key: train_jcc -value: [0.73076923 0.63461538 0.64705882 0.71153846 0.68627451 0.62264151 - 0.72 0.63461538 0.74 0.70833333] +value: [0.66666667 0.63461538 0.64705882 0.63461538 0.67307692 0.74 + 0.65384615 0.62 0.70588235 0.7254902 ] -mean value: 0.683584663763909 +mean value: 0.6701251885369532 -MCC on Blind test: 0.12 +MCC on Blind test: 0.18 -Accuracy on Blind test: 
0.6 +Accuracy on Blind test: 0.65 Model_name: XGBoost Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None, @@ -23928,22 +24133,22 @@ Running model pipeline: Pipeline(steps=[('prep', validate_parameters=None, verbosity=0))]) key: fit_time -value: [0.0782187 0.035676 0.03869748 0.19958353 0.03675485 0.04161453 - 0.23583102 0.50171185 0.18434334 0.07802296] +value: [0.08190846 0.06687713 0.03293014 0.08744478 0.03286195 0.03449225 + 0.03584909 0.0576427 0.03408647 0.03423762] -mean value: 0.14304542541503906 +mean value: 0.049833059310913086 key: score_time -value: [0.01095033 0.01015282 0.01051188 0.01092458 0.01066589 0.01025844 - 0.01311946 0.01248908 0.01308417 0.01076651] +value: [0.01158047 0.01002455 0.01006317 0.01050115 0.01028323 0.010041 + 0.01015139 0.01003408 0.00998139 0.01002693] -mean value: 0.011292314529418946 +mean value: 0.010268735885620116 key: test_mcc -value: [0.65465367 1. 0.81649658 0.81649658 0.81649658 1. - 0.81649658 0.81649658 1. 1. ] +value: [0.81649658 0.81649658 0.81649658 0.81649658 1. 1. + 1. 0.81649658 0.81649658 1. ] -mean value: 0.8737136575346607 +mean value: 0.8898979485566356 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23951,9 +24156,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.8 1. 0.9 0.9 0.9 1. 0.9 0.9 1. 1. ] +value: [0.9 0.9 0.9 0.9 1. 1. 1. 0.9 0.9 1. ] -mean value: 0.93 +mean value: 0.9400000000000001 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23961,10 +24166,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.83333333 1. 0.90909091 0.90909091 0.88888889 1. - 0.88888889 0.90909091 1. 1. ] +value: [0.90909091 0.90909091 0.88888889 0.90909091 1. 1. + 1. 0.90909091 0.90909091 1. ] -mean value: 0.9338383838383838 +mean value: 0.9434343434343434 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23972,10 +24177,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_precision -value: [0.71428571 1. 0.83333333 0.83333333 1. 1. - 1. 0.83333333 1. 1. ] +value: [0.83333333 0.83333333 1. 0.83333333 1. 1. + 1. 0.83333333 0.83333333 1. ] -mean value: 0.9214285714285715 +mean value: 0.9166666666666667 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23983,9 +24188,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 1. 0.8 1. 0.8 1. 1. 1. ] +value: [1. 1. 0.8 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.96 +mean value: 0.98 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -23993,9 +24198,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 1. 0.9 0.9 0.9 1. 0.9 0.9 1. 1. ] +value: [0.9 0.9 0.9 0.9 1. 1. 1. 0.9 0.9 1. ] -mean value: 0.93 +mean value: 0.9400000000000001 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24003,19 +24208,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.71428571 1. 0.83333333 0.83333333 0.8 1. - 0.8 0.83333333 1. 1. ] +value: [0.83333333 0.83333333 0.8 0.83333333 1. 1. + 1. 0.83333333 0.83333333 1. ] -mean value: 0.8814285714285715 +mean value: 0.8966666666666667 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.89 +MCC on Blind test: 0.84 -Accuracy on Blind test: 0.95 +Accuracy on Blind test: 0.92 Model_name: LDA Model func: LinearDiscriminantAnalysis() @@ -24048,101 +24253,100 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', LinearDiscriminantAnalysis())]) key: fit_time -value: [0.02349949 0.04713821 0.06608343 0.06277347 0.04959702 0.0388329 - 0.03968954 0.053617 0.0484674 0.04304004] +value: [0.0152812 0.02139306 0.04124451 0.04086208 0.04085636 0.04082632 + 0.04066205 0.04596925 0.04081917 0.04116321] -mean value: 0.047273850440979 +mean value: 0.0369077205657959 key: score_time -value: [0.02380705 0.02124381 0.0237565 0.03547454 0.02019763 0.02404904 - 0.02267098 0.02003694 0.0200119 0.02282238] +value: [0.01144218 0.01933622 0.0208571 0.02243543 0.01998782 0.02180576 + 0.01957369 0.02016187 0.02205682 0.01150918] -mean value: 0.02340707778930664 +mean value: 0.018916606903076172 key: test_mcc -value: [0.33333333 0.81649658 0.5 0. 0.21821789 0.81649658 - 0.81649658 0.33333333 0.81649658 0.21821789] +value: [0.81649658 0.65465367 0.40824829 0.65465367 0.2 0.21821789 + 1. 0.40824829 0.40824829 0.40824829] -mean value: 0.48690887708495556 +mean value: 0.5177014974435125 key: train_mcc value: [1. 1. 1. 1. 0.97801929 1. - 1. 1. 0.97801929 0.97801929] + 1. 1. 1. 1. ] -mean value: 0.9934057881530954 +mean value: 0.9978019293843652 key: test_accuracy -value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6] +value: [0.9 0.8 0.7 0.8 0.6 0.6 1. 0.7 0.7 0.7] -mean value: 0.72 +mean value: 0.75 key: train_accuracy value: [1. 1. 1. 1. 0.98888889 1. - 1. 1. 0.98888889 0.98888889] - -mean value: 0.9966666666666667 - -key: test_fscore -value: [0.71428571 0.90909091 0.76923077 0.61538462 0.66666667 0.90909091 - 0.90909091 0.71428571 0.88888889 0.5 ] - -mean value: 0.7596015096015096 - -key: train_fscore -value: [1. 1. 1. 1. 0.98901099 1. - 1. 1. 
0.98876404 0.98876404] - -mean value: 0.996653907889863 - -key: test_precision -value: [0.55555556 0.83333333 0.625 0.5 0.57142857 0.83333333 - 0.83333333 0.55555556 1. 0.66666667] - -mean value: 0.6974206349206349 - -key: train_precision -value: [1. 1. 1. 1. 0.97826087 1. 1. 1. 1. 1. ] -mean value: 0.9978260869565218 +mean value: 0.9988888888888889 + +key: test_fscore +value: [0.90909091 0.83333333 0.66666667 0.83333333 0.6 0.66666667 + 1. 0.72727273 0.72727273 0.72727273] + +mean value: 0.769090909090909 + +key: train_fscore +value: [1. 1. 1. 1. 0.98876404 1. + 1. 1. 1. 1. ] + +mean value: 0.998876404494382 + +key: test_precision +value: [0.83333333 0.71428571 0.75 0.71428571 0.6 0.57142857 + 1. 0.66666667 0.66666667 0.66666667] + +mean value: 0.7183333333333333 + +key: train_precision +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] + +mean value: 1.0 key: test_recall -value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 0.4] +value: [1. 1. 0.6 1. 0.6 0.8 1. 0.8 0.8 0.8] -mean value: 0.88 +mean value: 0.84 key: train_recall -value: [1. 1. 1. 1. 1. 1. - 1. 1. 0.97777778 0.97777778] +value: [1. 1. 1. 1. 0.97777778 1. + 1. 1. 1. 1. ] -mean value: 0.9955555555555555 +mean value: 0.9977777777777778 key: test_roc_auc -value: [0.6 0.9 0.7 0.5 0.6 0.9 0.9 0.6 0.9 0.6] +value: [0.9 0.8 0.7 0.8 0.6 0.6 1. 0.7 0.7 0.7] -mean value: 0.72 +mean value: 0.75 key: train_roc_auc value: [1. 1. 1. 1. 0.98888889 1. - 1. 1. 0.98888889 0.98888889] + 1. 1. 1. 1. ] -mean value: 0.9966666666666667 +mean value: 0.9988888888888889 key: test_jcc -value: [0.55555556 0.83333333 0.625 0.44444444 0.5 0.83333333 - 0.83333333 0.55555556 0.8 0.33333333] +value: [0.83333333 0.71428571 0.5 0.71428571 0.42857143 0.5 + 1. 0.57142857 0.57142857 0.57142857] -mean value: 0.6313888888888889 +mean value: 0.6404761904761904 key: train_jcc -value: [1. 1. 1. 1. 0.97826087 1. - 1. 1. 0.97777778 0.97777778] +value: [1. 1. 1. 1. 0.97777778 1. + 1. 1. 1. 1. 
] -mean value: 0.9933816425120773 +mean value: 0.9977777777777778 -MCC on Blind test: 0.01 +MCC on Blind test: -0.1 -Accuracy on Blind test: 0.52 +Accuracy on Blind test: 0.48 Model_name: Multinomial Model func: MultinomialNB() @@ -24175,101 +24379,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', MultinomialNB())]) key: fit_time -value: [0.01617742 0.00876117 0.00850463 0.0087924 0.00857806 0.00921059 - 0.00938845 0.0089817 0.00929332 0.00935221] +value: [0.01152873 0.00863695 0.00857687 0.00819945 0.00833392 0.00825739 + 0.0083611 0.00821066 0.00823236 0.00819683] -mean value: 0.009703993797302246 +mean value: 0.00865342617034912 key: score_time -value: [0.00881886 0.0085144 0.00851989 0.00928068 0.00837755 0.00896335 - 0.00911069 0.00911427 0.00912476 0.00914335] +value: [0.01131368 0.00865769 0.0086081 0.00834394 0.00836015 0.00839067 + 0.00829577 0.00829291 0.00828815 0.00827575] -mean value: 0.008896780014038087 +mean value: 0.008682680130004884 key: test_mcc -value: [0.40824829 0.40824829 0.21821789 0.40824829 0.40824829 0.65465367 - 0. 0.6 0.2 0.21821789] +value: [0.6 0.65465367 0. 
0.40824829 0.40824829 0.40824829 + 0.40824829 0.2 0.40824829 0.5 ] -mean value: 0.3524082613035414 +mean value: 0.3995895123027292 key: train_mcc -value: [0.62237591 0.62237591 0.58137767 0.60238451 0.56056066 0.60540551 - 0.56056066 0.55610507 0.56056066 0.55610507] +value: [0.55610507 0.49193496 0.60059347 0.51314236 0.55610507 0.57906602 + 0.57906602 0.53452248 0.60238451 0.60238451] -mean value: 0.5827811645657781 +mean value: 0.5615304473142139 key: test_accuracy -value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6] - -mean value: 0.6699999999999999 - -key: train_accuracy -value: [0.81111111 0.81111111 0.78888889 0.8 0.77777778 0.8 - 0.77777778 0.77777778 0.77777778 0.77777778] - -mean value: 0.79 - -key: test_fscore -value: [0.72727273 0.72727273 0.5 0.72727273 0.72727273 0.75 - 0.54545455 0.8 0.6 0.5 ] - -mean value: 0.6604545454545454 - -key: train_fscore -value: [0.80898876 0.81318681 0.77647059 0.80851064 0.76190476 0.78571429 - 0.76190476 0.77272727 0.76190476 0.77272727] - -mean value: 0.782403992064804 - -key: test_precision -value: [0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 1. - 0.5 0.8 0.6 0.66666667] +value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7] mean value: 0.69 -key: train_precision -value: [0.81818182 0.80434783 0.825 0.7755102 0.82051282 0.84615385 - 0.82051282 0.79069767 0.82051282 0.79069767] +key: train_accuracy +value: [0.77777778 0.74444444 0.8 0.75555556 0.77777778 0.78888889 + 0.78888889 0.76666667 0.8 0.8 ] -mean value: 0.8112127504879925 +mean value: 0.78 + +key: test_fscore +value: [0.8 0.75 0.44444444 0.72727273 0.72727273 0.72727273 + 0.72727273 0.6 0.66666667 0.57142857] + +mean value: 0.6741630591630591 + +key: train_fscore +value: [0.77272727 0.72941176 0.79545455 0.74418605 0.77272727 0.7816092 + 0.7816092 0.75862069 0.79069767 0.79069767] + +mean value: 0.7717741331423581 + +key: test_precision +value: [0.8 1. 0.5 0.66666667 0.66666667 0.66666667 + 0.66666667 0.6 0.75 1. 
] + +mean value: 0.7316666666666667 + +key: train_precision +value: [0.79069767 0.775 0.81395349 0.7804878 0.79069767 0.80952381 + 0.80952381 0.78571429 0.82926829 0.82926829] + +mean value: 0.801413513221511 key: test_recall -value: [0.8 0.8 0.4 0.8 0.8 0.6 0.6 0.8 0.6 0.4] +value: [0.8 0.6 0.4 0.8 0.8 0.8 0.8 0.6 0.6 0.4] mean value: 0.66 key: train_recall -value: [0.8 0.82222222 0.73333333 0.84444444 0.71111111 0.73333333 - 0.71111111 0.75555556 0.71111111 0.75555556] +value: [0.75555556 0.68888889 0.77777778 0.71111111 0.75555556 0.75555556 + 0.75555556 0.73333333 0.75555556 0.75555556] -mean value: 0.7577777777777778 +mean value: 0.7444444444444445 key: test_roc_auc -value: [0.7 0.7 0.6 0.7 0.7 0.8 0.5 0.8 0.6 0.6] +value: [0.8 0.8 0.5 0.7 0.7 0.7 0.7 0.6 0.7 0.7] -mean value: 0.67 +mean value: 0.6900000000000001 key: train_roc_auc -value: [0.81111111 0.81111111 0.78888889 0.8 0.77777778 0.8 - 0.77777778 0.77777778 0.77777778 0.77777778] +value: [0.77777778 0.74444444 0.8 0.75555556 0.77777778 0.78888889 + 0.78888889 0.76666667 0.8 0.8 ] -mean value: 0.79 +mean value: 0.78 key: test_jcc -value: [0.57142857 0.57142857 0.33333333 0.57142857 0.57142857 0.6 - 0.375 0.66666667 0.42857143 0.33333333] +value: [0.66666667 0.6 0.28571429 0.57142857 0.57142857 0.57142857 + 0.57142857 0.42857143 0.5 0.4 ] -mean value: 0.5022619047619047 +mean value: 0.5166666666666666 key: train_jcc -value: [0.67924528 0.68518519 0.63461538 0.67857143 0.61538462 0.64705882 - 0.61538462 0.62962963 0.61538462 0.62962963] +value: [0.62962963 0.57407407 0.66037736 0.59259259 0.62962963 0.64150943 + 0.64150943 0.61111111 0.65384615 0.65384615] -mean value: 0.6430089210333384 +mean value: 0.628812557114444 -MCC on Blind test: 0.3 +MCC on Blind test: 0.05 -Accuracy on Blind test: 0.68 +Accuracy on Blind test: 0.57 Model_name: Passive Aggresive Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42) @@ -24303,105 +24507,107 @@ Running model pipeline: Pipeline(steps=[('prep', 
PassiveAggressiveClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.01009655 0.01374936 0.0128026 0.01307821 0.01348066 0.01305318 - 0.01311874 0.01353908 0.01283336 0.01359773] +value: [0.01000237 0.01344228 0.01320529 0.01263332 0.01426673 0.01389861 + 0.01293731 0.01380181 0.01295614 0.01367664] -mean value: 0.01293494701385498 +mean value: 0.013082051277160644 key: score_time -value: [0.0084734 0.01106596 0.01120782 0.0111711 0.0113287 0.01123643 - 0.01121616 0.01123714 0.01122808 0.01128221] +value: [0.00835109 0.01133704 0.01144195 0.01147938 0.01144743 0.01136518 + 0.011235 0.0112555 0.01120543 0.01124072] -mean value: 0.010944700241088868 +mean value: 0.011035871505737305 key: test_mcc -value: [0.65465367 0.81649658 0.81649658 0.21821789 0.81649658 0.5 - 0.65465367 0.65465367 0.81649658 0.40824829] +value: [0.81649658 0.81649658 0.40824829 0.40824829 0.81649658 0.5 + 0.81649658 0.65465367 0.81649658 1. ] -mean value: 0.6356413516534691 +mean value: 0.7053633156274334 key: train_mcc -value: [0.93541435 0.93541435 0.91473203 0.97801929 0.97801929 0.89442719 - 0.91201231 0.93541435 0.97801929 0.95650071] +value: [1. 
0.95650071 0.89442719 0.91201231 0.91473203 0.95650071 + 0.93356387 0.93541435 0.95555556 0.97801929] -mean value: 0.9417973170390846 +mean value: 0.9436726030863619 key: test_accuracy -value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7] - -mean value: 0.8 - -key: train_accuracy -value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444 - 0.95555556 0.96666667 0.98888889 0.97777778] - -mean value: 0.97 - -key: test_fscore -value: [0.83333333 0.90909091 0.90909091 0.66666667 0.88888889 0.57142857 - 0.83333333 0.83333333 0.88888889 0.66666667] - -mean value: 0.80007215007215 - -key: train_fscore -value: [0.96774194 0.96774194 0.95744681 0.98901099 0.98901099 0.94117647 - 0.95652174 0.96774194 0.98901099 0.97727273] - -mean value: 0.9702676518986616 - -key: test_precision -value: [0.71428571 0.83333333 0.83333333 0.57142857 1. 1. - 0.71428571 0.71428571 1. 0.75 ] - -mean value: 0.8130952380952381 - -key: train_precision -value: [0.9375 0.9375 0.91836735 0.97826087 0.97826087 1. - 0.93617021 0.9375 0.97826087 1. ] - -mean value: 0.9601820168400386 - -key: test_recall -value: [1. 1. 1. 0.8 0.8 0.4 1. 1. 0.8 0.6] +value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ] mean value: 0.84 -key: train_recall -value: [1. 1. 1. 1. 1. 0.88888889 - 0.97777778 1. 1. 0.95555556] +key: train_accuracy +value: [1. 0.97777778 0.94444444 0.95555556 0.95555556 0.97777778 + 0.96666667 0.96666667 0.97777778 0.98888889] -mean value: 0.9822222222222222 +mean value: 0.9711111111111111 + +key: test_fscore +value: [0.90909091 0.90909091 0.66666667 0.72727273 0.90909091 0.76923077 + 0.88888889 0.83333333 0.88888889 1. ] + +mean value: 0.8501554001554001 + +key: train_fscore +value: [1. 0.97727273 0.94736842 0.95652174 0.95744681 0.97826087 + 0.96703297 0.96774194 0.97777778 0.98901099] + +mean value: 0.9718434234837254 + +key: test_precision +value: [0.83333333 0.83333333 0.75 0.66666667 0.83333333 0.625 + 1. 0.71428571 1. 1. 
] + +mean value: 0.825595238095238 + +key: train_precision +value: [1. 1. 0.9 0.93617021 0.91836735 0.95744681 + 0.95652174 0.9375 0.97777778 0.97826087] + +mean value: 0.9562044754688801 + +key: test_recall +value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. ] + +mean value: 0.9 + +key: train_recall +value: [1. 0.95555556 1. 0.97777778 1. 1. + 0.97777778 1. 0.97777778 1. ] + +mean value: 0.9888888888888889 key: test_roc_auc -value: [0.8 0.9 0.9 0.6 0.9 0.7 0.8 0.8 0.9 0.7] +value: [0.9 0.9 0.7 0.7 0.9 0.7 0.9 0.8 0.9 1. ] -mean value: 0.8 +mean value: 0.8400000000000001 key: train_roc_auc -value: [0.96666667 0.96666667 0.95555556 0.98888889 0.98888889 0.94444444 - 0.95555556 0.96666667 0.98888889 0.97777778] +value: [1. 0.97777778 0.94444444 0.95555556 0.95555556 0.97777778 + 0.96666667 0.96666667 0.97777778 0.98888889] -mean value: 0.97 +mean value: 0.9711111111111111 key: test_jcc -value: [0.71428571 0.83333333 0.83333333 0.5 0.8 0.4 - 0.71428571 0.71428571 0.8 0.5 ] +value: [0.83333333 0.83333333 0.5 0.57142857 0.83333333 0.625 + 0.8 0.71428571 0.8 1. ] -mean value: 0.680952380952381 +mean value: 0.7510714285714286 key: train_jcc -value: [0.9375 0.9375 0.91836735 0.97826087 0.97826087 0.88888889 - 0.91666667 0.9375 0.97826087 0.95555556] +value: [1. 
0.95555556 0.9 0.91666667 0.91836735 0.95744681 + 0.93617021 0.9375 0.95652174 0.97826087] -mean value: 0.9426761066745539 +mean value: 0.9456489199133246 -MCC on Blind test: 0.15 +MCC on Blind test: 0.36 -Accuracy on Blind test: 0.6 +Accuracy on Blind test: 0.72 Model_name: Stochastic GDescent Model func: SGDClassifier(n_jobs=10, random_state=42) -List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, +List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior. 
+ _warn_prf(average, modifier, msg_start, len(result)) +[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5, n_estimators=1000, n_jobs=10, oob_score=True, random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, enable_categorical=False, @@ -24430,101 +24636,101 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', SGDClassifier(n_jobs=10, random_state=42))]) key: fit_time -value: [0.01266241 0.01253319 0.01259041 0.0122745 0.01273322 0.01224399 - 0.01242423 0.01244211 0.01222348 0.01222324] +value: [0.01229525 0.01237893 0.01234698 0.01202965 0.0121994 0.01249433 + 0.01228952 0.01218224 0.01196837 0.01224113] -mean value: 0.012435078620910645 +mean value: 0.012242579460144043 key: score_time -value: [0.01101017 0.01124358 0.01118398 0.01122379 0.01121879 0.01122594 - 0.01122475 0.01121068 0.01118016 0.01124597] +value: [0.01024342 0.01122355 0.0113194 0.01121879 0.01136684 0.01144981 + 0.01120973 0.01118469 0.0115819 0.01125216] -mean value: 0.011196780204772949 +mean value: 0.011205029487609864 key: test_mcc -value: [0.65465367 0.81649658 0.81649658 0.21821789 0.40824829 0.65465367 - 0.81649658 0.65465367 0.81649658 0.81649658] +value: [0.81649658 0.81649658 0.21821789 0.6 0.33333333 0.40824829 + 0.81649658 0.81649658 0. 
0.81649658] -mean value: 0.6672910097462417 +mean value: 0.5642282418671819 key: train_mcc -value: [0.93356387 0.97801929 0.95650071 1. 1. 0.88910845 - 0.93356387 0.97801929 0.97801929 0.97801929] +value: [1. 0.95650071 0.97801929 0.8675239 0.74278135 0.97801929 + 0.88910845 0.89442719 0.48257301 0.91111111] -mean value: 0.9624814081711083 +mean value: 0.8700064322418951 key: test_accuracy -value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9] +value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9] -mean value: 0.8200000000000001 +mean value: 0.77 key: train_accuracy -value: [0.96666667 0.98888889 0.97777778 1. 1. 0.94444444 - 0.96666667 0.98888889 0.98888889 0.98888889] +value: [1. 0.97777778 0.98888889 0.93333333 0.85555556 0.98888889 + 0.94444444 0.94444444 0.68888889 0.95555556] -mean value: 0.9811111111111112 +mean value: 0.9277777777777778 key: test_fscore -value: [0.83333333 0.90909091 0.90909091 0.66666667 0.72727273 0.75 - 0.90909091 0.83333333 0.88888889 0.90909091] +value: [0.90909091 0.90909091 0.5 0.8 0.33333333 0.72727273 + 0.88888889 0.90909091 0. 0.90909091] -mean value: 0.8335858585858585 +mean value: 0.6885858585858586 key: train_fscore -value: [0.96629213 0.98876404 0.97727273 1. 1. 0.94382022 - 0.96703297 0.98876404 0.98901099 0.98901099] +value: [1. 0.97826087 0.98876404 0.93181818 0.83116883 0.98876404 + 0.94505495 0.94736842 0.5483871 0.95555556] -mean value: 0.9809968121765875 +mean value: 0.9115141990877197 key: test_precision -value: [0.71428571 0.83333333 0.83333333 0.57142857 0.66666667 1. - 0.83333333 0.71428571 1. 0.83333333] +value: [0.83333333 0.83333333 0.66666667 0.8 1. 0.66666667 + 1. 0.83333333 0. 0.83333333] -mean value: 0.8 +mean value: 0.7466666666666667 key: train_precision -value: [0.97727273 1. 1. 1. 1. 0.95454545 - 0.95652174 1. 0.97826087 0.97826087] +value: [1. 0.95744681 1. 0.95348837 1. 1. + 0.93478261 0.9 1. 0.95555556] -mean value: 0.9844861660079052 +mean value: 0.9701273344854869 key: test_recall -value: [1. 1. 1. 
0.8 0.8 0.6 1. 1. 0.8 1. ] +value: [1. 1. 0.4 0.8 0.2 0.8 0.8 1. 0. 1. ] -mean value: 0.9 +mean value: 0.7000000000000001 key: train_recall -value: [0.95555556 0.97777778 0.95555556 1. 1. 0.93333333 - 0.97777778 0.97777778 1. 1. ] +value: [1. 1. 0.97777778 0.91111111 0.71111111 0.97777778 + 0.95555556 1. 0.37777778 0.95555556] -mean value: 0.9777777777777777 +mean value: 0.8866666666666667 key: test_roc_auc -value: [0.8 0.9 0.9 0.6 0.7 0.8 0.9 0.8 0.9 0.9] +value: [0.9 0.9 0.6 0.8 0.6 0.7 0.9 0.9 0.5 0.9] -mean value: 0.8200000000000001 +mean value: 0.77 key: train_roc_auc -value: [0.96666667 0.98888889 0.97777778 1. 1. 0.94444444 - 0.96666667 0.98888889 0.98888889 0.98888889] +value: [1. 0.97777778 0.98888889 0.93333333 0.85555556 0.98888889 + 0.94444444 0.94444444 0.68888889 0.95555556] -mean value: 0.9811111111111112 +mean value: 0.9277777777777778 key: test_jcc -value: [0.71428571 0.83333333 0.83333333 0.5 0.57142857 0.6 - 0.83333333 0.71428571 0.8 0.83333333] +value: [0.83333333 0.83333333 0.33333333 0.66666667 0.2 0.57142857 + 0.8 0.83333333 0. 0.83333333] -mean value: 0.7233333333333334 +mean value: 0.5904761904761905 key: train_jcc -value: [0.93478261 0.97777778 0.95555556 1. 1. 0.89361702 - 0.93617021 0.97777778 0.97826087 0.97826087] +value: [1. 
0.95744681 0.97777778 0.87234043 0.71111111 0.97777778 + 0.89583333 0.9 0.37777778 0.91489362] -mean value: 0.9632202692979751 +mean value: 0.8584958628841608 -MCC on Blind test: 0.08 +MCC on Blind test: 0.32 -Accuracy on Blind test: 0.57 +Accuracy on Blind test: 0.7 Model_name: AdaBoost Classifier Model func: AdaBoostClassifier(random_state=42) @@ -24557,22 +24763,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', AdaBoostClassifier(random_state=42))]) key: fit_time -value: [0.09050202 0.08575082 0.08697701 0.08462596 0.08410382 0.0856297 - 0.08490443 0.08724713 0.08709764 0.08831048] +value: [0.09080362 0.08026624 0.0802474 0.07980943 0.07940364 0.08048773 + 0.07949638 0.07976413 0.07984233 0.07969499] -mean value: 0.08651490211486816 +mean value: 0.08098158836364747 key: score_time -value: [0.01545334 0.01579857 0.01585507 0.01584077 0.01450062 0.01623821 - 0.01575541 0.01573849 0.01574063 0.01583362] +value: [0.01442552 0.01436472 0.01435137 0.01420498 0.01422119 0.01444697 + 0.01426816 0.01428533 0.01425791 0.01430011] -mean value: 0.0156754732131958 +mean value: 0.01431262493133545 key: test_mcc -value: [0.81649658 0.81649658 0.81649658 0.65465367 0.81649658 1. - 0.81649658 0.81649658 0.81649658 0.81649658] +value: [0.81649658 0.65465367 0.81649658 0.81649658 0.81649658 0.81649658 + 1. 0.81649658 0.65465367 1. ] -mean value: 0.8186626318129786 +mean value: 0.8208286826982311 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24580,7 +24786,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.9 0.9 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.8 0.9 0.9 0.9 0.9 1. 0.9 0.8 1. ] mean value: 0.9 @@ -24590,10 +24796,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.90909091 0.90909091 0.90909091 0.83333333 0.90909091 1. - 0.88888889 0.90909091 0.88888889 0.90909091] +value: [0.90909091 0.83333333 0.88888889 0.90909091 0.88888889 0.90909091 + 1. 
0.90909091 0.83333333 1. ] -mean value: 0.9065656565656566 +mean value: 0.908080808080808 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24601,10 +24807,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1. - 1. 0.83333333 1. 0.83333333] +value: [0.83333333 0.71428571 1. 0.83333333 1. 0.83333333 + 1. 0.83333333 0.71428571 1. ] -mean value: 0.8714285714285714 +mean value: 0.8761904761904762 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24612,7 +24818,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 1. 1. 1. 0.8 1. 0.8 1. ] +value: [1. 1. 0.8 1. 0.8 1. 1. 1. 1. 1. ] mean value: 0.96 @@ -24622,7 +24828,7 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.9 0.9 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.8 0.9 0.9 0.9 0.9 1. 0.9 0.8 1. ] mean value: 0.9 @@ -24632,10 +24838,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.83333333 0.83333333 0.83333333 0.71428571 0.83333333 1. - 0.8 0.83333333 0.8 0.83333333] +value: [0.83333333 0.71428571 0.8 0.83333333 0.8 0.83333333 + 1. 0.83333333 0.71428571 1. ] -mean value: 0.8314285714285714 +mean value: 0.8361904761904763 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
@@ -24679,68 +24885,68 @@ Running model pipeline: Pipeline(steps=[('prep', random_state=42))]) key: fit_time -value: [0.03332639 0.03159523 0.0316155 0.02976799 0.02722383 0.02606964 - 0.02668548 0.0271318 0.03500819 0.02735019] +value: [0.02699709 0.03070116 0.03277135 0.05133557 0.02615666 0.02731848 + 0.047997 0.03133965 0.03537989 0.04155445] -mean value: 0.029577422142028808 +mean value: 0.03515512943267822 key: score_time -value: [0.01860285 0.01750827 0.02369905 0.02393627 0.01593733 0.01683497 - 0.01794434 0.0177505 0.02326179 0.02694941] +value: [0.02213979 0.02183247 0.03555799 0.03083539 0.01692629 0.02012992 + 0.01762462 0.02948856 0.03649259 0.02368855] -mean value: 0.020242476463317872 +mean value: 0.0254716157913208 key: test_mcc -value: [0.65465367 1. 0.81649658 0.65465367 0.81649658 1. - 0.81649658 0.81649658 0.81649658 0.81649658] +value: [0.81649658 0.65465367 0.81649658 0.81649658 1. 1. + 1. 0.81649658 0.81649658 1. ] -mean value: 0.8208286826982311 +mean value: 0.8737136575346607 key: train_mcc -value: [1. 1. 0.97801929 1. 1. 0.97801929 - 1. 0.97801929 1. 1. ] +value: [1. 1. 1. 1. 0.97801929 1. + 1. 1. 0.97801929 1. ] -mean value: 0.9934057881530954 +mean value: 0.9956038587687303 key: test_accuracy -value: [0.8 1. 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.8 0.9 0.9 1. 1. 1. 0.9 0.9 1. ] -mean value: 0.9 +mean value: 0.93 key: train_accuracy -value: [1. 1. 0.98888889 1. 1. 0.98888889 - 1. 0.98888889 1. 1. ] +value: [1. 1. 1. 1. 0.98888889 1. + 1. 1. 0.98888889 1. ] -mean value: 0.9966666666666667 +mean value: 0.9977777777777778 key: test_fscore -value: [0.83333333 1. 0.90909091 0.83333333 0.88888889 1. - 0.88888889 0.90909091 0.88888889 0.90909091] +value: [0.90909091 0.83333333 0.88888889 0.90909091 1. 1. + 1. 0.90909091 0.90909091 1. ] -mean value: 0.906060606060606 +mean value: 0.9358585858585858 key: train_fscore -value: [1. 1. 0.98901099 1. 1. 0.98901099 - 1. 0.98901099 1. 1. ] +value: [1. 1. 1. 1. 0.98901099 1. + 1. 1. 0.98901099 1. 
] -mean value: 0.9967032967032967 +mean value: 0.9978021978021978 key: test_precision -value: [0.71428571 1. 0.83333333 0.71428571 1. 1. - 1. 0.83333333 1. 0.83333333] +value: [0.83333333 0.71428571 1. 0.83333333 1. 1. + 1. 0.83333333 0.83333333 1. ] -mean value: 0.8928571428571429 +mean value: 0.9047619047619048 key: train_precision -value: [1. 1. 0.97826087 1. 1. 0.97826087 - 1. 0.97826087 1. 1. ] +value: [1. 1. 1. 1. 0.97826087 1. + 1. 1. 0.97826087 1. ] -mean value: 0.9934782608695653 +mean value: 0.9956521739130435 key: test_recall -value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ] +value: [1. 1. 0.8 1. 1. 1. 1. 1. 1. 1. ] -mean value: 0.9400000000000001 +mean value: 0.98 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24748,31 +24954,31 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 1. 0.9 0.8 0.9 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.8 0.9 0.9 1. 1. 1. 0.9 0.9 1. ] -mean value: 0.9 +mean value: 0.93 key: train_roc_auc -value: [1. 1. 0.98888889 1. 1. 0.98888889 - 1. 0.98888889 1. 1. ] +value: [1. 1. 1. 1. 0.98888889 1. + 1. 1. 0.98888889 1. ] -mean value: 0.9966666666666666 +mean value: 0.9977777777777778 key: test_jcc -value: [0.71428571 1. 0.83333333 0.71428571 0.8 1. - 0.8 0.83333333 0.8 0.83333333] +value: [0.83333333 0.71428571 0.8 0.83333333 1. 1. + 1. 0.83333333 0.83333333 1. ] -mean value: 0.8328571428571429 +mean value: 0.8847619047619047 key: train_jcc -value: [1. 1. 0.97826087 1. 1. 0.97826087 - 1. 0.97826087 1. 1. ] +value: [1. 1. 1. 1. 0.97826087 1. + 1. 1. 0.97826087 1. 
] -mean value: 0.9934782608695653 +mean value: 0.9956521739130435 -MCC on Blind test: 0.95 +MCC on Blind test: 0.89 -Accuracy on Blind test: 0.98 +Accuracy on Blind test: 0.95 Model_name: Gaussian Process Model func: GaussianProcessClassifier(random_state=42) @@ -24805,22 +25011,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', GaussianProcessClassifier(random_state=42))]) key: fit_time -value: [0.01528788 0.01607227 0.02439117 0.01962614 0.01726866 0.01696539 - 0.01694345 0.01618481 0.01628375 0.01687813] +value: [0.01359749 0.01574779 0.01587391 0.02081776 0.01601291 0.01616716 + 0.01599646 0.01598001 0.01611137 0.01597762] -mean value: 0.01759016513824463 +mean value: 0.016228246688842773 key: score_time -value: [0.01140237 0.01114202 0.01187825 0.0118506 0.01176071 0.0119977 - 0.01188946 0.01196241 0.01185942 0.01186037] +value: [0.01142287 0.01155448 0.01172853 0.01179075 0.01171851 0.01176023 + 0.01172566 0.01173353 0.01173139 0.01173902] -mean value: 0.011760330200195313 +mean value: 0.011690497398376465 key: test_mcc -value: [0.6 0.65465367 0.81649658 0.21821789 0.6 0.81649658 - 0.40824829 0.5 0.40824829 0.40824829] +value: [0.81649658 0.65465367 0.40824829 0.6 0.5 0.40824829 + 0.40824829 0.40824829 0.6 0.81649658] -mean value: 0.543060959419101 +mean value: 0.5620639994418881 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24828,9 +25034,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7] +value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9] -mean value: 0.76 +mean value: 0.77 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24838,10 +25044,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 key: test_fscore -value: [0.8 0.83333333 0.90909091 0.66666667 0.8 0.88888889 - 0.66666667 0.76923077 0.72727273 0.66666667] +value: [0.90909091 0.83333333 0.66666667 0.8 0.57142857 0.72727273 + 0.72727273 0.72727273 0.8 0.90909091] -mean value: 0.7727816627816628 +mean value: 0.7671428571428571 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24849,10 +25055,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.8 0.71428571 0.83333333 0.57142857 0.8 1. - 0.75 0.625 0.66666667 0.75 ] +value: [0.83333333 0.71428571 0.75 0.8 1. 0.66666667 + 0.66666667 0.66666667 0.8 0.83333333] -mean value: 0.7510714285714286 +mean value: 0.7730952380952381 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24860,9 +25066,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.8 1. 1. 0.8 0.8 0.8 0.6 1. 0.8 0.6] +value: [1. 1. 0.6 0.8 0.4 0.8 0.8 0.8 0.8 1. ] -mean value: 0.8200000000000001 +mean value: 0.8 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24870,9 +25076,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.8 0.8 0.9 0.6 0.8 0.9 0.7 0.7 0.7 0.7] +value: [0.9 0.8 0.7 0.8 0.7 0.7 0.7 0.7 0.8 0.9] -mean value: 0.76 +mean value: 0.77 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24880,19 +25086,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.66666667 0.71428571 0.83333333 0.5 0.66666667 0.8 - 0.5 0.625 0.57142857 0.5 ] +value: [0.83333333 0.71428571 0.5 0.66666667 0.4 0.57142857 + 0.57142857 0.57142857 0.66666667 0.83333333] -mean value: 0.6377380952380952 +mean value: 0.6328571428571429 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.16 +MCC on Blind test: 0.01 -Accuracy on Blind test: 0.62 +Accuracy on Blind test: 0.52 Model_name: Gradient Boosting Model func: GradientBoostingClassifier(random_state=42) @@ -24949,22 +25155,22 @@ Pipeline(steps=[('prep', ('model', GradientBoostingClassifier(random_state=42))]) key: fit_time -value: [0.17597413 0.1721468 0.17924476 0.17421913 0.17393565 0.17166734 - 0.14058495 0.17408228 0.17574048 0.17127013] +value: [0.22217441 0.19093752 0.1718936 0.16672945 0.18235731 0.17839694 + 0.1745894 0.18664312 0.18885684 0.19141936] -mean value: 0.17088656425476073 +mean value: 0.18539979457855224 key: score_time -value: [0.00916481 0.00947452 0.00913882 0.00930166 0.00918031 0.00899935 - 0.0091269 0.00965333 0.00991249 0.00923991] +value: [0.00920916 0.00894237 0.00916672 0.00911665 0.00934219 0.00979352 + 0.0089221 0.00914454 0.00932813 0.00907612] -mean value: 0.009319210052490234 +mean value: 0.00920414924621582 key: test_mcc -value: [0.81649658 1. 0.81649658 1. 0.6 1. - 0.81649658 0.81649658 0.81649658 0.81649658] +value: [0.81649658 0.81649658 0.81649658 0.65465367 0.65465367 1. + 1. 0.81649658 0.81649658 0.81649658] -mean value: 0.8498979485566356 +mean value: 0.8208286826982311 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24972,9 +25178,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.9 1. 0.9 1. 0.8 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.9 0.9 0.8 0.8 1. 1. 0.9 0.9 0.9] -mean value: 0.92 +mean value: 0.9 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -24982,10 +25188,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.90909091 1. 0.90909091 1. 0.8 1. - 0.88888889 0.90909091 0.88888889 0.90909091] +value: [0.90909091 0.90909091 0.88888889 0.83333333 0.75 1. + 1. 0.90909091 0.90909091 0.88888889] -mean value: 0.9214141414141415 +mean value: 0.8997474747474747 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
@@ -24993,10 +25199,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_precision -value: [0.83333333 1. 0.83333333 1. 0.8 1. - 1. 0.83333333 1. 0.83333333] +value: [0.83333333 0.83333333 1. 0.71428571 1. 1. + 1. 0.83333333 0.83333333 1. ] -mean value: 0.9133333333333333 +mean value: 0.9047619047619048 key: train_precision value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25004,9 +25210,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [1. 1. 1. 1. 0.8 1. 0.8 1. 0.8 1. ] +value: [1. 1. 0.8 1. 0.6 1. 1. 1. 1. 0.8] -mean value: 0.9400000000000001 +mean value: 0.92 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25014,9 +25220,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.9 1. 0.9 1. 0.8 1. 0.9 0.9 0.9 0.9] +value: [0.9 0.9 0.9 0.8 0.8 1. 1. 0.9 0.9 0.9] -mean value: 0.92 +mean value: 0.9 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25024,19 +25230,19 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.83333333 1. 0.83333333 1. 0.66666667 1. - 0.8 0.83333333 0.8 0.83333333] +value: [0.83333333 0.83333333 0.8 0.71428571 0.6 1. + 1. 0.83333333 0.83333333 0.8 ] -mean value: 0.86 +mean value: 0.8247619047619048 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
mean value: 1.0 -MCC on Blind test: 0.95 +MCC on Blind test: 0.84 -Accuracy on Blind test: 0.98 +Accuracy on Blind test: 0.92 Model_name: QDA Model func: QuadraticDiscriminantAnalysis() @@ -25069,22 +25275,22 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', QuadraticDiscriminantAnalysis())]) key: fit_time -value: [0.01125002 0.01427794 0.01427984 0.01602817 0.01415706 0.01421785 - 0.0143075 0.01926303 0.01412845 0.01442242] +value: [0.01358581 0.01411629 0.01471233 0.01414609 0.02364945 0.02104473 + 0.01440454 0.01459599 0.01464772 0.01447725] -mean value: 0.01463322639465332 +mean value: 0.01593801975250244 key: score_time -value: [0.01145935 0.01170135 0.01169324 0.01170659 0.01228118 0.01166224 - 0.01343036 0.01290202 0.01216722 0.01210403] +value: [0.01189876 0.01186109 0.01198363 0.0119555 0.01537371 0.01311707 + 0.01196265 0.01466823 0.01172829 0.01508021] -mean value: 0.012110757827758788 +mean value: 0.012962913513183594 key: test_mcc -value: [0.81649658 1. 1. 0.81649658 0.81649658 0.65465367 - 0.5 0.81649658 0.65465367 0.33333333] +value: [1. 1. 0.5 0.81649658 0.5 0.81649658 + 0.81649658 0.81649658 0.81649658 0.65465367] -mean value: 0.7408626998460192 +mean value: 0.7737136575346607 key: train_mcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25092,9 +25298,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_accuracy -value: [0.9 1. 1. 0.9 0.9 0.8 0.7 0.9 0.8 0.6] +value: [1. 1. 0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8] -mean value: 0.85 +mean value: 0.87 key: train_accuracy value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25102,10 +25308,10 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_fscore -value: [0.88888889 1. 1. 0.88888889 0.88888889 0.75 - 0.57142857 0.88888889 0.75 0.33333333] +value: [1. 1. 0.57142857 0.88888889 0.57142857 0.88888889 + 0.88888889 0.88888889 0.88888889 0.75 ] -mean value: 0.7960317460317461 +mean value: 0.8337301587301588 key: train_fscore value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 
@@ -25123,9 +25329,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_recall -value: [0.8 1. 1. 0.8 0.8 0.6 0.4 0.8 0.6 0.2] +value: [1. 1. 0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6] -mean value: 0.7000000000000001 +mean value: 0.74 key: train_recall value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25133,9 +25339,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_roc_auc -value: [0.9 1. 1. 0.9 0.9 0.8 0.7 0.9 0.8 0.6] +value: [1. 1. 0.7 0.9 0.7 0.9 0.9 0.9 0.9 0.8] -mean value: 0.85 +mean value: 0.87 key: train_roc_auc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25143,9 +25349,9 @@ value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] mean value: 1.0 key: test_jcc -value: [0.8 1. 1. 0.8 0.8 0.6 0.4 0.8 0.6 0.2] +value: [1. 1. 0.4 0.8 0.4 0.8 0.8 0.8 0.8 0.6] -mean value: 0.7000000000000001 +mean value: 0.74 key: train_jcc value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] @@ -25187,101 +25393,100 @@ Running model pipeline: Pipeline(steps=[('prep', ('model', RidgeClassifier(random_state=42))]) key: fit_time -value: [0.03190494 0.0162518 0.01298714 0.02096915 0.02571321 0.07055092 - 0.01573253 0.01286387 0.01908612 0.01534081] +value: [0.0334847 0.03210235 0.03213644 0.03319669 0.03299642 0.03233814 + 0.0345664 0.03212976 0.03434682 0.03329611] -mean value: 0.024140048027038574 +mean value: 0.03305938243865967 key: score_time -value: [0.0120213 0.01187372 0.01150703 0.01177883 0.0234704 0.01707196 - 0.01153183 0.01159644 0.01922894 0.01779103] +value: [0.01982021 0.02007699 0.02197194 0.02256417 0.02357554 0.0200212 + 0.0116353 0.02040458 0.02340674 0.0233531 ] -mean value: 0.01478714942932129 +mean value: 0.020682978630065917 key: test_mcc -value: [0.81649658 0.81649658 0.81649658 0.40824829 0.6 0.65465367 - 0.65465367 0.81649658 0.81649658 0.81649658] +value: [0.65465367 0.81649658 0.40824829 0.6 0.81649658 0.5 + 0.81649658 0.65465367 0.6 1. ] -mean value: 0.7216535117446173 +mean value: 0.6867045374662996 key: train_mcc -value: [0.97801929 0.97801929 0.97801929 1. 1. 
0.97801929 - 0.95555556 0.97801929 1. 0.97801929] +value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 + 0.97801929 0.97801929 0.97801929 0.97801929] -mean value: 0.9823671318617464 +mean value: 0.9802173644592863 key: test_accuracy -value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9] +value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ] -mean value: 0.85 +mean value: 0.8300000000000001 key: train_accuracy -value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889 - 0.97777778 0.98888889 1. 0.98888889] +value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 0.98888889 0.98888889] -mean value: 0.9911111111111112 +mean value: 0.99 key: test_fscore -value: [0.90909091 0.90909091 0.90909091 0.72727273 0.8 0.83333333 - 0.83333333 0.90909091 0.88888889 0.90909091] +value: [0.83333333 0.90909091 0.66666667 0.8 0.90909091 0.76923077 + 0.88888889 0.83333333 0.8 1. ] -mean value: 0.8628282828282828 +mean value: 0.840963480963481 key: train_fscore -value: [0.98901099 0.98901099 0.98901099 1. 1. 0.98901099 - 0.97777778 0.98901099 1. 0.98901099] +value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 + 0.98901099 0.98901099 0.98901099 0.98901099] -mean value: 0.9911843711843712 +mean value: 0.9901098901098901 key: test_precision -value: [0.83333333 0.83333333 0.83333333 0.66666667 0.8 0.71428571 - 0.71428571 0.83333333 1. 0.83333333] +value: [0.71428571 0.83333333 0.75 0.8 0.83333333 0.625 + 1. 0.71428571 0.8 1. ] -mean value: 0.8061904761904762 +mean value: 0.8070238095238096 key: train_precision -value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087 - 0.97777778 0.97826087 1. 0.97826087] +value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 0.97826087 0.97826087] -mean value: 0.9847342995169082 +mean value: 0.9804347826086957 key: test_recall -value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 1. ] +value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. 
] -mean value: 0.9400000000000001 +mean value: 0.9 key: train_recall -value: [1. 1. 1. 1. 1. 1. - 0.97777778 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9977777777777778 +mean value: 1.0 key: test_roc_auc -value: [0.9 0.9 0.9 0.7 0.8 0.8 0.8 0.9 0.9 0.9] +value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.8 1. ] -mean value: 0.8500000000000001 +mean value: 0.8300000000000001 key: train_roc_auc -value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889 - 0.97777778 0.98888889 1. 0.98888889] +value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 0.98888889 0.98888889] -mean value: 0.991111111111111 +mean value: 0.99 key: test_jcc -value: [0.83333333 0.83333333 0.83333333 0.57142857 0.66666667 0.71428571 - 0.71428571 0.83333333 0.8 0.83333333] +value: [0.71428571 0.83333333 0.5 0.66666667 0.83333333 0.625 + 0.8 0.71428571 0.66666667 1. ] -mean value: 0.7633333333333333 +mean value: 0.7353571428571428 key: train_jcc -value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087 - 0.95652174 0.97826087 1. 0.97826087] +value: [1. 
0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 0.97826087 0.97826087] -mean value: 0.9826086956521739 +mean value: 0.9804347826086957 -MCC on Blind test: 0.48 +MCC on Blind test: 0.37 -Accuracy on Blind test: 0.78 +Accuracy on Blind test: 0.72 Model_name: Ridge ClassifierCV Model func: RidgeClassifierCV(cv=10) @@ -25297,12 +25502,12 @@ List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ( reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact', use_label_encoder=False, validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))] -Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:188: SettingWithCopyWarning: +Running model pipeline: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:196: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True) -/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:191: SettingWithCopyWarning: +/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_7030.py:199: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in 
the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy @@ -25324,98 +25529,97 @@ Pipeline(steps=[('prep', ('model', RidgeClassifierCV(cv=10))]) key: fit_time -value: [0.26709867 0.15390754 0.17872667 0.26606345 0.15658689 0.17817855 - 0.16672301 0.15290213 0.25758982 0.18749452] +value: [0.10436153 0.10901594 0.15473795 0.18076968 0.18270421 0.19743443 + 0.19927812 0.20212865 0.22301292 0.22878146] -mean value: 0.1965271234512329 +mean value: 0.17822248935699464 key: score_time -value: [0.0125277 0.01932573 0.02402663 0.02339983 0.02723026 0.01232576 - 0.02062273 0.02069354 0.02367878 0.02091861] +value: [0.0118041 0.02067137 0.01172853 0.02039146 0.02330875 0.02281046 + 0.02109528 0.02306652 0.0215745 0.02139878] -mean value: 0.020474958419799804 +mean value: 0.019784975051879882 key: test_mcc -value: [0.81649658 0.81649658 0.81649658 0.40824829 0.40824829 0.65465367 - 0.65465367 0.81649658 0.81649658 0.65465367] +value: [0.65465367 0.81649658 0.40824829 0.6 0.81649658 0.5 + 0.81649658 0.65465367 0.21821789 1. ] -mean value: 0.6862940497690287 +mean value: 0.6485263264898988 key: train_mcc -value: [0.97801929 0.97801929 0.97801929 1. 1. 0.97801929 - 0.95555556 0.97801929 1. 1. ] +value: [1. 0.97801929 0.97801929 0.97801929 0.97801929 0.97801929 + 0.97801929 0.97801929 1. 0.97801929] -mean value: 0.9845652024773813 +mean value: 0.9824154350749212 key: test_accuracy -value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8] +value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ] -mean value: 0.8300000000000001 +mean value: 0.81 key: train_accuracy -value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889 - 0.97777778 0.98888889 1. 1. ] +value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 1. 
0.98888889] -mean value: 0.9922222222222222 +mean value: 0.9911111111111112 key: test_fscore -value: [0.90909091 0.90909091 0.90909091 0.72727273 0.72727273 0.83333333 - 0.83333333 0.90909091 0.88888889 0.83333333] +value: [0.83333333 0.90909091 0.66666667 0.8 0.90909091 0.76923077 + 0.88888889 0.83333333 0.66666667 1. ] -mean value: 0.847979797979798 +mean value: 0.8276301476301476 key: train_fscore -value: [0.98901099 0.98901099 0.98901099 1. 1. 0.98901099 - 0.97777778 0.98901099 1. 1. ] +value: [1. 0.98901099 0.98901099 0.98901099 0.98901099 0.98901099 + 0.98901099 0.98901099 1. 0.98901099] -mean value: 0.9922832722832723 +mean value: 0.9912087912087912 key: test_precision -value: [0.83333333 0.83333333 0.83333333 0.66666667 0.66666667 0.71428571 - 0.71428571 0.83333333 1. 0.71428571] +value: [0.71428571 0.83333333 0.75 0.8 0.83333333 0.625 + 1. 0.71428571 0.57142857 1. ] -mean value: 0.780952380952381 +mean value: 0.7841666666666667 key: train_precision -value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087 - 0.97777778 0.97826087 1. 1. ] +value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 1. 0.97826087] -mean value: 0.9869082125603865 +mean value: 0.9826086956521739 key: test_recall -value: [1. 1. 1. 0.8 0.8 1. 1. 1. 0.8 1. ] +value: [1. 1. 0.6 0.8 1. 1. 0.8 1. 0.8 1. ] -mean value: 0.9400000000000001 +mean value: 0.9 key: train_recall -value: [1. 1. 1. 1. 1. 1. - 0.97777778 1. 1. 1. ] +value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] -mean value: 0.9977777777777778 +mean value: 1.0 key: test_roc_auc -value: [0.9 0.9 0.9 0.7 0.7 0.8 0.8 0.9 0.9 0.8] +value: [0.8 0.9 0.7 0.8 0.9 0.7 0.9 0.8 0.6 1. ] -mean value: 0.8300000000000001 +mean value: 0.81 key: train_roc_auc -value: [0.98888889 0.98888889 0.98888889 1. 1. 0.98888889 - 0.97777778 0.98888889 1. 1. ] +value: [1. 0.98888889 0.98888889 0.98888889 0.98888889 0.98888889 + 0.98888889 0.98888889 1. 
0.98888889] -mean value: 0.9922222222222222 +mean value: 0.991111111111111 key: test_jcc -value: [0.83333333 0.83333333 0.83333333 0.57142857 0.57142857 0.71428571 - 0.71428571 0.83333333 0.8 0.71428571] +value: [0.71428571 0.83333333 0.5 0.66666667 0.83333333 0.625 + 0.8 0.71428571 0.5 1. ] -mean value: 0.741904761904762 +mean value: 0.7186904761904762 key: train_jcc -value: [0.97826087 0.97826087 0.97826087 1. 1. 0.97826087 - 0.95652174 0.97826087 1. 1. ] +value: [1. 0.97826087 0.97826087 0.97826087 0.97826087 0.97826087 + 0.97826087 0.97826087 1. 0.97826087] -mean value: 0.9847826086956522 +mean value: 0.9826086956521739 -MCC on Blind test: 0.48 +MCC on Blind test: 0.37 -Accuracy on Blind test: 0.78 +Accuracy on Blind test: 0.72 diff --git a/scripts/ml/ml_data.py b/scripts/ml/ml_data.py index 488c549..62eba26 100644 --- a/scripts/ml/ml_data.py +++ b/scripts/ml/ml_data.py @@ -5,706 +5,726 @@ Created on Sun Mar 6 13:41:54 2022 @author: tanu """ -def setvars(gene,drug): - #https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline - import os, sys - import pandas as pd - import numpy as np - print(np.__version__) - print(pd.__version__) - import pprint as pp - from copy import deepcopy - from collections import Counter - from sklearn.impute import KNNImputer as KNN - from imblearn.over_sampling import RandomOverSampler - from imblearn.under_sampling import RandomUnderSampler - from imblearn.over_sampling import SMOTE - from sklearn.datasets import make_classification - from imblearn.combine import SMOTEENN - from imblearn.combine import SMOTETomek - - from imblearn.over_sampling import SMOTENC - from imblearn.under_sampling import EditedNearestNeighbours - from imblearn.under_sampling import RepeatedEditedNearestNeighbours - - from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score - from sklearn.metrics import roc_auc_score, 
roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report - - from sklearn.model_selection import train_test_split, cross_validate, cross_val_score - from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold - - from sklearn.pipeline import Pipeline, make_pipeline - #%% GLOBALS - rs = {'random_state': 42} - njobs = {'n_jobs': 10} - - scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef) - , 'accuracy' : make_scorer(accuracy_score) - , 'fscore' : make_scorer(f1_score) - , 'precision' : make_scorer(precision_score) - , 'recall' : make_scorer(recall_score) - , 'roc_auc' : make_scorer(roc_auc_score) - , 'jcc' : make_scorer(jaccard_score) - }) - - skf_cv = StratifiedKFold(n_splits = 10 - #, shuffle = False, random_state= None) - , shuffle = True,**rs) - - rskf_cv = RepeatedStratifiedKFold(n_splits = 10 - , n_repeats = 3 - , **rs) - - mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)} - jacc_score_fn = {'jcc': make_scorer(jaccard_score)} - - #%% FOR LATER: Combine ED logo data - #%% DONE: active aa site annotations **DONE on 15/05/2022 as part of generating merged_dfs - ########################################################################### - rs = {'random_state': 42} - njobs = {'n_jobs': 10} - homedir = os.path.expanduser("~") - - geneL_basic = ['pnca'] - geneL_na = ['gid'] - geneL_na_ppi2 = ['rpob'] - geneL_ppi2 = ['alr', 'embb', 'katg'] - - #num_type = ['int64', 'float64'] - num_type = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] - cat_type = ['object', 'bool'] - - #============== - # directories - #============== - datadir = homedir + '/git/Data/' - indir = datadir + drug + '/input/' - outdir = datadir + drug + '/output/' - - #======= - # input - #======= - - #--------- - # File 1 - #--------- - infile_ml1 = outdir + gene.lower() + '_merged_df3.csv' - #infile_ml2 = outdir + gene.lower() + '_merged_df2.csv' - - my_features_df = pd.read_csv(infile_ml1, index_col = 0) - my_features_df = 
my_features_df .reset_index(drop = True) - my_features_df.index - - my_features_df.dtypes - mycols = my_features_df.columns - - #--------- - # File 2 - #--------- - infile_aaindex = outdir + 'aa_index/' + gene.lower() + '_aa.csv' - aaindex_df = pd.read_csv(infile_aaindex, index_col = 0) - aaindex_df.dtypes - - #----------- - # check for non-numerical columns - #----------- - if any(aaindex_df.dtypes==object): - print('\naaindex_df contains non-numerical data') - - aaindex_df_object = aaindex_df.select_dtypes(include = cat_type) - print('\nTotal no. of non-numerial columns:', len(aaindex_df_object.columns)) - - expected_aa_ncols = len(aaindex_df.columns) - len(aaindex_df_object.columns) +#def setvars(gene,drug): +#https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline +import os, sys +import pandas as pd +import numpy as np +print(np.__version__) +print(pd.__version__) +import pprint as pp +from copy import deepcopy +from collections import Counter +from sklearn.impute import KNNImputer as KNN +from imblearn.over_sampling import RandomOverSampler +from imblearn.under_sampling import RandomUnderSampler +from imblearn.over_sampling import SMOTE +from sklearn.datasets import make_classification +from imblearn.combine import SMOTEENN +from imblearn.combine import SMOTETomek - #----------- - # Extract numerical data only - #----------- - print('\nSelecting numerical data only') - aaindex_df = aaindex_df.select_dtypes(include = num_type) +from imblearn.over_sampling import SMOTENC +from imblearn.under_sampling import EditedNearestNeighbours +from imblearn.under_sampling import RepeatedEditedNearestNeighbours - #--------------------------- - # aaindex: sanity check 1 - #--------------------------- - if len(aaindex_df.columns) == expected_aa_ncols: - print('\nPASS: successfully selected numerical columns only for aaindex_df') - else: - print('\nFAIL: Numbers mismatch' - , '\nExpected ncols:', expected_aa_ncols - , '\nGot:', 
len(aaindex_df.columns)) - - #--------------- - # check for NA - #--------------- - print('\nNow checking for NA in the remaining aaindex_cols') - c1 = aaindex_df.isna().sum() - c2 = c1.sort_values(ascending=False) - print('\nCounting aaindex_df cols with NA' - , '\nncols with NA:', sum(c2>0), 'columns' - , '\nDropping these...' - , '\nOriginal ncols:', len(aaindex_df.columns) - ) - aa_df = aaindex_df.dropna(axis=1) - - print('\nRevised df ncols:', len(aa_df.columns)) - - c3 = aa_df.isna().sum() - c4 = c3.sort_values(ascending=False) - - print('\nChecking NA in revised df...') - - if sum(c4>0): - sys.exit('\nFAIL: aaindex_df still contains cols with NA, please check and drop these before proceeding...') - else: - print('\nPASS: cols with NA successfully dropped from aaindex_df' - , '\nProceeding with combining aa_df with other features_df') - - #--------------------------- - # aaindex: sanity check 2 - #--------------------------- - expected_aa_ncols2 = len(aaindex_df.columns) - sum(c2>0) - if len(aa_df.columns) == expected_aa_ncols2: - print('\nPASS: ncols match' - , '\nExpected ncols:', expected_aa_ncols2 - , '\nGot:', len(aa_df.columns)) - else: - print('\nFAIL: Numbers mismatch' - , '\nExpected ncols:', expected_aa_ncols2 - , '\nGot:', len(aa_df.columns)) - - # Important: need this to identify aaindex cols - aa_df_cols = aa_df.columns - print('\nTotal no. 
of columns in clean aa_df:', len(aa_df_cols)) - - ############################################################################### - #%% Combining my_features_df and aaindex_df - #=========================== - # Merge my_df + aaindex_df - #=========================== - - if aa_df.columns[aa_df.columns.isin(my_features_df.columns)] == my_features_df.columns[my_features_df.columns.isin(aa_df.columns)]: - print('\nMerging on column: mutationinformation') - - if len(my_features_df) == len(aa_df): - expected_nrows = len(my_features_df) - print('\nProceeding to merge, expected nrows in merged_df:', expected_nrows) - else: - sys.exit('\nNrows mismatch, cannot merge. Please check' - , '\nnrows my_df:', len(my_features_df) - , '\nnrows aa_df:', len(aa_df)) - - #----------------- - # Reset index: mutationinformation - # Very important for merging - #----------------- - aa_df = aa_df.reset_index() - - expected_ncols = len(my_features_df.columns) + len(aa_df.columns) - 1 # for the no. of merging col +from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score +from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report - #----------------- - # Merge: my_features_df + aa_df - #----------------- - merged_df = pd.merge(my_features_df - , aa_df - , on = 'mutationinformation') - - #--------------------------- - # aaindex: sanity check 3 - #--------------------------- - if len(merged_df.columns) == expected_ncols: - print('\nPASS: my_features_df and aa_df successfully combined' - , '\nnrows:', len(merged_df) - , '\nncols:', len(merged_df.columns)) - else: - sys.exit('\nFAIL: could not combine my_features_df and aa_df' - , '\nCheck dims and merging cols!') - - #-------- - # Reassign so downstream code doesn't need to change - #-------- - my_df = merged_df.copy() - - #%% Data: my_df - # Check if non structural pos have crept in - # 
IDEALLY remove from source! But for rpoB do it here - # Drop NA where numerical cols have them - if gene.lower() in geneL_na_ppi2: - #D1148 get rid of - na_index = my_df['mutationinformation'].index[my_df['mcsm_na_affinity'].apply(np.isnan)] - my_df = my_df.drop(index=na_index) - - # FIXED: complete data for all muts inc L114M, F115L, V123L, V125I, V131M - # if gene.lower() in ['embb']: - # na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)] - # my_df = my_df.drop(index=na_index) - - # # Sanity check for non-structural positions - # print('\nChecking for non-structural postions') - # na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)] - # if len(na_index) > 0: - # print('\nNon-structural positions detected for gene:', gene.lower() - # , '\nTotal number of these detected:', len(na_index) - # , '\These are at index:', na_index - # , '\nOriginal nrows:', len(my_df) - # , '\nDropping these...') - # my_df = my_df.drop(index=na_index) - # print('\nRevised nrows:', len(my_df)) - # else: - # print('\nNo non-structural positions detected for gene:', gene.lower() - # , '\nnrows:', len(my_df)) - - - ########################################################################### - #%% Add lineage calculation columns - #FIXME: Check if this can be imported from config? 
- total_mtblineage_uc = 8 - lineage_colnames = ['lineage_list_all', 'lineage_count_all', 'lineage_count_unique', 'lineage_list_unique', 'lineage_multimode'] - #bar = my_df[lineage_colnames] - my_df['lineage_proportion'] = my_df['lineage_count_unique']/my_df['lineage_count_all'] - my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc - ########################################################################### - #%% Active site annotation column - # change from numberic to categorical +from sklearn.model_selection import train_test_split, cross_validate, cross_val_score +from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold - if my_df['active_site'].dtype in num_type: - my_df['active_site'] = my_df['active_site'].astype(object) - my_df['active_site'].dtype - #%% AA property change - #-------------------- - # Water prop change - #-------------------- - my_df['water_change'] = my_df['wt_prop_water'] + str('_to_') + my_df['mut_prop_water'] - my_df['water_change'].value_counts() +from sklearn.pipeline import Pipeline, make_pipeline +#%% GLOBALS +rs = {'random_state': 42} +njobs = {'n_jobs': 10} + +scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef) + , 'accuracy' : make_scorer(accuracy_score) + , 'fscore' : make_scorer(f1_score) + , 'precision' : make_scorer(precision_score) + , 'recall' : make_scorer(recall_score) + , 'roc_auc' : make_scorer(roc_auc_score) + , 'jcc' : make_scorer(jaccard_score) + }) + +skf_cv = StratifiedKFold(n_splits = 10 + #, shuffle = False, random_state= None) + , shuffle = True,**rs) + +rskf_cv = RepeatedStratifiedKFold(n_splits = 10 + , n_repeats = 3 + , **rs) + +mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)} +jacc_score_fn = {'jcc': make_scorer(jaccard_score)} + +#%% FOR LATER: Combine ED logo data +#%% DONE: active aa site annotations **DONE on 15/05/2022 as part of generating merged_dfs +########################################################################### 
+rs = {'random_state': 42} +njobs = {'n_jobs': 10} +homedir = os.path.expanduser("~") + +geneL_basic = ['pnca'] +geneL_na = ['gid'] +geneL_na_ppi2 = ['rpob'] +geneL_ppi2 = ['alr', 'embb', 'katg'] + +#num_type = ['int64', 'float64'] +num_type = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] +cat_type = ['object', 'bool'] + +#============== +# directories +#============== +datadir = homedir + '/git/Data/' +indir = datadir + drug + '/input/' +outdir = datadir + drug + '/output/' + +#======= +# input +#======= + +#--------- +# File 1 +#--------- +infile_ml1 = outdir + gene.lower() + '_merged_df3.csv' +#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv' + +my_features_df = pd.read_csv(infile_ml1, index_col = 0) +my_features_df = my_features_df .reset_index(drop = True) +my_features_df.index + +my_features_df.dtypes +mycols = my_features_df.columns + +#--------- +# File 2 +#--------- +infile_aaindex = outdir + 'aa_index/' + gene.lower() + '_aa.csv' +aaindex_df = pd.read_csv(infile_aaindex, index_col = 0) +aaindex_df.dtypes + +#----------- +# check for non-numerical columns +#----------- +if any(aaindex_df.dtypes==object): + print('\naaindex_df contains non-numerical data') + +aaindex_df_object = aaindex_df.select_dtypes(include = cat_type) +print('\nTotal no. 
of non-numerial columns:', len(aaindex_df_object.columns)) + +expected_aa_ncols = len(aaindex_df.columns) - len(aaindex_df_object.columns) + +#----------- +# Extract numerical data only +#----------- +print('\nSelecting numerical data only') +aaindex_df = aaindex_df.select_dtypes(include = num_type) + +#--------------------------- +# aaindex: sanity check 1 +#--------------------------- +if len(aaindex_df.columns) == expected_aa_ncols: + print('\nPASS: successfully selected numerical columns only for aaindex_df') +else: + print('\nFAIL: Numbers mismatch' + , '\nExpected ncols:', expected_aa_ncols + , '\nGot:', len(aaindex_df.columns)) - water_prop_changeD = { - 'hydrophobic_to_neutral' : 'change' - , 'hydrophobic_to_hydrophobic' : 'no_change' - , 'neutral_to_neutral' : 'no_change' - , 'neutral_to_hydrophobic' : 'change' - , 'hydrophobic_to_hydrophilic' : 'change' - , 'neutral_to_hydrophilic' : 'change' - , 'hydrophilic_to_neutral' : 'change' - , 'hydrophilic_to_hydrophobic' : 'change' - , 'hydrophilic_to_hydrophilic' : 'no_change' - } +#--------------- +# check for NA +#--------------- +print('\nNow checking for NA in the remaining aaindex_cols') +c1 = aaindex_df.isna().sum() +c2 = c1.sort_values(ascending=False) +print('\nCounting aaindex_df cols with NA' + , '\nncols with NA:', sum(c2>0), 'columns' + , '\nDropping these...' 
+ , '\nOriginal ncols:', len(aaindex_df.columns) + ) +aa_df = aaindex_df.dropna(axis=1) + +print('\nRevised df ncols:', len(aa_df.columns)) + +c3 = aa_df.isna().sum() +c4 = c3.sort_values(ascending=False) + +print('\nChecking NA in revised df...') + +if sum(c4>0): + sys.exit('\nFAIL: aaindex_df still contains cols with NA, please check and drop these before proceeding...') +else: + print('\nPASS: cols with NA successfully dropped from aaindex_df' + , '\nProceeding with combining aa_df with other features_df') - my_df['water_change'] = my_df['water_change'].map(water_prop_changeD) - my_df['water_change'].value_counts() +#--------------------------- +# aaindex: sanity check 2 +#--------------------------- +expected_aa_ncols2 = len(aaindex_df.columns) - sum(c2>0) +if len(aa_df.columns) == expected_aa_ncols2: + print('\nPASS: ncols match' + , '\nExpected ncols:', expected_aa_ncols2 + , '\nGot:', len(aa_df.columns)) +else: + print('\nFAIL: Numbers mismatch' + , '\nExpected ncols:', expected_aa_ncols2 + , '\nGot:', len(aa_df.columns)) - #-------------------- - # Polarity change - #-------------------- - my_df['polarity_change'] = my_df['wt_prop_polarity'] + str('_to_') + my_df['mut_prop_polarity'] - my_df['polarity_change'].value_counts() +# Important: need this to identify aaindex cols +aa_df_cols = aa_df.columns +print('\nTotal no. 
of columns in clean aa_df:', len(aa_df_cols)) + +############################################################################### +#%% Combining my_features_df and aaindex_df +#=========================== +# Merge my_df + aaindex_df +#=========================== + +if aa_df.columns[aa_df.columns.isin(my_features_df.columns)] == my_features_df.columns[my_features_df.columns.isin(aa_df.columns)]: + print('\nMerging on column: mutationinformation') + +if len(my_features_df) == len(aa_df): + expected_nrows = len(my_features_df) + print('\nProceeding to merge, expected nrows in merged_df:', expected_nrows) +else: + sys.exit('\nNrows mismatch, cannot merge. Please check' + , '\nnrows my_df:', len(my_features_df) + , '\nnrows aa_df:', len(aa_df)) + +#----------------- +# Reset index: mutationinformation +# Very important for merging +#----------------- +aa_df = aa_df.reset_index() + +expected_ncols = len(my_features_df.columns) + len(aa_df.columns) - 1 # for the no. of merging col + +#----------------- +# Merge: my_features_df + aa_df +#----------------- +merged_df = pd.merge(my_features_df + , aa_df + , on = 'mutationinformation') + +#--------------------------- +# aaindex: sanity check 3 +#--------------------------- +if len(merged_df.columns) == expected_ncols: + print('\nPASS: my_features_df and aa_df successfully combined' + , '\nnrows:', len(merged_df) + , '\nncols:', len(merged_df.columns)) +else: + sys.exit('\nFAIL: could not combine my_features_df and aa_df' + , '\nCheck dims and merging cols!') - polarity_prop_changeD = { +#-------- +# Reassign so downstream code doesn't need to change +#-------- +my_df = merged_df.copy() + +#%% Data: my_df +# Check if non structural pos have crept in +# IDEALLY remove from source! 
But for rpoB do it here +# Drop NA where numerical cols have them +if gene.lower() in geneL_na_ppi2: + #D1148 get rid of + na_index = my_df['mutationinformation'].index[my_df['mcsm_na_affinity'].apply(np.isnan)] + my_df = my_df.drop(index=na_index) + +# FIXED: complete data for all muts inc L114M, F115L, V123L, V125I, V131M +# if gene.lower() in ['embb']: +# na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)] +# my_df = my_df.drop(index=na_index) + +# # Sanity check for non-structural positions +# print('\nChecking for non-structural postions') +# na_index = my_df['mutationinformation'].index[my_df['ligand_distance'].apply(np.isnan)] +# if len(na_index) > 0: +# print('\nNon-structural positions detected for gene:', gene.lower() +# , '\nTotal number of these detected:', len(na_index) +# , '\These are at index:', na_index +# , '\nOriginal nrows:', len(my_df) +# , '\nDropping these...') +# my_df = my_df.drop(index=na_index) +# print('\nRevised nrows:', len(my_df)) +# else: +# print('\nNo non-structural positions detected for gene:', gene.lower() +# , '\nnrows:', len(my_df)) + + +########################################################################### +#%% Add lineage calculation columns +#FIXME: Check if this can be imported from config? 
+total_mtblineage_uc = 8 +lineage_colnames = ['lineage_list_all', 'lineage_count_all', 'lineage_count_unique', 'lineage_list_unique', 'lineage_multimode'] +#bar = my_df[lineage_colnames] +my_df['lineage_proportion'] = my_df['lineage_count_unique']/my_df['lineage_count_all'] +my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc +########################################################################### +#%% Active site annotation column +# change from numberic to categorical + +if my_df['active_site'].dtype in num_type: + my_df['active_site'] = my_df['active_site'].astype(object) + my_df['active_site'].dtype +#%% AA property change +#-------------------- +# Water prop change +#-------------------- +my_df['water_change'] = my_df['wt_prop_water'] + str('_to_') + my_df['mut_prop_water'] +my_df['water_change'].value_counts() + +water_prop_changeD = { + 'hydrophobic_to_neutral' : 'change' + , 'hydrophobic_to_hydrophobic' : 'no_change' + , 'neutral_to_neutral' : 'no_change' + , 'neutral_to_hydrophobic' : 'change' + , 'hydrophobic_to_hydrophilic' : 'change' + , 'neutral_to_hydrophilic' : 'change' + , 'hydrophilic_to_neutral' : 'change' + , 'hydrophilic_to_hydrophobic' : 'change' + , 'hydrophilic_to_hydrophilic' : 'no_change' +} + +my_df['water_change'] = my_df['water_change'].map(water_prop_changeD) +my_df['water_change'].value_counts() + +#-------------------- +# Polarity change +#-------------------- +my_df['polarity_change'] = my_df['wt_prop_polarity'] + str('_to_') + my_df['mut_prop_polarity'] +my_df['polarity_change'].value_counts() + +polarity_prop_changeD = { + 'non-polar_to_non-polar' : 'no_change' + , 'non-polar_to_neutral' : 'change' + , 'neutral_to_non-polar' : 'change' + , 'neutral_to_neutral' : 'no_change' + , 'non-polar_to_basic' : 'change' + , 'acidic_to_neutral' : 'change' + , 'basic_to_neutral' : 'change' + , 'non-polar_to_acidic' : 'change' + , 'neutral_to_basic' : 'change' + , 'acidic_to_non-polar' : 'change' + , 
'basic_to_non-polar' : 'change' + , 'neutral_to_acidic' : 'change' + , 'acidic_to_acidic' : 'no_change' + , 'basic_to_acidic' : 'change' + , 'basic_to_basic' : 'no_change' + , 'acidic_to_basic' : 'change'} + +my_df['polarity_change'] = my_df['polarity_change'].map(polarity_prop_changeD) +my_df['polarity_change'].value_counts() + +#-------------------- +# Electrostatics change +#-------------------- +my_df['electrostatics_change'] = my_df['wt_calcprop'] + str('_to_') + my_df['mut_calcprop'] +my_df['electrostatics_change'].value_counts() + +calc_prop_changeD = { 'non-polar_to_non-polar' : 'no_change' - , 'non-polar_to_neutral' : 'change' - , 'neutral_to_non-polar' : 'change' - , 'neutral_to_neutral' : 'no_change' - , 'non-polar_to_basic' : 'change' - , 'acidic_to_neutral' : 'change' - , 'basic_to_neutral' : 'change' - , 'non-polar_to_acidic' : 'change' - , 'neutral_to_basic' : 'change' - , 'acidic_to_non-polar' : 'change' - , 'basic_to_non-polar' : 'change' - , 'neutral_to_acidic' : 'change' - , 'acidic_to_acidic' : 'no_change' - , 'basic_to_acidic' : 'change' - , 'basic_to_basic' : 'no_change' - , 'acidic_to_basic' : 'change'} - - my_df['polarity_change'] = my_df['polarity_change'].map(polarity_prop_changeD) - my_df['polarity_change'].value_counts() - - #-------------------- - # Electrostatics change - #-------------------- - my_df['electrostatics_change'] = my_df['wt_calcprop'] + str('_to_') + my_df['mut_calcprop'] - my_df['electrostatics_change'].value_counts() - - calc_prop_changeD = { - 'non-polar_to_non-polar' : 'no_change' - , 'non-polar_to_polar' : 'change' - , 'polar_to_non-polar' : 'change' - , 'non-polar_to_pos' : 'change' - , 'neg_to_non-polar' : 'change' - , 'non-polar_to_neg' : 'change' - , 'pos_to_polar' : 'change' - , 'pos_to_non-polar' : 'change' - , 'polar_to_polar' : 'no_change' - , 'neg_to_neg' : 'no_change' - , 'polar_to_neg' : 'change' - , 'pos_to_neg' : 'change' - , 'pos_to_pos' : 'no_change' - , 'polar_to_pos' : 'change' - , 'neg_to_polar' : 
'change' - , 'neg_to_pos' : 'change' - } - - my_df['electrostatics_change'] = my_df['electrostatics_change'].map(calc_prop_changeD) - my_df['electrostatics_change'].value_counts() - - #-------------------- - # Summary change: Create a combined column summarising these three cols - #-------------------- - detect_change = 'change' - check_prop_cols = ['water_change', 'polarity_change', 'electrostatics_change'] - #my_df['aa_prop_change'] = (my_df.values == detect_change).any(1).astype(int) - my_df['aa_prop_change'] = (my_df[check_prop_cols].values == detect_change).any(1).astype(int) - my_df['aa_prop_change'].value_counts() - my_df['aa_prop_change'].dtype - - my_df['aa_prop_change'] = my_df['aa_prop_change'].map({1:'change' - , 0: 'no_change'}) - - my_df['aa_prop_change'].value_counts() - my_df['aa_prop_change'].dtype - - #%% IMPUTE values for OR [check script for exploration: UQ_or_imputer] - #-------------------- - # Impute OR values - #-------------------- - #or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher'] - sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq'] - or_cols = ['or_mychisq', 'log10_or_mychisq'] - - print("count of NULL values before imputation\n") - print(my_df[or_cols].isnull().sum()) - - my_dfI = pd.DataFrame(index = my_df['mutationinformation'] ) - - - my_dfI = pd.DataFrame(KNN(n_neighbors=3, weights="uniform").fit_transform(my_df[or_cols]) - , index = my_df['mutationinformation'] - , columns = or_cols ) - my_dfI.columns = ['or_rawI', 'logorI'] - my_dfI.columns - my_dfI = my_dfI.reset_index(drop = False) # prevents old index from being added as a column - my_dfI.head() - print("count of NULL values AFTER imputation\n") - print(my_dfI.isnull().sum()) - - #------------------------------------------- - # OR df Merge: with original based on index - #------------------------------------------- - #my_df['index_bm'] = my_df.index - mydf_imputed = pd.merge(my_df - , my_dfI - , on = 'mutationinformation') - #mydf_imputed = 
mydf_imputed.set_index(['index_bm']) - - my_df['log10_or_mychisq'].isna().sum() - mydf_imputed['log10_or_mychisq'].isna().sum() - mydf_imputed['logorI'].isna().sum() # should be 0 - - len(my_df.columns) - len(mydf_imputed.columns) - - #----------------------------------------- - # REASSIGN my_df after imputing OR values - #----------------------------------------- - my_df = mydf_imputed.copy() - - if my_df['logorI'].isna().sum() == 0: - print('\nPASS: OR values imputed, data ready for ML') - else: - sys.exit('\nFAIL: something went wrong, Data not ready for ML. Please check upstream!') - - #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - #--------------------------------------- - # TODO: try other imputation like MICE - #--------------------------------------- - #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - #%%######################################################################## - #========================== - # Data for ML - #========================== - my_df_ml = my_df.copy() - - #%% Build X: input for ML - common_cols_stabiltyN = ['ligand_distance' - , 'ligand_affinity_change' - , 'duet_stability_change' - , 'ddg_foldx' - , 'deepddg' - , 'ddg_dynamut2' - , 'mmcsm_lig' - , 'contacts'] - - # Build stability columns ~ gene - if gene.lower() in geneL_basic: - X_stabilityN = common_cols_stabiltyN - cols_to_mask = ['ligand_affinity_change'] - - if gene.lower() in geneL_ppi2: - # X_stabilityN = common_cols_stabiltyN + ['mcsm_ppi2_affinity' , 'interface_dist'] - geneL_ppi2_st_cols = ['mcsm_ppi2_affinity', 'interface_dist'] - X_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols - cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity'] - - if gene.lower() in geneL_na: - # X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] - geneL_na_st_cols = ['mcsm_na_affinity'] - X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols - cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity'] - - if gene.lower() in geneL_na_ppi2: - # X_stabilityN = 
common_cols_stabiltyN + ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] - geneL_na_ppi2_st_cols = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] - X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols - cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity'] - - - X_foldX_cols = [ 'electro_rr', 'electro_mm', 'electro_sm', 'electro_ss' - , 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss' - , 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss' - , 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss' - , 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss' - , 'volumetric_rr', 'volumetric_mm', 'volumetric_ss' - ] - - X_str = ['rsa' - #, 'asa' - , 'kd_values' - , 'rd_values'] - - X_ssFN = X_stabilityN + X_str + X_foldX_cols - - X_evolFN = ['consurf_score' - , 'snap2_score' - , 'provean_score'] - - X_genomic_mafor = ['maf' - , 'logorI' - # , 'or_rawI' - # , 'or_mychisq' - # , 'or_logistic' - # , 'or_fisher' - # , 'pval_fisher' - ] - - X_genomic_linegae = ['lineage_proportion' - , 'dist_lineage_proportion' - #, 'lineage' # could be included as a category but it has L2;L4 formatting - , 'lineage_count_all' - , 'lineage_count_unique' - ] - - X_genomicFN = X_genomic_mafor + X_genomic_linegae - - X_aaindexFN = list(aa_df_cols) - - print('\nTotal no. 
of features for aaindex:', len(X_aaindexFN)) - - # numerical feature names - numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN + , 'non-polar_to_polar' : 'change' + , 'polar_to_non-polar' : 'change' + , 'non-polar_to_pos' : 'change' + , 'neg_to_non-polar' : 'change' + , 'non-polar_to_neg' : 'change' + , 'pos_to_polar' : 'change' + , 'pos_to_non-polar' : 'change' + , 'polar_to_polar' : 'no_change' + , 'neg_to_neg' : 'no_change' + , 'polar_to_neg' : 'change' + , 'pos_to_neg' : 'change' + , 'pos_to_pos' : 'no_change' + , 'polar_to_pos' : 'change' + , 'neg_to_polar' : 'change' + , 'neg_to_pos' : 'change' +} + +my_df['electrostatics_change'] = my_df['electrostatics_change'].map(calc_prop_changeD) +my_df['electrostatics_change'].value_counts() + +#-------------------- +# Summary change: Create a combined column summarising these three cols +#-------------------- +detect_change = 'change' +check_prop_cols = ['water_change', 'polarity_change', 'electrostatics_change'] +#my_df['aa_prop_change'] = (my_df.values == detect_change).any(1).astype(int) +my_df['aa_prop_change'] = (my_df[check_prop_cols].values == detect_change).any(1).astype(int) +my_df['aa_prop_change'].value_counts() +my_df['aa_prop_change'].dtype + +my_df['aa_prop_change'] = my_df['aa_prop_change'].map({1:'change' + , 0: 'no_change'}) + +my_df['aa_prop_change'].value_counts() +my_df['aa_prop_change'].dtype + +#%% IMPUTE values for OR [check script for exploration: UQ_or_imputer] +#-------------------- +# Impute OR values +#-------------------- +#or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher'] +sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq'] +or_cols = ['or_mychisq', 'log10_or_mychisq'] + +print("count of NULL values before imputation\n") +print(my_df[or_cols].isnull().sum()) + +my_dfI = pd.DataFrame(index = my_df['mutationinformation'] ) - # categorical feature names - categorical_FN = ['ss_class' - # , 'wt_prop_water' - # , 'mut_prop_water' - # , 'wt_prop_polarity' - # 
, 'mut_prop_polarity' - # , 'wt_calcprop' - # , 'mut_calcprop' - , 'aa_prop_change' - , 'electrostatics_change' - , 'polarity_change' - , 'water_change' - , 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2] - , 'active_site' #[didn't use it for uq_v1] - #, 'gene_name' # will be required for the combined stuff - ] - #---------------------------------------------- - # count numerical and categorical features - #---------------------------------------------- +my_dfI = pd.DataFrame(KNN(n_neighbors=3, weights="uniform").fit_transform(my_df[or_cols]) + , index = my_df['mutationinformation'] + , columns = or_cols ) +my_dfI.columns = ['or_rawI', 'logorI'] +my_dfI.columns +my_dfI = my_dfI.reset_index(drop = False) # prevents old index from being added as a column +my_dfI.head() +print("count of NULL values AFTER imputation\n") +print(my_dfI.isnull().sum()) + +#------------------------------------------- +# OR df Merge: with original based on index +#------------------------------------------- +#my_df['index_bm'] = my_df.index +mydf_imputed = pd.merge(my_df + , my_dfI + , on = 'mutationinformation') +#mydf_imputed = mydf_imputed.set_index(['index_bm']) + +my_df['log10_or_mychisq'].isna().sum() +mydf_imputed['log10_or_mychisq'].isna().sum() +mydf_imputed['logorI'].isna().sum() # should be 0 + +len(my_df.columns) +len(mydf_imputed.columns) + +#----------------------------------------- +# REASSIGN my_df after imputing OR values +#----------------------------------------- +my_df = mydf_imputed.copy() + +if my_df['logorI'].isna().sum() == 0: + print('\nPASS: OR values imputed, data ready for ML') +else: + sys.exit('\nFAIL: something went wrong, Data not ready for ML. Please check upstream!') + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +#--------------------------------------- +# TODO: try other imputation like MICE +#--------------------------------------- +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ +#%%######################################################################## +#========================== +# Data for ML +#========================== +my_df_ml = my_df.copy() + +#%% Build X: input for ML +common_cols_stabiltyN = ['ligand_distance' + , 'ligand_affinity_change' + , 'duet_stability_change' + , 'ddg_foldx' + , 'deepddg' + , 'ddg_dynamut2' + , 'mmcsm_lig' + , 'contacts'] + +# Build stability columns ~ gene +if gene.lower() in geneL_basic: + X_stabilityN = common_cols_stabiltyN + cols_to_mask = ['ligand_affinity_change'] - print('\nNo. of numerical features:', len(numerical_FN) - , '\nNo. of categorical features:', len(categorical_FN)) +if gene.lower() in geneL_ppi2: +# X_stabilityN = common_cols_stabiltyN + ['mcsm_ppi2_affinity' , 'interface_dist'] + geneL_ppi2_st_cols = ['mcsm_ppi2_affinity', 'interface_dist'] + X_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols + cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity'] + +if gene.lower() in geneL_na: +# X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] + geneL_na_st_cols = ['mcsm_na_affinity'] + X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols + cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity'] + +if gene.lower() in geneL_na_ppi2: +# X_stabilityN = common_cols_stabiltyN + ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] + geneL_na_ppi2_st_cols = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist'] + X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols + cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity'] + + +X_foldX_cols = [ 'electro_rr', 'electro_mm', 'electro_sm', 'electro_ss' +, 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss' +, 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss' +, 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss' +, 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss' +, 'volumetric_rr', 'volumetric_mm', 'volumetric_ss' +] + +X_str = 
['rsa' + #, 'asa' + , 'kd_values' + , 'rd_values'] + +X_ssFN = X_stabilityN + X_str + X_foldX_cols + +X_evolFN = ['consurf_score' + , 'snap2_score' + , 'provean_score'] - ########################################################################### - #======================= - # Masking columns: - # (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10 - #======================= - # my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts() - # my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts() - - # my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), 'ligand_affinity_change'] = 0 - # (my_df_ml['ligand_affinity_change'] == 0).sum() - - my_df_ml['mutationinformation'][my_df_ml['ligand_distance']>10].value_counts() - my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts() - my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts() - - # mask the mcsm affinity related columns where ligand distance > 10 - my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0 - (my_df_ml['ligand_affinity_change'] == 0).sum() - - mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask] - - # write file for check - mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True) - mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv') - - #=================================================== - # Training and BLIND test set: actual vs imputed - # dst with actual values : training set - # dst with imputed values : blind test - #================================================== - my_df_ml[drug].isna().sum() #'na' ones are the blind_test set - - blind_test_df = my_df_ml[my_df_ml[drug].isna()] - blind_test_df.shape - - training_df = my_df_ml[my_df_ml[drug].notna()] - training_df.shape - - # Target 1: dst_mode - training_df[drug].value_counts() - training_df['dst_mode'].value_counts() - 
#################################################################### - #%% extracting dfs based on numerical, categorical column names - #---------------------------------- - # WITHOUT the target var included - #---------------------------------- - num_df = training_df[numerical_FN] - num_df.shape - - cat_df = training_df[categorical_FN] - cat_df.shape - - all_df = training_df[numerical_FN + categorical_FN] - all_df.shape - - #------------------------------ - # WITH the target var included: - #'wtgt': with target - #------------------------------ - # drug and dst_mode should be the same thing - num_df_wtgt = training_df[numerical_FN + ['dst_mode']] - num_df_wtgt.shape - - cat_df_wtgt = training_df[categorical_FN + ['dst_mode']] - cat_df_wtgt.shape - - all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']] - all_df_wtgt.shape - #%%######################################################################## - #============ - # ML data - #============ - #------ - # X: Training and Blind test (BTS) - #------ - X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL - X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL - #X = all_df_wtgt[numerical_FN] # training numerical only - #X_bts = blind_test_df[numerical_FN] # blind test data numerical - - #------ - # y - #------ - y = all_df_wtgt['dst_mode'] # training data y - y_bts = blind_test_df['dst_mode'] # blind data test y - - #X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']] - - # Quick check - #(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum() - for i in range(len(cols_to_mask)): - ind = i+1 - print('\nindex:', i, '\nind:', ind) - print('\nMask count check:' - , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum() - ) - - print('Original Data\n', Counter(y) - , 'Data dim:', X.shape) - - ########################################################################### - #%% - 
########################################################################### - # RESAMPLING - ########################################################################### - #------------------------------ - # Simple Random oversampling - # [Numerical + catgeorical] - #------------------------------ - oversample = RandomOverSampler(sampling_strategy='minority') - X_ros, y_ros = oversample.fit_resample(X, y) - print('Simple Random OverSampling\n', Counter(y_ros)) - print(X_ros.shape) - - #------------------------------ - # Simple Random Undersampling - # [Numerical + catgeorical] - #------------------------------ - undersample = RandomUnderSampler(sampling_strategy='majority') - X_rus, y_rus = undersample.fit_resample(X, y) - print('Simple Random UnderSampling\n', Counter(y_rus)) - print(X_rus.shape) - - #------------------------------ - # Simple combine ROS and RUS - # [Numerical + catgeorical] - #------------------------------ - oversample = RandomOverSampler(sampling_strategy='minority') - X_ros, y_ros = oversample.fit_resample(X, y) - undersample = RandomUnderSampler(sampling_strategy='majority') - X_rouC, y_rouC = undersample.fit_resample(X_ros, y_ros) - print('Simple Combined Over and UnderSampling\n', Counter(y_rouC)) - print(X_rouC.shape) - - #------------------------------ - # SMOTE_NC: oversampling - # [numerical + categorical] - #https://stackoverflow.com/questions/47655813/oversampling-smote-for-binary-and-categorical-data-in-python - #------------------------------ - # Determine categorical and numerical features - numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns - numerical_ix - num_featuresL = list(numerical_ix) - numerical_colind = X.columns.get_indexer(list(numerical_ix) ) - numerical_colind - - categorical_ix = X.select_dtypes(include=['object', 'bool']).columns - categorical_ix - categorical_colind = X.columns.get_indexer(list(categorical_ix)) - categorical_colind - - k_sm = 5 # 5 is deafult - sm_nc = 
SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs) - X_smnc, y_smnc = sm_nc.fit_resample(X, y) - print('SMOTE_NC OverSampling\n', Counter(y_smnc)) - print(X_smnc.shape) - globals().update(locals()) # TROLOLOLOLOLOLS - #print("i did a horrible hack :-)") - ############################################################################### - #%% SMOTE RESAMPLING for NUMERICAL ONLY* - # #------------------------------ - # # SMOTE: Oversampling - # # [Numerical ONLY] - # #------------------------------ - # k_sm = 1 - # sm = SMOTE(sampling_strategy = 'auto', k_neighbors = k_sm, **rs) - # X_sm, y_sm = sm.fit_resample(X, y) - # print(X_sm.shape) - # print('SMOTE OverSampling\n', Counter(y_sm)) - # y_sm_df = y_sm.to_frame() - # y_sm_df.value_counts().plot(kind = 'bar') - - # #------------------------------ - # # SMOTE: Over + Undersampling COMBINED - # # [Numerical ONLY] - # #----------------------------- - # sm_enn = SMOTEENN(enn=EditedNearestNeighbours(sampling_strategy='all', **rs, **njobs )) - # X_enn, y_enn = sm_enn.fit_resample(X, y) - # print(X_enn.shape) - # print('SMOTE Over+Under Sampling combined\n', Counter(y_enn)) - - ############################################################################### - # TODO: Find over and undersampling JUST for categorical data +X_genomic_mafor = ['maf' + , 'logorI' + # , 'or_rawI' + # , 'or_mychisq' + # , 'or_logistic' + # , 'or_fisher' + # , 'pval_fisher' + ] + +X_genomic_linegae = ['lineage_proportion' + , 'dist_lineage_proportion' + #, 'lineage' # could be included as a category but it has L2;L4 formatting + , 'lineage_count_all' + , 'lineage_count_unique' + ] + +X_genomicFN = X_genomic_mafor + X_genomic_linegae + +#X_aaindexFN = list(aa_df_cols) + +#print('\nTotal no. 
of features for aaindex:', len(X_aaindexFN)) + +# numerical feature names [NO aa_index] +numerical_FN = X_ssFN + X_evolFN + X_genomicFN + + +# categorical feature names +categorical_FN = ['ss_class' + # , 'wt_prop_water' + # , 'mut_prop_water' + # , 'wt_prop_polarity' + # , 'mut_prop_polarity' + # , 'wt_calcprop' + # , 'mut_calcprop' + , 'aa_prop_change' + , 'electrostatics_change' + , 'polarity_change' + , 'water_change' + , 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1, not v2] + , 'active_site' #[didn't use it for uq_v1] + #, 'gene_name' # will be required for the combined stuff + ] +#---------------------------------------------- +# count numerical and categorical features +#---------------------------------------------- + +print('\nNo. of numerical features:', len(numerical_FN) + , '\nNo. of categorical features:', len(categorical_FN)) + +########################################################################### +#======================= +# Masking columns: +# (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10 +#======================= +# my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts() +# my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts() + +# my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), 'ligand_affinity_change'] = 0 +# (my_df_ml['ligand_affinity_change'] == 0).sum() + +my_df_ml['mutationinformation'][my_df_ml['ligand_distance']>10].value_counts() +my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts() +my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts() + +# mask the mcsm affinity related columns where ligand distance > 10 +my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0 +(my_df_ml['ligand_affinity_change'] == 0).sum() + +mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask] + +# write file for check +mask_check.sort_values(by = 
['ligand_distance'], ascending = True, inplace = True) +mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv') + +#=================================================== +# Training and BLIND test set [UQ]: actual vs imputed +# No aa index but active_site included +# dst with actual values : training set +# dst with imputed values : blind test +#================================================== +my_df_ml[drug].isna().sum() #'na' ones are the blind_test set + +blind_test_df = my_df_ml[my_df_ml[drug].isna()] +blind_test_df.shape + +training_df = my_df_ml[my_df_ml[drug].notna()] +training_df.shape + +# Target 1: dst_mode +training_df[drug].value_counts() +training_df['dst_mode'].value_counts() +#################################################################### +#%% extracting dfs based on numerical, categorical column names +#---------------------------------- +# WITHOUT the target var included +#---------------------------------- +num_df = training_df[numerical_FN] +num_df.shape + +cat_df = training_df[categorical_FN] +cat_df.shape + +all_df = training_df[numerical_FN + categorical_FN] +all_df.shape + +#------------------------------ +# WITH the target var included: + #'wtgt': with target +#------------------------------ +# drug and dst_mode should be the same thing +num_df_wtgt = training_df[numerical_FN + ['dst_mode']] +num_df_wtgt.shape + +cat_df_wtgt = training_df[categorical_FN + ['dst_mode']] +cat_df_wtgt.shape + +all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']] +all_df_wtgt.shape +#%%######################################################################## +#============ +# ML data +#============ +#------ +# X: Training and Blind test (BTS) +#------ +X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL +X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL +#X = all_df_wtgt[numerical_FN] # training numerical only +#X_bts = blind_test_df[numerical_FN] # blind test data numerical + +#------ 
+# y +#------ +y = all_df_wtgt['dst_mode'] # training data y +y_bts = blind_test_df['dst_mode'] # blind data test y + +#X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']] + +# Quick check +#(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum() +for i in range(len(cols_to_mask)): + ind = i+1 + print('\nindex:', i, '\nind:', ind) + print('\nMask count check:' + , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum() + ) + +print('Original Data\n', Counter(y) + , 'Data dim:', X.shape) + +yc1 = Counter(y) +yc1_ratio = yc1[0]/yc1[1] + +yc2 = Counter(y_bts) +yc2_ratio = yc2[0]/yc2[1] + +print('\n-------------------------------------------------------------' + , '\nSuccessfully split data: UQ [no aa_index but active site included] training' + , '\nactual values: training set' + , '\nimputed values: blind test set' + , '\nTrain data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\ny_train numbers:', yc1 + , '\ny_train ratio:',yc1_ratio + , '\n' + , '\ny_test_numbers:', yc2 + , '\ny_test ratio:', yc2_ratio + , '\n-------------------------------------------------------------' + ) +########################################################################### +#%% +########################################################################### +# RESAMPLING +########################################################################### +#------------------------------ +# Simple Random oversampling +# [Numerical + catgeorical] +#------------------------------ +oversample = RandomOverSampler(sampling_strategy='minority') +X_ros, y_ros = oversample.fit_resample(X, y) +print('Simple Random OverSampling\n', Counter(y_ros)) +print(X_ros.shape) + +#------------------------------ +# Simple Random Undersampling +# [Numerical + catgeorical] +#------------------------------ +undersample = RandomUnderSampler(sampling_strategy='majority') +X_rus, y_rus = undersample.fit_resample(X, y) +print('Simple Random UnderSampling\n', Counter(y_rus)) 
+print(X_rus.shape) + +#------------------------------ +# Simple combine ROS and RUS +# [Numerical + catgeorical] +#------------------------------ +oversample = RandomOverSampler(sampling_strategy='minority') +X_ros, y_ros = oversample.fit_resample(X, y) +undersample = RandomUnderSampler(sampling_strategy='majority') +X_rouC, y_rouC = undersample.fit_resample(X_ros, y_ros) +print('Simple Combined Over and UnderSampling\n', Counter(y_rouC)) +print(X_rouC.shape) + +#------------------------------ +# SMOTE_NC: oversampling +# [numerical + categorical] +#https://stackoverflow.com/questions/47655813/oversampling-smote-for-binary-and-categorical-data-in-python +#------------------------------ +# Determine categorical and numerical features +numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns +numerical_ix +num_featuresL = list(numerical_ix) +numerical_colind = X.columns.get_indexer(list(numerical_ix) ) +numerical_colind + +categorical_ix = X.select_dtypes(include=['object', 'bool']).columns +categorical_ix +categorical_colind = X.columns.get_indexer(list(categorical_ix)) +categorical_colind + +k_sm = 5 # 5 is deafult +sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs) +X_smnc, y_smnc = sm_nc.fit_resample(X, y) +print('SMOTE_NC OverSampling\n', Counter(y_smnc)) +print(X_smnc.shape) +globals().update(locals()) # TROLOLOLOLOLOLS +#print("i did a horrible hack :-)") +############################################################################### +#%% SMOTE RESAMPLING for NUMERICAL ONLY* +# #------------------------------ +# # SMOTE: Oversampling +# # [Numerical ONLY] +# #------------------------------ +# k_sm = 1 +# sm = SMOTE(sampling_strategy = 'auto', k_neighbors = k_sm, **rs) +# X_sm, y_sm = sm.fit_resample(X, y) +# print(X_sm.shape) +# print('SMOTE OverSampling\n', Counter(y_sm)) +# y_sm_df = y_sm.to_frame() +# y_sm_df.value_counts().plot(kind = 'bar') + +# #------------------------------ +# # SMOTE: Over + 
Undersampling COMBINED +# # [Numerical ONLY] +# #----------------------------- +# sm_enn = SMOTEENN(enn=EditedNearestNeighbours(sampling_strategy='all', **rs, **njobs )) +# X_enn, y_enn = sm_enn.fit_resample(X, y) +# print(X_enn.shape) +# print('SMOTE Over+Under Sampling combined\n', Counter(y_enn)) + +############################################################################### +# TODO: Find over and undersampling JUST for categorical data diff --git a/scripts/ml/ml_data_rt.py b/scripts/ml/ml_data_rt.py index 5e5a01b..f235517 100644 --- a/scripts/ml/ml_data_rt.py +++ b/scripts/ml/ml_data_rt.py @@ -552,18 +552,18 @@ def setvars(gene,drug): #================================================= # Training and BLIND test set: imputed vs actual - # BUT in REVERSE i.e + # BUT in REVERSE i.e. # dst with actual values : blind test # dst with imputed values : training set #================================================== my_df_ml[drug].isna().sum() #'na' ones are now training set + blind_test_df = my_df_ml[my_df_ml[drug].notna()] + blind_test_df.shape + training_df = my_df_ml[my_df_ml[drug].isna()] training_df.shape - blind_test_df = my_df_ml[my_df_ml[drug].notna()] - blind_test_df.shape - # Target 1: dst_mode training_df[drug].value_counts() training_df['dst_mode'].value_counts() diff --git a/scripts/ml/pnca_config.py b/scripts/ml/pnca_config.py index ecc34f3..9fc27a8 100755 --- a/scripts/ml/pnca_config.py +++ b/scripts/ml/pnca_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site 
annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , '\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', Counter(y_bts) + , '\nTarget features ratio (test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + 
len(categorical_FN) ): print('\nPass: No. of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/rpob_config.py b/scripts/ml/rpob_config.py index 15a4cec..a995559 100755 --- a/scripts/ml/rpob_config.py +++ b/scripts/ml/rpob_config.py @@ -32,15 +32,36 @@ from ml_data import * # TT run all ML clfs: baseline mode from MultModelsCl import MultModelsCl -#%%########################################################################### - -print('\n#####################################################################\n') - -print('TESTING cmd:' +############################################################################ +print('\n#####################################################################\n' + , '\nRunning ML analysis: UQ [without AA index but with active site annotations]' , '\nGene name:', gene - , '\nDrug name:', drug - , '\nTotal input features:', X.shape - , '\n', Counter(y)) + , '\nDrug name:', drug) + +#================== +# Specify outdir +#================== + +outdir_ml = outdir + 'ml/uq_v1/' + +print('\nOutput directory:', outdir_ml) + +#%%########################################################################### +print('\nSanity checks:' + , '\nTotal input features:', len(X.columns) + , '\n' + , '\nTraining data size:', X.shape + , '\nTest data size:', X_bts.shape + , '\n' + , '\nTarget feature numbers (training data):', Counter(y) + , '\nTarget features ratio (training data:', yc1_ratio + , '\n' + , '\nTarget feature numbers (test data):', 
Counter(y_bts) + , '\nTarget features ratio (test data):', yc2_ratio + + , '\n\n#####################################################################\n') + +print('\n================================================================\n') print('Strucutral features (n):' , len(X_ssFN) @@ -50,11 +71,11 @@ print('Strucutral features (n):' , '\nOther struc columns:', X_str , '\n================================================================\n') -print('AAindex features (n):' - , len(X_aaindexFN) - , '\nThese are:\n' - , X_aaindexFN - , '\n================================================================\n') +# print('AAindex features (n):' +# , len(X_aaindexFN) +# , '\nThese are:\n' +# , X_aaindexFN +# , '\n================================================================\n') print('Evolutionary features (n):' , len(X_evolFN) @@ -75,20 +96,15 @@ print('Categorical features (n):' , categorical_FN , '\n================================================================\n') -if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): +if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): print('\nPass: No. 
of features match') else: sys.exit('\nFail: Count of feature mismatch') print('\n#####################################################################\n') -################################################################################ -#================== -# Specify outdir -#================== -outdir_ml = outdir + 'ml/v2/' - -################################################################################ +############################################################################### #================== # Baseline models #================== diff --git a/scripts/ml/running_ml_scripts.txt b/scripts/ml/running_ml_scripts.txt index f9b271a..ae3a8e3 100644 --- a/scripts/ml/running_ml_scripts.txt +++ b/scripts/ml/running_ml_scripts.txt @@ -1,49 +1,51 @@ ================================== -# BTS data: UQ -# Features NOT including AA index -# Date: 30/05/2022, but reran with active_site indication column that was added lateron -# TODO: put in folder called v1, and UQ in folder called UQ_v0 (will need to rename!) +# UQ run: same as ORIGINAL except +# Features NOT including AA index, but includes active site annotations +# Date: 30/05/2022, but reran with from my desktop on 19/05/2022 to capture log and include active_site indication column +# captures error: 2>$1 ================================= -./pnca_config.py -./embb_config.py -./gid_config.py -./katg_config.py -./rpob_config.py +./pnca_config.py 2>&1 | tee log_pnca_config.txt +./embb_config.py 2>&1 | tee log_embb_config.txt +./gid_config.py 2>&1 | tee log_gid_config.txt +./katg_config.py 2>&1 | tee log_katg_config.txt +./rpob_config.py 2>&1 | tee log_rpob_config.txt +./alr_config.py 2>&1 | tee log_alr_config.txt + +# ERROR, as expected, too few values! -## ./alr_config.py = NA -# ^^^^^^^^ FIXME! 
######################################################################## -================================== -# BTS data: imputed values -# All features including AA index -# Date: 16/05/2022 ================================= +# Split: ORIGINAL +# actual values: training set +# imputed values: blind set +# All features including AA index +# Date: 18/05/2022 # reran with minor formatting and log capture +# captures error: 2>&1 +================================= +./pnca_orig.py 2>&1 | tee log_pnca_orig.txt +./embb_orig.py 2>&1 | tee log_embb_orig.txt +./gid_orig.py 2>&1 | tee log_gid_orig.txt +./katg_orig.py 2>&1 | tee log_katg_orig.txt +./rpob_orig.py 2>&1 | tee log_rpob_orig.txt +./alr_orig.py 2>&1 | tee log_alr_orig.txt # ERROR, as expected, too few values! -./pnca_config.py -./embb_config.py -./gid_config.py -./katg_config.py -./rpob_config.py - -##./alr_config.py = NA ######################################################################## ================================= # Split: 70/30 # All features including AA index -# Date: 17/05/2022 and 18/05/2022 +# Date: 17/05/2022 and 18/05/2022, reran with minor formatting and log capture # captures error: 2>$1 ================================= - -./pnca_7030.py -./embb_7030.py -# ./gid_7030.py: problems, CT values are non existing except for rouC +./pnca_7030.py 2>&1 | tee log_pnca_7030.txt +./embb_7030.py 2>&1 | tee log_embb_7030.txt ./gid_7030.py 2>&1 | tee log_gid_7030.txt -./katg_7030.py -./rpob_7030.py +./katg_7030.py 2>&1 | tee log_katg_7030.txt +./rpob_7030.py 2>&1 | tee log_rpob_7030.txt +./alr_7030.py 2>&1 | tee log_alr_7030.txt # ERROR, as expected, too few values!
-##./alr_7030.py = NA +# ./gid_7030.py: problems, CT values are nonexistent except for rouC ######################################################################## ================================= @@ -57,7 +59,7 @@ ./gid_8020.py 2>&1 | tee log_gid_8020.txt ./katg_8020.py 2>&1 | tee log_katg_8020.txt ./rpob_8020.py 2>&1 | tee log_rpob_8020.txt -./alr_8020.py 2>&1 | tee log_alr_8020.txt +./alr_8020.py 2>&1 | tee log_alr_8020.txt # ERROR, as expected, too few values! ######################################################################## @@ -72,11 +74,11 @@ ./gid_sl.py 2>&1 | tee log_gid_sl.txt ./katg_sl.py 2>&1 | tee log_katg_sl.txt ./rpob_sl.py 2>&1 | tee log_rpob_sl.txt -./alr_sl.py 2>&1 | tee log_alr_sl.txt +./alr_sl.py 2>&1 | tee log_alr_sl.txt # ERROR, as expected, too few values! ######################################################################## ================================= -# Split: Reverse training +# Split: REVERSE training # imputed values: training set # actual values: blind set # All features including AA index @@ -88,7 +90,7 @@ ./gid_rt.py 2>&1 | tee log_gid_rt.txt ./katg_rt.py 2>&1 | tee log_katg_rt.txt ./rpob_rt.py 2>&1 | tee log_rpob_rt.txt -./alr_rt.py 2>&1 | tee log_alr_rt.txt +./alr_rt.py 2>&1 | tee log_alr_rt.txt # ERROR, as expected, too few values! ######################################################################## # COMPLETE Data: actual + na i.e imputed @@ -105,7 +107,7 @@ ./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt ./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt ./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt -./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt +./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt # ERROR, as expected, too few values!
######################################################################## ================================= @@ -119,5 +121,18 @@ ./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt ./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt ./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt -./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt +./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt # ERROR, as expected, too few values! + +================================= +# Split: scaling law [COMPLETE DATA] +# All features including AA index +# Date: 18/05/2022 +# captures error: 2>&1 +================================= +./pnca_cd_sl.py 2>&1 | tee log_pnca_cd_sl.txt +./embb_cd_sl.py 2>&1 | tee log_embb_cd_sl.txt +./gid_cd_sl.py 2>&1 | tee log_gid_cd_sl.txt +./katg_cd_sl.py 2>&1 | tee log_katg_cd_sl.txt +./rpob_cd_sl.py 2>&1 | tee log_rpob_cd_sl.txt +./alr_cd_sl.py 2>&1 | tee log_alr_cd_sl.txt # ERROR, as expected, too few values!