added practice and base_estimator for all the confusion in my head
This commit is contained in:
parent
e28a296d98
commit
97620c1bb0
3 changed files with 513 additions and 0 deletions
33
MultClassPipe3_CALL.py
Normal file
33
MultClassPipe3_CALL.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Tue Mar 15 11:09:50 2022
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
# stratified shuffle split
|
||||||
|
# Hold out 33% of the rows for testing; `stratify` keeps the class balance of
# 'mutation_class' the same in both parts.
# NOTE(review): `num_df_wtgt`, `numerical_FN`, `rs`, `pp`, `pd`,
# `train_test_split` and `MultClassPipelineCV` are not defined in this file --
# presumably they come from the interactive session / a script run beforehand.
X_train, X_test, y_train, y_test = train_test_split(
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    test_size=0.33,
    **rs,
    shuffle=True,
    stratify=num_df_wtgt['mutation_class'],
)

# Bar chart of the class counts on each side of the split.
y_train.to_frame().value_counts().plot(kind='bar')
y_test.to_frame().value_counts().plot(kind='bar')

# NOTE(review): the pipeline is invoked twice with identical arguments; the
# return value of this first call is discarded.
MultClassPipelineCV(
    X_train, X_test, y_train, y_test,
    input_df=num_df_wtgt[numerical_FN],
    var_type='numerical',
)

skf_cv_scores = MultClassPipelineCV(
    X_train, X_test, y_train, y_test,
    input_df=num_df_wtgt[numerical_FN],
    var_type='numerical',
)

pp.pprint(skf_cv_scores)

# Tabulate the scores, then pick rows whose index label contains
# 'test_' / 'train_' respectively.
skf_cv_scores_df = pd.DataFrame(skf_cv_scores)
skf_cv_scores_df
skf_cv_scores_df_test = skf_cv_scores_df.filter(like='test_', axis=0)
skf_cv_scores_df_train = skf_cv_scores_df.filter(like='train_', axis=0)
|
236
base_estimator.py
Normal file
236
base_estimator.py
Normal file
|
@ -0,0 +1,236 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Tue Mar 15 09:50:37 2022
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
#https://stackoverflow.com/questions/50272416/gridsearch-on-model-and-classifiers
|
||||||
|
|
||||||
|
#%%
|
||||||
|
# https://github.com/davidsbatista/machine-learning-notebooks/blob/master/hyperparameter-across-models.ipynb
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
|
||||||
|
|
||||||
|
from sklearn import datasets
|
||||||
|
from sklearn.ensemble import ExtraTreesClassifier
|
||||||
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
from sklearn.ensemble import AdaBoostClassifier
|
||||||
|
from sklearn.ensemble import GradientBoostingClassifier
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
|
||||||
|
from sklearn.base import BaseEstimator
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
from sklearn.linear_model import SGDClassifier
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
#%%
|
||||||
|
class EstimatorSelectionHelper:
    """Run one ``GridSearchCV`` per named model and collate the results.

    :param models: dict mapping a display name to an unfitted estimator
    :param params: dict mapping the same names to the parameter grid
        (a dict, or a list of dicts) handed to ``GridSearchCV``
    """

    def __init__(self, models, params):
        self.models = models
        self.params = params
        self.keys = models.keys()
        # Filled by fit(): name -> fitted GridSearchCV object.
        self.grid_searches = {}

    def fit(self, X, y, **grid_kwargs):
        """Fit a ``GridSearchCV`` for every model and store it by name.

        :param X: training features, passed straight to ``GridSearchCV.fit``
        :param y: training target, passed straight to ``GridSearchCV.fit``
        :param grid_kwargs: extra keyword arguments for the ``GridSearchCV``
            constructor (e.g. ``scoring``, ``cv``, ``n_jobs``)
        :raises KeyError: if a model has no entry in ``params``
        """
        # Fail before the first (potentially long) search instead of halfway
        # through the run when the missing key is finally looked up.
        missing = [key for key in self.keys if key not in self.params]
        if missing:
            raise KeyError('No parameter grid supplied for: %s'
                           % ', '.join(map(str, missing)))

        for key in self.keys:
            print('Running GridSearchCV for %s.' % key)
            model = self.models[key]
            params = self.params[key]
            grid_search = GridSearchCV(model, params, **grid_kwargs)
            grid_search.fit(X, y)
            self.grid_searches[key] = grid_search
        print('Done.')

    def score_summary(self, sort_by='mean_test_score'):
        """Return every search's ``cv_results_`` stacked into one DataFrame.

        Columns whose name contains ``param_`` and the ``rank_test_score``
        column are dropped, an ``estimator`` column holding the model name is
        placed first, and rows are sorted by ``sort_by`` in descending order.

        :param sort_by: name of the ``cv_results_`` column to sort on
        :return: pandas DataFrame with a fresh 0..n-1 index
        """
        frames = []
        for name, grid_search in self.grid_searches.items():
            frame = pd.DataFrame(grid_search.cv_results_)
            # Negative look-ahead: keep only columns without 'param_' in the name.
            frame = frame.filter(regex='^(?!.*param_).*$')
            frame['estimator'] = [name] * len(frame)
            frames.append(frame)
        df = pd.concat(frames)

        df = df.sort_values([sort_by], ascending=False)
        df = df.reset_index(drop=True)
        # `columns=` rather than a positional axis: pandas 2.x made every
        # argument of drop() after `labels` keyword-only.
        df = df.drop(columns=['rank_test_score'])

        columns = ['estimator'] + [col for col in df.columns if col != 'estimator']
        return df[columns]
|
||||||
|
|
||||||
|
#%%
|
||||||
|
# Demo of EstimatorSelectionHelper on scikit-learn's bundled breast-cancer data.
breast_cancer = datasets.load_breast_cancer()
X_cancer, y_cancer = breast_cancer.data, breast_cancer.target

# Each candidate is keyed by its class name and built with default settings.
models1 = {
    estimator_cls.__name__: estimator_cls()
    for estimator_cls in (
        ExtraTreesClassifier,
        RandomForestClassifier,
        AdaBoostClassifier,
        GradientBoostingClassifier,
    )
}

# Parameter grid per candidate; a list of dicts means several separate grids.
params1 = {
    'ExtraTreesClassifier': {'n_estimators': [16, 32]},
    'RandomForestClassifier': [
        {'n_estimators': [16, 32]},
        {'criterion': ['gini', 'entropy'], 'n_estimators': [8, 16]},
    ],
    'AdaBoostClassifier': {'n_estimators': [16, 32]},
    'GradientBoostingClassifier': {
        'n_estimators': [16, 32],
        'learning_rate': [0.8, 1.0],
    },
}

helper1 = EstimatorSelectionHelper(models1, params1)
helper1.fit(X_cancer, y_cancer, scoring='f1', n_jobs=2)
helper1.score_summary()  # result shown only when run interactively

mm_df = helper1.score_summary()
|
||||||
|
# COMMENT: Not yet sure which scores `mean_test_score` is the mean of, nor which scoring options are available
|
||||||
|
|
||||||
|
#%%
|
||||||
|
|
||||||
|
class ClfSwitcher(BaseEstimator):
    """Pipeline step that forwards every call to a swappable classifier.

    Because the wrapped classifier is an ordinary constructor parameter, a
    parameter grid can replace it through ``<step>__estimator``.
    """

    def __init__(self, estimator = SGDClassifier()):
        """
        :param estimator: sklearn object - the classifier to delegate to
        """
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        """Fit the wrapped classifier on ``X``/``y`` and return ``self``.

        Extra keyword arguments are accepted but not forwarded.
        """
        wrapped = self.estimator
        wrapped.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Return the wrapped classifier's predictions for ``X`` (``y`` is unused)."""
        wrapped = self.estimator
        return wrapped.predict(X)

    def predict_proba(self, X):
        """Return the wrapped classifier's ``predict_proba`` output for ``X``."""
        wrapped = self.estimator
        return wrapped.predict_proba(X)

    def score(self, X, y):
        """Return the wrapped classifier's own ``score`` on ``X``/``y``."""
        wrapped = self.estimator
        return wrapped.score(X, y)
|
||||||
|
|
||||||
|
# These three names are used below but are not imported at the top of this file.
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# One grid per candidate classifier.  'clf__estimator' swaps the object wrapped
# by the ClfSwitcher step named 'clf'; 'clf__estimator__<name>' then tunes a
# hyper-parameter of whichever classifier is plugged in.
# NOTE(review): newer scikit-learn releases spell the logistic loss 'log_loss'
# rather than 'log' -- confirm against the installed version.
parameters = [
    {
        'clf__estimator': [SGDClassifier()],  # SVM if hinge loss / logreg if log loss
        'clf__estimator__penalty': ('l2', 'elasticnet', 'l1'),
        'clf__estimator__max_iter': [50, 80],
        'clf__estimator__tol': [1e-4],
        'clf__estimator__loss': ['hinge', 'log', 'modified_huber'],
    },
    {
        'clf__estimator': [MultinomialNB()],
        'clf__estimator__alpha': (1e-2, 1e-3, 1e-1),
    },
]

pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', ClfSwitcher()),
])

gscv = GridSearchCV(
    pipeline,
    parameters,
    cv=5,
    n_jobs=12,
    return_train_score=False,
    verbose=3,
)

#%% my numerical data
# NOTE(review): `num_df_wtgt` and `numerical_FN` are not defined in this file;
# presumably they already exist in the interactive session.
X_train, X_test, y_train, y_test = train_test_split(
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    test_size=0.33,
    random_state=2,
    shuffle=True,
    stratify=num_df_wtgt['mutation_class'],
)

# Bar chart of the class counts on each side of the split.
y_train.to_frame().value_counts().plot(kind='bar')
y_test.to_frame().value_counts().plot(kind='bar')

#%%
gscv.fit(X_train, y_train)
print('Best model:\n', gscv.best_params_)
print('Best models score:\n', gscv.best_score_)
gscv.score(X_test, y_test)  # see how it does on test

#===========================================
# F1 of the best model's predictions on the held-out part.
mod_pred = gscv.predict(X_test)

fscore = f1_score(y_test, mod_pred)
fscore
|
||||||
|
#%% same as above
|
||||||
|
# custom classifier
|
||||||
|
|
||||||
|
class MyClassifier(BaseEstimator):
    """Classifier whose underlying model is chosen by a string switch.

    The concrete classifier is created inside ``fit`` and stored as
    ``classifier_``; ``predict`` and ``score`` delegate to it.
    """

    def __init__(self, classifier_type: str = 'SGDClassifier'):
        """
        :param classifier_type: string - the switch for different classifiers;
            'SGDClassifier' or 'MultinomialNB'
        """
        self.classifier_type = classifier_type

    def fit(self, X, y=None):
        """Instantiate the selected classifier, fit it and return ``self``.

        :raises ValueError: if ``classifier_type`` is not a known name
        """
        if self.classifier_type == 'SGDClassifier':
            self.classifier_ = SGDClassifier()
        elif self.classifier_type == 'MultinomialNB':
            self.classifier_ = MultinomialNB()
        else:
            raise ValueError('Unkown classifier type.')

        self.classifier_.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Return the fitted classifier's predictions for ``X`` (``y`` is unused)."""
        return self.classifier_.predict(X)

    def score(self, X, y):
        """Return the fitted classifier's own ``score`` on ``X``/``y``."""
        # The fitted model lives in `classifier_`; this class never sets an
        # `estimator` attribute, so delegating to `self.estimator` always failed.
        return self.classifier_.score(X, y)
|
||||||
|
|
||||||
|
# Used below but not imported at the top of this file.
from sklearn.preprocessing import MinMaxScaler

pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('clf', MyClassifier()),
])

# MyClassifier's only constructor parameter is `classifier_type`, so that is
# the only thing this pipeline's 'clf' step lets a grid vary.  The
# 'clf__estimator' / 'clf__estimator__<name>' grids belong to a ClfSwitcher
# step and would be rejected as invalid parameters here.
parameter_space = {
    'clf__classifier_type': ['SGDClassifier', 'MultinomialNB'],
}

search = GridSearchCV(pipeline, parameter_space, n_jobs=-1, cv=5)
search.fit(X_train, y_train)
print('Best model:\n', search.best_params_)
# Report this search's own score (the original printed the earlier `gscv` one).
print('Best models score:\n', search.best_score_)
|
244
practice_cv.py
Normal file
244
practice_cv.py
Normal file
|
@ -0,0 +1,244 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Tue Mar 15 11:09:50 2022
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
from sklearn.datasets import load_wine
|
||||||
|
from sklearn.model_selection import KFold
|
||||||
|
|
||||||
|
# Names used throughout this script that its import block does not provide.
from numpy import mean
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

wine = load_wine()
X_train, y_train = wine.data, wine.target

# Scale, then k-nearest-neighbours with default settings.
model = Pipeline([
    ('pre', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])
model.fit(X_train, y_train)

# Default scorer only: the mean of the ten per-fold test scores.
val = cross_validate(model, X_train, y_train, cv=10)
val['test_score'].mean()

# A single custom scorer (the original line had mismatched brackets and did
# not parse).
my_mcc = make_scorer(matthews_corrcoef)

# Several custom scorers at once; cross_validate reports each under
# 'test_<key>'.
scoring_fn = {
    'accuracy': make_scorer(accuracy_score),
    'fscore': make_scorer(f1_score),
    'mcc': make_scorer(matthews_corrcoef),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'roc_auc': make_scorer(roc_auc_score),
}

# NOTE(review): load_wine is a three-class problem, so the binary scorers used
# in this cell and the next ('f1', 'precision', 'recall', 'roc_auc' and their
# make_scorer counterparts) are expected to fail per fold and come back as NaN
# -- confirm, or switch to the '*_macro' variants for this dataset.
val2 = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring=('accuracy', 'f1', 'precision', 'recall', 'roc_auc'),
    return_train_score=False,
)

val2
print(val2['test_f1'])
print(mean(val2['test_accuracy']))
print(mean(val2['test_f1']))
print(mean(val2['test_precision']))
print(mean(val2['test_recall']))
print(mean(val2['test_roc_auc']))

#%%
# Same run, but scored with the make_scorer dictionary.
val3 = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring=scoring_fn,
    return_train_score=False,
)

val3
print(mean(val3['test_accuracy']))
print(mean(val3['test_fscore']))
print(mean(val3['test_mcc']))
print(mean(val3['test_precision']))
print(mean(val3['test_recall']))
print(mean(val3['test_roc_auc']))  # differs
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# with CV.split
|
||||||
|
# Manual cross-validation: iterate over the splitter's index pairs, fit on the
# training rows and score on the held-out rows.
# NOTE(review): `num_df_wtgt` and `numerical_FN` are not defined in this file;
# presumably they already exist in the interactive session.
scores = []
scores

model = Pipeline([
    ('pre', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Plain 10-fold split, created without shuffle / random_state arguments.
cv = KFold(n_splits=10)

for train_index, test_index in cv.split(num_df_wtgt[numerical_FN],
                                        num_df_wtgt['mutation_class']):
    X_train = num_df_wtgt[numerical_FN].iloc[train_index]
    X_test = num_df_wtgt[numerical_FN].iloc[test_index]
    y_train = num_df_wtgt['mutation_class'].iloc[train_index]
    y_test = num_df_wtgt['mutation_class'].iloc[test_index]

    model.fit(X_train, y_train)
    scores.append(model.score(X_test, y_test))

mean(scores)

################
# Same loop with a stratified 10-fold splitter.
scores_skf = []
skf = StratifiedKFold(n_splits=10)

for train_index, test_index in skf.split(num_df_wtgt[numerical_FN],
                                         num_df_wtgt['mutation_class']):
    X_train = num_df_wtgt[numerical_FN].iloc[train_index]
    X_test = num_df_wtgt[numerical_FN].iloc[test_index]
    y_train = num_df_wtgt['mutation_class'].iloc[train_index]
    y_test = num_df_wtgt['mutation_class'].iloc[test_index]

    model.fit(X_train, y_train)
    scores_skf.append(model.score(X_test, y_test))

mean(scores_skf)

# NOTE(review): X_train / y_train here are whatever the last fold of the loop
# above left behind, not the full data set.
val = cross_validate(model, X_train, y_train, cv=10)
val['test_score'].mean()
|
||||||
|
#%% compare loopity loop vs CV with SKF
|
||||||
|
# Shared seed, unpacked into every call that accepts `random_state`.
rs = {'random_state': 42}

X_train, X_test, y_train, y_test = train_test_split(
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    test_size=0.33,
    **rs,
    shuffle=True,
    stratify=num_df_wtgt['mutation_class'],
)

# Candidate classifiers; only log_reg is plugged into the pipeline below.
log_reg = LogisticRegression(**rs)
nb = BernoulliNB()
knn = KNeighborsClassifier()
svm = SVC(**rs)

model_single_pipeline = Pipeline([
    ('pre', MinMaxScaler()),
    ('model', log_reg),
])

# Cross-validate on the full feature table and target (not on the split made
# above), recording train as well as test scores.
skf_cv = cross_validate(
    model_single_pipeline,
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    cv=10,
    scoring=scoring_fn,
    return_train_score=True,
)

skf_cv

# Mean test score per metric, rounded to two decimals.
# The original marked the last one (roc_auc) as "differs".
for score_key in ('test_accuracy', 'test_fscore', 'test_mcc',
                  'test_precision', 'test_recall', 'test_roc_auc'):
    print(round(mean(skf_cv[score_key]), 2))
|
||||||
|
|
||||||
|
|
||||||
|
# %% Extracting skf_cv mean values and assiging to a dict
|
||||||
|
models_single = [
    ('Logistic Regression', log_reg),
]

# Collapse each array in skf_cv to its rounded mean, stored per model name.
# NOTE(review): the loop variable rebinds the module-level name `model`, so the
# cross_val_score call further down receives the last estimator listed in
# models_single rather than an earlier pipeline.
foo_single = {}
for model_name, model in models_single:
    print(model_name)
    model_summary = foo_single[model_name] = {}
    for key, value in skf_cv.items():
        print('\nkey:', key, '\nvalue:', value)
        print('\nmean value:', mean(value))
        model_summary[key] = round(mean(value), 2)
pp.pprint(foo_single)

foo_single_df = pd.DataFrame(foo_single)
foo_single_df
foo_single_df.filter(like='test_', axis=0)

# ONLY for a single score
cval_score = cross_val_score(
    model,
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    scoring='f1_macro',
    cv=10,
)
print(cval_score)
print(round(mean(cval_score), 2))
|
||||||
|
|
||||||
|
|
||||||
|
# %% Running multiple model with CV
|
||||||
|
# Fresh instances of the four classifiers to compare.
log_reg = LogisticRegression(**rs)
nb = BernoulliNB()
knn = KNeighborsClassifier()
svm = SVC(**rs)

models = [
    ('Logistic Regression', log_reg),
    ('Naive Bayes', nb),
    ('K-Nearest Neighbors', knn),
    ('SVM', svm),
]

# For every classifier: wrap it in a scaling pipeline, cross-validate on the
# training split, and keep the rounded mean of each array cross_validate returns.
foo = {}
for model_name, model_fn in models:
    model_pipeline = Pipeline([
        ('pre', MinMaxScaler()),
        ('model', model_fn),
    ])
    print('Running model pipeline:', model_pipeline)

    skf_cv = cross_validate(
        model_pipeline,
        X_train,
        y_train,
        cv=10,
        scoring=scoring_fn,
        return_train_score=True,
    )

    model_summary = foo[model_name] = {}
    for key, value in skf_cv.items():
        print('\nkey:', key, '\nvalue:', value)
        print('\nmean value:', mean(value))
        model_summary[key] = round(mean(value), 2)
pp.pprint(foo)

# Construct a DataFrame and select the rows whose label contains 'test_'.
foo_df = pd.DataFrame(foo)
foo_df
scores_df = foo_df.filter(like='test_', axis=0)

# Three constructors applied to the same nested dict, kept for comparison.
a = pd.DataFrame(foo)
b = pd.DataFrame.from_dict(foo)
c = pd.DataFrame.from_records(foo)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue