renamed hyperparams to gscv
This commit is contained in:
parent
a82358dbb4
commit
ad5ebad7f8
31 changed files with 4433 additions and 0 deletions
244
earlier_versions/practice_cv.py
Normal file
244
earlier_versions/practice_cv.py
Normal file
|
@ -0,0 +1,244 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Tue Mar 15 11:09:50 2022
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
import pprint as pp

import pandas as pd
from numpy import mean
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
|
||||
|
||||
# Baseline experiment: standardise the wine features, fit a default
# k-nearest-neighbours classifier, then cross-validate the same pipeline.
wine = load_wine()
X_train = wine.data
y_train = wine.target

model = Pipeline(steps=[
    ('pre', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])
model.fit(X_train, y_train)

from sklearn.model_selection import cross_validate

# 10-fold CV with no explicit `scoring`, so the pipeline's own score()
# is used; the returned dict holds one 'test_score' value per fold.
val = cross_validate(model, X_train, y_train, cv=10)
val['test_score'].mean()
|
||||
|
||||
# Stand-alone Matthews-correlation scorer.  The previous form wrapped a dict
# in make_scorer() with mismatched brackets, which was a SyntaxError;
# make_scorer() takes the metric function itself.
my_mcc = make_scorer(matthews_corrcoef)
|
||||
|
||||
|
||||
# for scoring in ({'accuracy' : make_scorer(accuracy_score)
|
||||
# , 'fscore' : make_scorer(f1_score)
|
||||
# , 'mcc' : make_scorer(matthews_corrcoef)
|
||||
# , 'precision' : make_scorer(precision_score)
|
||||
# , 'recall' : make_scorer(recall_score)
|
||||
# , 'roc_auc' : make_scorer(roc_auc_score)
|
||||
# , 'jaccard' : make_scorer(jaccard_score)
|
||||
# }
|
||||
# ,'accuracy', 'fscore', 'MCC', 'Precision', 'Recall', 'ROC_AUC', 'jaccard'):
|
||||
|
||||
# Scorers shared by the cross_validate() calls in this script.  Each key
# shows up in the results as 'test_<key>' (and 'train_<key>' when train
# scores are requested).
#
# f1/precision/recall use the metric defaults (average='binary'), so this
# dict assumes a two-class target -- TODO confirm for 'mutation_class'.
scoring_fn = {
    'accuracy': make_scorer(accuracy_score),
    'fscore': make_scorer(f1_score),
    'mcc': make_scorer(matthews_corrcoef),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    # AUC must be computed from continuous scores (decision_function /
    # predict_proba).  make_scorer(roc_auc_score) fed it hard predict()
    # labels instead, which is why it disagreed with scoring='roc_auc'.
    # The predefined scorer name selects the score-based variant.
    'roc_auc': 'roc_auc',
    # 'jaccard': make_scorer(jaccard_score),
}
|
||||
|
||||
# Multi-metric CV on the wine data using predefined scorer names.
# The wine target is multiclass, so the binary-only names ('f1',
# 'precision', 'recall', 'roc_auc') cannot score it; the macro-averaged /
# one-vs-rest variants are used instead.  Passing a dict keeps the result
# keys unchanged ('test_f1', 'test_precision', ...).
val2 = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring={
        'accuracy': 'accuracy',
        'f1': 'f1_macro',
        'precision': 'precision_macro',
        'recall': 'recall_macro',
        'roc_auc': 'roc_auc_ovr',
    },
    return_train_score=False,  # only 'test_*' entries are produced
)

val2
print(val2['test_f1'])  # per-fold values
# Mean of each metric over the 10 folds.
print(mean(val2['test_accuracy']))
print(mean(val2['test_f1']))
print(mean(val2['test_precision']))
print(mean(val2['test_recall']))
print(mean(val2['test_roc_auc']))
|
||||
|
||||
#%%
|
||||
# Same multi-metric CV on the wine data, but with scorers built through
# make_scorer().  The wine target is multiclass, so f1/precision/recall need
# an explicit averaging mode (the default average='binary' cannot score it),
# and AUC uses the score-based one-vs-rest scorer rather than hard labels.
# Result keys match the shared scorer dict: accuracy, fscore, mcc,
# precision, recall, roc_auc.
val3 = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring={
        'accuracy': make_scorer(accuracy_score),
        'fscore': make_scorer(f1_score, average='macro'),
        'mcc': make_scorer(matthews_corrcoef),
        'precision': make_scorer(precision_score, average='macro'),
        'recall': make_scorer(recall_score, average='macro'),
        'roc_auc': 'roc_auc_ovr',
    },
    return_train_score=False,
)

val3
# Mean of each metric over the 10 folds.
print(mean(val3['test_accuracy']))
print(mean(val3['test_fscore']))
print(mean(val3['test_mcc']))
print(mean(val3['test_precision']))
print(mean(val3['test_recall']))
print(mean(val3['test_roc_auc']))
|
||||
|
||||
#======================
# with CV.split
# Manual 10-fold cross-validation: split by hand, refit the pipeline on
# every training fold and collect the held-out score.
# NOTE(review): num_df_wtgt and numerical_FN are not defined in this file;
# the .iloc indexing below presumes num_df_wtgt is a pandas DataFrame.
scores = []
scores
#best_svr = SVR(kernel='rbf')
model = Pipeline([
    ('pre', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])
# Shuffling (shuffle=True, random_state=42) is left disabled here, so the
# folds are contiguous row ranges.
cv = KFold(n_splits=10)

for train_index, test_index in cv.split(num_df_wtgt[numerical_FN],
                                        num_df_wtgt['mutation_class']):
    # split() yields positional indices, hence .iloc
    X_train = num_df_wtgt[numerical_FN].iloc[train_index]
    X_test = num_df_wtgt[numerical_FN].iloc[test_index]
    y_train = num_df_wtgt['mutation_class'].iloc[train_index]
    y_test = num_df_wtgt['mutation_class'].iloc[test_index]

    model.fit(X_train, y_train)
    scores.append(model.score(X_test, y_test))

mean(scores)
|
||||
|
||||
################
# Same manual loop as above, but with stratified folds.
# NOTE(review): num_df_wtgt and numerical_FN are not defined in this file;
# the .iloc indexing below presumes num_df_wtgt is a pandas DataFrame.
scores_skf = []
# Shuffling / random_state options are left disabled here.
skf = StratifiedKFold(n_splits=10)

for train_index, test_index in skf.split(num_df_wtgt[numerical_FN],
                                         num_df_wtgt['mutation_class']):
    # split() yields positional indices, hence .iloc
    X_train = num_df_wtgt[numerical_FN].iloc[train_index]
    X_test = num_df_wtgt[numerical_FN].iloc[test_index]
    y_train = num_df_wtgt['mutation_class'].iloc[train_index]
    y_test = num_df_wtgt['mutation_class'].iloc[test_index]

    model.fit(X_train, y_train)
    scores_skf.append(model.score(X_test, y_test))

mean(scores_skf)
|
||||
|
||||
|
||||
# Built-in 10-fold CV for comparison with the manual fold loops.
# It is run on the full feature table: at this point X_train / y_train only
# hold the training part of the *last* manual fold, so scoring on them would
# not be comparable with the loop averages.  For a classifier, an integer
# `cv` makes cross_validate use stratified folds.
val = cross_validate(model,
                     num_df_wtgt[numerical_FN],
                     num_df_wtgt['mutation_class'],
                     cv=10)
val['test_score'].mean()
|
||||
#%% compare loopity loop vs CV with SKF
|
||||
# Shared seed, unpacked into every call that accepts random_state.
rs = {'random_state': 42}

# Stratified, shuffled hold-out split: 67% train / 33% test.
X_train, X_test, y_train, y_test = train_test_split(
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    test_size=0.33,
    shuffle=True,
    stratify=num_df_wtgt['mutation_class'],
    **rs,
)
|
||||
|
||||
# Candidate estimators; only log_reg is wired into the pipeline below.
log_reg = LogisticRegression(**rs)
nb = BernoulliNB()
knn = KNeighborsClassifier()
svm = SVC(**rs)

# Min-max scale the features, then fit one classifier.  To try another
# estimator, replace log_reg in the 'model' step (e.g. with nb or knn).
model_single_pipeline = Pipeline(steps=[
    ('pre', MinMaxScaler()),
    ('model', log_reg),
])
|
||||
|
||||
# 10-fold multi-metric CV of the single-model pipeline on the full feature
# table (not the X_train / y_train hold-out split), keeping train scores.
skf_cv = cross_validate(
    model_single_pipeline,
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    cv=10,
    scoring=scoring_fn,
    return_train_score=True,
)

skf_cv
# Fold-averaged test metrics, rounded to 2 d.p.; roc_auc (printed last) is
# the value previously noted as differing.
for metric in ('accuracy', 'fscore', 'mcc', 'precision', 'recall', 'roc_auc'):
    print(round(mean(skf_cv['test_' + metric]), 2))
|
||||
|
||||
|
||||
# %% Extracting skf_cv mean values and assiging to a dict
|
||||
# Collapse the per-fold arrays in skf_cv into rounded means, stored as
# {model name: {result key: mean}}.
models_single = [
    ('Logistic Regression', log_reg),
    # ('Naive Bayes', nb),
    # ('K-Nearest Neighbors', knn),
    # ('SVM', svm),
]

foo_single = {}
# NOTE(review): no cross-validation is run inside this loop; it summarises
# the skf_cv computed earlier, so every listed model would receive the same
# numbers.  The loop variable also rebinds the module-level name `model`.
for model_name, model in models_single:
    print(model_name)
    foo_single[model_name] = {}
    for key, value in skf_cv.items():
        mean_value = mean(value)  # computed once, reused below
        print('\nkey:', key, '\nvalue:', value)
        print('\nmean value:', mean_value)
        foo_single[model_name][key] = round(mean_value, 2)
# printed once after the loop
pp.pprint(foo_single)

# Columns are model names, rows are result keys; keep only the test rows.
foo_single_df = pd.DataFrame(foo_single)
foo_single_df
foo_single_df.filter(like='test_', axis=0)
|
||||
|
||||
# cross_val_score handles exactly one metric (macro-averaged F1 here) and
# returns the per-fold scores.
cval_score = cross_val_score(
    model,
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    scoring='f1_macro',
    cv=10,
)
print(cval_score)
print(round(mean(cval_score), 2))
|
||||
|
||||
|
||||
# %% Running multiple model with CV
|
||||
# Fresh estimator instances for the multi-model comparison.
log_reg = LogisticRegression(**rs)
nb = BernoulliNB()
knn = KNeighborsClassifier()
svm = SVC(**rs)

# (display name, estimator) pairs, evaluated in this order.
models = [
    ('Logistic Regression', log_reg),
    ('Naive Bayes', nb),
    ('K-Nearest Neighbors', knn),
    ('SVM', svm),
]
|
||||
|
||||
# Cross-validate every candidate model inside the same scaling pipeline and
# collect fold-averaged results as {model name: {result key: mean}}.
foo = {}
for model_name, model_fn in models:
    model_pipeline = Pipeline([
        ('pre', MinMaxScaler()),
        ('model', model_fn),
    ])
    print('Running model pipeline:', model_pipeline)

    # 10-fold multi-metric CV on the current X_train / y_train.
    skf_cv = cross_validate(model_pipeline,
                            X_train,
                            y_train,
                            cv=10,
                            scoring=scoring_fn,
                            return_train_score=True)

    foo[model_name] = {}
    for key, value in skf_cv.items():
        mean_value = mean(value)  # computed once, reused below
        print('\nkey:', key, '\nvalue:', value)
        print('\nmean value:', mean_value)
        foo[model_name][key] = round(mean_value, 2)
# printed once after all models have run
pp.pprint(foo)
|
||||
|
||||
# Construct the results table: one column per model, one row per result
# key, then keep only the 'test_*' rows.
foo_df = pd.DataFrame(foo)
foo_df
scores_df = foo_df.filter(like='test_', axis=0)

# Three alternative constructors applied to the same nested dict.
a = pd.DataFrame(foo)
b = pd.DataFrame.from_dict(foo)
c = pd.DataFrame.from_records(foo)
|
||||
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue