added CHECK_model
This commit is contained in:
parent
23799275a0
commit
880ef46099
1 changed file with 153 additions and 0 deletions
153
scripts/ml/ml_functions/CHECK_model.py
Normal file
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 22:18:14 2022

@author: tanu
"""

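# NOTE: quick sanity-check script. It runs the project's MultModelsCl() wrapper on
# the Pima Indians diabetes data and then recomputes the same CV and blind-test
# ("BTS") metrics by hand with a plain scikit-learn pipeline for comparison.
# MultModelsCl is assumed to be importable from the project's ml_functions module;
# it is not defined or imported in this file.
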
# Create a pipeline that standardizes the data then creates a model
import pandas as pd

from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
X = pd.DataFrame(X)

Y = array[:,8]
Y = pd.DataFrame(Y)

# KFold requires n_splits >= 2 (a single split raises ValueError at construction)
kfold = KFold(n_splits=2, random_state=None)
spl_type = "check"
fooD1 = MultModelsCl(input_df = X
                     , target = Y
                     , sel_cv = kfold
                     , run_blind_test = False
                     #, blind_test_df = df2['X_bts']
                     #, blind_test_target = df2['y_bts']
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )

# create pipeline
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))
model = Pipeline(estimators)

# evaluate pipeline
seed = 7
#kfold = KFold(n_splits=10, random_state=seed)
kfold = KFold(n_splits=10, random_state=None)

# 10-fold CV of the hand-built pipeline with several scoring metrics
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
results_A = round(results.mean(),2)

results2 = cross_val_score(model, X, Y, cv=kfold, scoring = "recall")
print(results2.mean())
results_R = round(results2.mean(),2)

results3 = cross_val_score(model, X, Y, cv=kfold, scoring = "precision")
print(results3.mean())
results_P = round(results3.mean(),2)

results4 = cross_val_score(model, X, Y, cv=kfold, scoring = "f1")
print(results4.mean())
results_f1 = round(results4.mean(),2)

results5 = cross_val_score(model, X, Y, cv=kfold, scoring = "jaccard")
print(results5.mean())
results_J = round(results5.mean(),2)

results6 = cross_val_score(model, X, Y, cv=kfold, scoring = "matthews_corrcoef")
print(results6.mean())
results_mcc = round(results6.mean(),2)

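# (Sketch, not part of the original script) The six cross_val_score() calls above
# could equivalently be collapsed into one loop over scoring strings; every scorer
# name used here already appears in the calls above.
cv_scores = {s: round(cross_val_score(model, X, Y, cv=kfold, scoring=s).mean(), 2)
             for s in ['accuracy', 'recall', 'precision', 'f1', 'jaccard', 'matthews_corrcoef']}
print(cv_scores)
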
#%%
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
# scorers used for the blind-test ("BTS") comparison further down
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, jaccard_score, matthews_corrcoef)

X_train, X_test, y_train, y_test = train_test_split(X, Y, stratify=Y, test_size=0.2)

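# The stratified 80/20 split above is used twice below: it is handed to
# MultModelsCl() as the blind test ("BTS") set, and the same X_test / y_test
# are then scored by hand against the pipeline's predictions.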
fooD2 = MultModelsCl(input_df = X_train
                     , target = y_train
                     , sel_cv = kfold
                     , run_blind_test = True
                     , blind_test_df = X_test
                     , blind_test_target = y_test
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )

# fitting and predicting on test
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# CV scores on the training split
results_A = round(cross_val_score(model, X_train, y_train, cv=kfold).mean(),2)
print(results_A)

results_P = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "precision").mean(),2)
print(results_P)

results_R = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "recall").mean(),2)
print(results_R)

results_F = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "f1").mean(),2)
print(results_F)

results_J = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "jaccard").mean(),2)
print(results_J)

results_M = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "matthews_corrcoef").mean(),2)
print(results_M)

# CV (training split) vs blind test set ("BTS") scores, metric by metric
print('\nCV precision:', results_P)
print('BTS precision:', round(precision_score(y_test, y_pred),2))

print('\nCV jaccard:', results_J)
print('BTS jaccard:', round(jaccard_score(y_test, y_pred),2))

print('\nCV recall:', results_R)
print('BTS recall:', round(recall_score(y_test, y_pred),2))

print('\nCV F1:', results_F)
print('BTS F1:', round(f1_score(y_test, y_pred),2))

print('\nCV accuracy:', results_A)
print('BTS accuracy:', round(accuracy_score(y_test, y_pred),2))

print('\nCV MCC:', results_M)
print('BTS MCC:', round(matthews_corrcoef(y_test, y_pred),2))
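
# (Sketch, not part of the original script) The pairs of prints above can also be
# collected into one small DataFrame for a side-by-side CV vs BTS comparison;
# this only reuses the results_* variables and y_pred defined above.
score_summary = pd.DataFrame(
    {'CV':  [results_A, results_P, results_R, results_F, results_J, results_M],
     'BTS': [round(accuracy_score(y_test, y_pred), 2),
             round(precision_score(y_test, y_pred), 2),
             round(recall_score(y_test, y_pred), 2),
             round(f1_score(y_test, y_pred), 2),
             round(jaccard_score(y_test, y_pred), 2),
             round(matthews_corrcoef(y_test, y_pred), 2)]},
    index=['accuracy', 'precision', 'recall', 'f1', 'jaccard', 'mcc'])
print(score_summary)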