added CHECK_model
This commit is contained in: parent 23799275a0, commit 880ef46099
1 changed file with 153 additions and 0 deletions
scripts/ml/ml_functions/CHECK_model.py (new normal file, +153)
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 22:18:14 2022

@author: tanu
"""

# Create a pipeline that standardizes the data then creates a model
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
X = pd.DataFrame(X)

# target kept as a single-column DataFrame as in the original script;
# scikit-learn ravels it internally and may emit a DataConversionWarning
Y = array[:,8]
Y = pd.DataFrame(Y)
# NOTE: scikit-learn's KFold requires n_splits >= 2 (n_splits=1 raises a
# ValueError), so the quick check uses the minimum valid number of splits
kfold = KFold(n_splits=2, random_state=None)
spl_type = "check"

# MultModelsCl() is not defined in this file; it is expected to be imported
# from the project's ml_functions module before running this check
fooD1 = MultModelsCl(input_df = X
                     , target = Y
                     , sel_cv = kfold
                     , run_blind_test = False
                     #, blind_test_df = df2['X_bts']
                     #, blind_test_target = df2['y_bts']
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )

# create pipeline
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))
model = Pipeline(estimators)

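# --- Added sketch (assumption, not part of the original script) -----------
# The same two-step pipeline can be written more compactly with make_pipeline,
# which names the steps automatically ('standardscaler',
# 'lineardiscriminantanalysis'); 'model_alt' is an illustrative name only.
from sklearn.pipeline import make_pipeline
model_alt = make_pipeline(StandardScaler(), LinearDiscriminantAnalysis())
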
# evaluate pipeline
seed = 7
#kfold = KFold(n_splits=10, random_state=seed)
kfold = KFold(n_splits=10, random_state=None)

results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
results_A = round(results.mean(),2)

results2 = cross_val_score(model, X, Y, cv=kfold, scoring = "recall")
print(results2.mean())
results_R = round(results2.mean(),2)

results3 = cross_val_score(model, X, Y, cv=kfold, scoring = "precision")
print(results3.mean())
results_P = round(results3.mean(),2)

results4 = cross_val_score(model, X, Y, cv=kfold, scoring = "f1")
print(results4.mean())
results_f1 = round(results4.mean(),2)

results5 = cross_val_score(model, X, Y, cv=kfold, scoring = "jaccard")
print(results5.mean())
results_J = round(results5.mean(),2)

results6 = cross_val_score(model, X, Y, cv=kfold, scoring = "matthews_corrcoef")
print(results6.mean())
results_mcc = round(results6.mean(),2)

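# --- Added sketch (assumption, not part of the original script) -----------
# The six separate cross_val_score calls above refit the pipeline once per
# metric. cross_validate accepts a list of scorers, so all metrics come from
# a single set of CV fits; 'multi_scoring', 'cv_out' and 'scores_summary'
# are illustrative names only.
from sklearn.model_selection import cross_validate
multi_scoring = ['accuracy', 'recall', 'precision', 'f1', 'jaccard', 'matthews_corrcoef']
cv_out = cross_validate(model, X, Y, cv=kfold, scoring=multi_scoring)
scores_summary = {m: round(cv_out['test_' + m].mean(), 2) for m in multi_scoring}
print(scores_summary)
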
#%%
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
# metric functions used in the blind-test comparison further down
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, jaccard_score, matthews_corrcoef)

X_train, X_test, y_train, y_test = train_test_split(X, Y, stratify=Y, test_size=0.2)

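# --- Added sketch (assumption, not part of the original script) -----------
# Quick check that stratify=Y has preserved the class proportions in both
# splits (the target is the single column of each DataFrame).
print(Y.iloc[:, 0].value_counts(normalize=True))
print(y_train.iloc[:, 0].value_counts(normalize=True))
print(y_test.iloc[:, 0].value_counts(normalize=True))
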
fooD2 = MultModelsCl(input_df = X_train
                     , target = y_train
                     , sel_cv = kfold
                     , run_blind_test = True
                     , blind_test_df = X_test
                     , blind_test_target = y_test
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )
# fitting and predicting on test
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

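# --- Added sketch (assumption, not part of the original script) -----------
# A consolidated per-class view of blind-test performance for the fitted
# pipeline, alongside the individual metrics computed below.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
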
results_A = round(cross_val_score(model, X_train, y_train, cv=kfold).mean(),2)
print(results_A)

results_P = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "precision").mean(),2)
print(results_P)

results_R = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "recall").mean(),2)
print(results_R)

results_F = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "f1").mean(),2)
print(results_F)

results_J = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "jaccard").mean(),2)
print(results_J)

results_M = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "matthews_corrcoef").mean(),2)
print(results_M)

# compare CV means against the blind-test (BTS) predictions, metric by metric
print('\nCV precision:', results_P)
print('BTS precision:', round(precision_score(y_test, y_pred),2))

print('\nCV jaccard:', results_J)
print('BTS jaccard:', round(jaccard_score(y_test, y_pred),2))

print('\nCV recall:', results_R)
print('BTS recall:', round(recall_score(y_test, y_pred),2))

print('\nCV F1:', results_F)
print('BTS F1:', round(f1_score(y_test, y_pred),2))

print('\nCV accuracy:', results_A)
print('BTS accuracy:', round(accuracy_score(y_test, y_pred),2))

print('\nCV MCC:', results_M)
print('BTS MCC:', round(matthews_corrcoef(y_test, y_pred),2))
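# --- Added sketch (assumption, not part of the original script) -----------
# The paired CV/blind-test prints above can also be generated in one loop,
# keeping each CV mean next to the matching blind-test score;
# 'metric_funcs' and 'cv_means' are illustrative names only.
metric_funcs = {'accuracy': accuracy_score, 'precision': precision_score,
                'recall': recall_score, 'f1': f1_score,
                'jaccard': jaccard_score, 'MCC': matthews_corrcoef}
cv_means = {'accuracy': results_A, 'precision': results_P, 'recall': results_R,
            'f1': results_F, 'jaccard': results_J, 'MCC': results_M}
for name, fn in metric_funcs.items():
    print(f'\nCV {name}:', cv_means[name])
    print(f'BTS {name}:', round(fn(y_test, y_pred), 2))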