added CHECK_model

This commit is contained in:
Tanushree Tunstall 2022-07-08 13:53:44 +01:00
parent 23799275a0
commit 880ef46099


@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 22:18:14 2022
@author: tanu
"""
# Create a pipeline that standardizes the data then creates a model
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
X = pd.DataFrame(X)
Y = array[:,8]
Y = pd.DataFrame(Y)
kfold = KFold(n_splits=10, random_state=None) # KFold requires n_splits >= 2; n_splits=1 raises a ValueError
spl_type = "check"
fooD1 = MultModelsCl(input_df = X
, target = Y
, sel_cv = kfold
, run_blind_test = False
#, blind_test_df = df2['X_bts']
#, blind_test_target = df2['y_bts']
, add_cm = False
, add_yn = False
, tts_split_type = spl_type
, resampling_type = 'none' # default
, var_type = ['mixed']
, scale_numeric = ['std']
, return_formatted_output = True
)
# create pipeline
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))
model = Pipeline(estimators)
# evaluate pipeline
seed = 7
#kfold = KFold(n_splits=10, shuffle=True, random_state=seed) # random_state only takes effect with shuffle=True
kfold = KFold(n_splits=10, random_state=None)
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
results_A = round(results.mean(),2)
results2 = cross_val_score(model, X, Y, cv=kfold, scoring = "recall")
print(results2.mean())
results_R = round(results2.mean(),2)
results3 = cross_val_score(model, X, Y, cv=kfold, scoring = "precision")
print(results3.mean())
results_P = round(results3.mean(),2)
results4 = cross_val_score(model, X, Y, cv=kfold, scoring = "f1")
print(results4.mean())
results_f1 = round(results4.mean(),2)
results5 = cross_val_score(model, X, Y, cv=kfold, scoring = "jaccard")
print(results5.mean())
results_J = round(results5.mean(),2)
results6 = cross_val_score(model, X, Y, cv=kfold, scoring = "matthews_corrcoef")
print(results6.mean())
results_mcc = round(results6.mean(),2)
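# Sketch (not in the original script): the six cross_val_score calls above
# can be collected in a single pass with cross_validate; the metric names are
# standard sklearn scorer strings.
from sklearn.model_selection import cross_validate
cv_scoring = ['accuracy', 'recall', 'precision', 'f1', 'jaccard', 'matthews_corrcoef']
cv_out = cross_validate(model, X, Y, cv = kfold, scoring = cv_scoring)
for metric in cv_scoring:
    print(metric, round(cv_out['test_' + metric].mean(), 2))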
#%%
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.metrics import (accuracy_score, precision_score, recall_score
                             , f1_score, jaccard_score, matthews_corrcoef)
X_train, X_test, y_train, y_test = train_test_split(X, Y, stratify=Y, test_size=0.2)
fooD2 = MultModelsCl(input_df = X_train
, target = y_train
, sel_cv = kfold
, run_blind_test = True
, blind_test_df = X_test
, blind_test_target = y_test
, add_cm = False
, add_yn = False
, tts_split_type = spl_type
, resampling_type = 'none' # default
, var_type = ['mixed']
, scale_numeric = ['std']
, return_formatted_output = True
)
# fitting and predicting on test
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
results_A = round(cross_val_score(model, X_train, y_train, cv=kfold).mean(),2)
print(results_A)
results_P = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "precision").mean(),2)
print(results_P)
results_R = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "recall").mean(),2)
print(results_R)
results_F = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "f1").mean(),2)
print(results_F)
results_J = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "jaccard").mean(),2)
print(results_J)
results_M = round(cross_val_score(model, X_train, y_train, cv=kfold, scoring = "matthews_corrcoef").mean(),2)
print(results_M)
print('\nCV example precision:', results_P)
print('BTS example precision:', round(precision_score(y_test, y_pred),2))
print('\nCV example jaccard:', results_J)
print('BTS example jaccard:', round(jaccard_score(y_test, y_pred),2))
print('\nCV example recall:', results_R)
print('BTS example recall:', round(recall_score(y_test, y_pred),2))
print('\nCV example F1:', results_F)
print('BTS example F1:', round(f1_score(y_test, y_pred),2))
print('\nCV example accuracy:', results_A)
print('BTS example accuracy:', round(accuracy_score(y_test, y_pred),2))
print('\nCV example MCC:', results_M)
print('BTS example MCC:', round(matthews_corrcoef(y_test, y_pred),2))
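# Sketch (not in the original script): the paired CV/blind-test prints above
# can also be generated in a loop; metric_fns maps display names to the
# sklearn metric functions imported above.
metric_fns = {'Accuracy'  : (results_A, accuracy_score)
              , 'Precision': (results_P, precision_score)
              , 'Recall'   : (results_R, recall_score)
              , 'F1'       : (results_F, f1_score)
              , 'Jaccard'  : (results_J, jaccard_score)
              , 'MCC'      : (results_M, matthews_corrcoef)
              }
for name, (cv_score, fn) in metric_fns.items():
    print('\nCV', name, ':', cv_score)
    print('BTS', name, ':', round(fn(y_test, y_pred), 2))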