#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 7 22:18:14 2022

@author: tanu
"""
# Create a pipeline that standardizes the data then creates a model
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names = names)
array = dataframe.values
X = array[:, 0:8]
X = pd.DataFrame(X)
Y = array[:, 8]
Y = pd.DataFrame(Y)

# KFold requires at least 2 splits (n_splits = 1 raises a ValueError)
kfold = KFold(n_splits = 10, random_state = None)
spl_type = "check"

# MultModelsCl is assumed to be defined/imported elsewhere in this project
fooD1 = MultModelsCl(input_df = X
                     , target = Y
                     , sel_cv = kfold
                     , run_blind_test = False
                     #, blind_test_df = df2['X_bts']
                     #, blind_test_target = df2['y_bts']
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )

# create pipeline
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))
model = Pipeline(estimators)

# evaluate pipeline
seed = 7
#kfold = KFold(n_splits = 10, shuffle = True, random_state = seed)  # shuffle=True is required when setting random_state
kfold = KFold(n_splits = 10, random_state = None)

results = cross_val_score(model, X, Y, cv = kfold)
print(results.mean())
results_A = round(results.mean(), 2)

results2 = cross_val_score(model, X, Y, cv = kfold, scoring = "recall")
print(results2.mean())
results_R = round(results2.mean(), 2)

results3 = cross_val_score(model, X, Y, cv = kfold, scoring = "precision")
print(results3.mean())
results_P = round(results3.mean(), 2)

results4 = cross_val_score(model, X, Y, cv = kfold, scoring = "f1")
print(results4.mean())
results_f1 = round(results4.mean(), 2)

results5 = cross_val_score(model, X, Y, cv = kfold, scoring = "jaccard")
print(results5.mean())
results_J = round(results5.mean(), 2)

results6 = cross_val_score(model, X, Y, cv = kfold, scoring = "matthews_corrcoef")
print(results6.mean())
results_mcc = round(results6.mean(), 2)
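#%%
# A minimal sketch (not part of the original workflow): the six separate
# cross_val_score() calls above can be collapsed into a single cross_validate()
# call by passing a list of scorer names. Variable names here (scoring_list,
# cv_scores) are illustrative only.
from sklearn.model_selection import cross_validate

scoring_list = ['accuracy', 'recall', 'precision', 'f1', 'jaccard', 'matthews_corrcoef']
cv_scores = cross_validate(model, X, Y, cv = kfold, scoring = scoring_list)
for metric in scoring_list:
    # cross_validate prefixes each requested scorer with 'test_'
    print(metric, round(cv_scores['test_' + metric].mean(), 2))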
#%%
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, jaccard_score, matthews_corrcoef)

X_train, X_test, y_train, y_test = train_test_split(X, Y, stratify = Y, test_size = 0.2)

fooD2 = MultModelsCl(input_df = X_train
                     , target = y_train
                     , sel_cv = kfold
                     , run_blind_test = True
                     , blind_test_df = X_test
                     , blind_test_target = y_test
                     , add_cm = False
                     , add_yn = False
                     , tts_split_type = spl_type
                     , resampling_type = 'none' # default
                     , var_type = ['mixed']
                     , scale_numeric = ['std']
                     , return_formatted_output = True
                     )

# fitting and predicting on the held-out (blind test, BTS) set
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# cross-validated scores on the training split
results_A = round(cross_val_score(model, X_train, y_train, cv = kfold).mean(), 2)
print(results_A)
results_P = round(cross_val_score(model, X_train, y_train, cv = kfold, scoring = "precision").mean(), 2)
print(results_P)
results_R = round(cross_val_score(model, X_train, y_train, cv = kfold, scoring = "recall").mean(), 2)
print(results_R)
results_F = round(cross_val_score(model, X_train, y_train, cv = kfold, scoring = "f1").mean(), 2)
print(results_F)
results_J = round(cross_val_score(model, X_train, y_train, cv = kfold, scoring = "jaccard").mean(), 2)
print(results_J)
results_M = round(cross_val_score(model, X_train, y_train, cv = kfold, scoring = "matthews_corrcoef").mean(), 2)
print(results_M)

# compare cross-validated (CV) scores with blind test (BTS) scores, metric by metric
print('\nCV example precision:', results_P)
print('BTS example precision:', round(precision_score(y_test, y_pred), 2))

print('\nCV example jaccard:', results_J)
print('BTS example jaccard:', round(jaccard_score(y_test, y_pred), 2))

print('\nCV example recall:', results_R)
print('BTS example recall:', round(recall_score(y_test, y_pred), 2))

print('\nCV example F1:', results_F)
print('BTS example F1:', round(f1_score(y_test, y_pred), 2))

print('\nCV example accuracy:', results_A)
print('BTS example accuracy:', round(accuracy_score(y_test, y_pred), 2))

print('\nCV example MCC:', results_M)
print('BTS example MCC:', round(matthews_corrcoef(y_test, y_pred), 2))
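#%%
# A minimal sketch (illustrative, not part of the original script): collect the
# CV-vs-blind-test comparison above into one table instead of repeated print
# pairs. get_scorer() applies the same named scorer to the already-fitted model
# on the blind test set; 'scores_df' and 'rows' are hypothetical names.
from sklearn.metrics import get_scorer

metric_names = ['accuracy', 'precision', 'recall', 'f1', 'jaccard', 'matthews_corrcoef']
rows = []
for metric in metric_names:
    cv_mean = cross_val_score(model, X_train, y_train, cv = kfold, scoring = metric).mean()
    bts_score = get_scorer(metric)(model, X_test, y_test)  # model was fitted above
    rows.append({'metric': metric, 'CV': round(cv_mean, 2), 'BTS': round(bts_score, 2)})

scores_df = pd.DataFrame(rows)
print(scores_df)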