added my data ML test
This commit is contained in:
parent 8edd4c5b6d
commit 67e003df8b
57 changed files with 40473 additions and 4 deletions
109  other examples/eg1.py  Normal file
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 16 11:56:57 2022

@author: tanu
"""

# source:
# https://machinelearningmastery.com/automate-machine-learning-workflows-pipelines-python-scikit-learn/

###############################################
# Pipeline 1: Data Preparation and Modeling
###############################################

# Create a pipeline that standardizes the data then creates a model
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data: Pima Indians diabetes CSV, 8 feature columns and a 0/1 class label
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:, 0:8]
Y = array[:, 8]

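# --- added sketch, not part of the original script: a quick sanity check on the
# loaded frame before modelling; 'class' is the label column named above
print(dataframe.shape)                    # expected (768, 9) for this dataset
print(dataframe['class'].value_counts())  # class balance of the 0/1 outcome
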
# create pipeline: standardize the features, then fit an LDA classifier
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))
model = Pipeline(estimators)

# evaluate pipeline with 10-fold cross-validation
# (KFold only accepts a random_state when shuffle=True, so the seed is used together with shuffling)
seed = 7
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

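# --- added sketch, not part of the original script: the same pipeline written
# with make_pipeline, reporting the spread of the fold scores as well as the mean;
# reuses the X, Y and kfold objects defined above
from sklearn.pipeline import make_pipeline
lda_pipe = make_pipeline(StandardScaler(), LinearDiscriminantAnalysis())
lda_scores = cross_val_score(lda_pipe, X, Y, cv=kfold)
print("LDA pipeline: %.3f (+/- %.3f)" % (lda_scores.mean(), lda_scores.std()))
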
###############################################
# Pipeline 2: Feature Extraction and Modeling
###############################################

# Create a pipeline that extracts features from the data then creates a model
from sklearn.pipeline import FeatureUnion
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# load data (same Pima Indians diabetes dataset as in Pipeline 1)
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(url, names=names)
array = dataframe.values
X = array[:, 0:8]
Y = array[:, 8]

# create feature union: 3 PCA components combined with the 6 best univariate features
features = []
features.append(('pca', PCA(n_components=3)))
features.append(('select_best', SelectKBest(k=6)))
feature_union = FeatureUnion(features)

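# --- added sketch, not part of the original script: FeatureUnion concatenates the
# transformer outputs column-wise, so 3 PCA components + 6 selected columns = 9 features
print(feature_union.fit_transform(X, Y).shape)  # expected (768, 9)
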
# create pipeline: feature union followed by logistic regression
estimators = []
estimators.append(('feature_union', feature_union))
estimators.append(('logistic', LogisticRegression()))
model = Pipeline(estimators)

# evaluate pipeline (shuffle=True is required whenever a random_state is passed to KFold)
seed = 7
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

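# --- added sketch, not part of the original script: tuning the feature-union
# settings with a grid search over the whole pipeline; parameter names follow the
# sklearn <step>__<substep>__<param> convention using the step names defined above
from sklearn.model_selection import GridSearchCV
param_grid = {
    'feature_union__pca__n_components': [2, 3, 4],
    'feature_union__select_best__k': [4, 6, 8],
}
grid = GridSearchCV(model, param_grid, cv=kfold)
grid.fit(X, Y)
print(grid.best_params_, grid.best_score_)
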
#%%############################################################################
# Pipeline 3: PCA, scaling and a decision tree on the iris dataset
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
# load the iris dataset shipped with sklearn
iris = datasets.load_iris()
X = iris.data
y = iris.target

# split into training and test parts; 25% of the data is held out as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
# build the pipe flow:
# PCA (reduce to two components) -> scale the data -> DecisionTreeClassifier
from sklearn.pipeline import Pipeline
pipe = Pipeline([('pca', PCA(n_components=2)),
                 ('std', StandardScaler()),
                 ('decision_tree', DecisionTreeClassifier())],
                verbose=True)

# fit the pipeline on the training data
pipe.fit(X_train, y_train)

# score the fitted pipeline on the held-out test data
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, pipe.predict(X_test)))
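# --- added sketch, not part of the original script: inspecting the fitted steps
# of the pipe through named_steps; both attributes are standard sklearn
print(pipe.named_steps['pca'].explained_variance_ratio_)
print("tree depth:", pipe.named_steps['decision_tree'].get_depth())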