#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Scratch experiments with scikit-learn preprocessing and PCA.

Created on Mon Feb 21 13:06:25 2022

@author: tanu

The script is intentionally flat (no functions) and consists of four
sections:

1. Min-max scaling of ``X_train`` and a check that transforming through the
   object returned by ``scaler.fit`` gives the same values as transforming
   through ``scaler`` itself.
2. A ``ColumnTransformer`` on a toy frame with one numeric and one
   categorical column.
3. The same transformer on a toy frame with two numeric and two categorical
   columns.
4. A two-component PCA on the numeric columns of the second toy frame.
"""

import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn import preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# ---------------------------------------------------------------------------
# 1. Min-max scaling
# ---------------------------------------------------------------------------
# NOTE(review): X_train is not defined anywhere in this file; presumably it is
# supplied by the interactive session the script is run in -- confirm.
print(X_train)

scaler = preprocessing.MinMaxScaler()
scaler.fit(X_train)

x_train_scaled = scaler.transform(X_train)
print(x_train_scaled)

# scikit-learn documents fit() as returning the fitted estimator, so going
# through the return value is expected to give the same transform as above.
foo = scaler.fit(X_train)

x_train_scaled2 = foo.transform(X_train)
print(x_train_scaled2)

# Element-wise comparison of the two scaled results, reduced to one boolean.
print((x_train_scaled == x_train_scaled2).all())

# ---------------------------------------------------------------------------
# 2. ColumnTransformer: one numeric + one categorical column
# ---------------------------------------------------------------------------
toy = pd.DataFrame({
    'numeric': [1., 2., 3., 4., 5.],
    'category': ['a', 'a', 'b', 'c', 'b'],
})

numeric_features = ['numeric']
categorical_features = ['category']

# Standardise the numeric column(s) and one-hot encode the categorical one(s).
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(), categorical_features),
])
preprocessor.fit(toy)

bar = preprocessor.transform(toy)
print(bar)

# ---------------------------------------------------------------------------
# 3. ColumnTransformer: two numeric + two categorical columns
# ---------------------------------------------------------------------------
toy2 = pd.DataFrame({
    'numeric': [1., 2., 3., 4., 5.],
    'numeric2': [1., 2., 3., 4., 6.],
    'category': ['a', 'a', 'b', 'c', 'b'],
    'category2': ['b', 'a', 'b', 'e', 'f'],
})

# These names are deliberately rebound: the second experiment reuses them.
numeric_features = ['numeric', 'numeric2']
categorical_features = ['category', 'category2']

preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(), categorical_features),
])
preprocessor.fit(toy2)

bar2 = preprocessor.transform(toy2)
print(bar2)

# ---------------------------------------------------------------------------
# 4. PCA on the numeric columns of toy2
# ---------------------------------------------------------------------------
# The first two columns of toy2 are exactly the numeric features listed above;
# select them by name once instead of slicing by position twice.
pca_input = toy2[numeric_features]

pca = PCA(n_components=2)
pca.fit(pca_input)

columns = [f'pca_{i}' for i in range(2)]
df_pca = DataFrame(
    pca.transform(pca_input),
    columns=columns,
    index=toy2.index,  # keep the original row labels on the projected data
)
print(df_pca.head())