52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Created on Fri Sep 2 11:11:49 2022
|
||
|
||
@author: tanu
|
||
"""
|
||
# https://towardsdatascience.com/explain-feature-variation-employing-pca-in-scikit-learn-6711e0a5c0b7
|
||
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
#import tensorflow as tf
#from tensorflow import keras
|
||
|
||
# pca = PCA().fit(X)
|
||
# plt.plot(np.cumsum(pca.explained_variance_ratio_))
|
||
# plt.xlabel('number of components')
|
||
# plt.ylabel('cumulative explained variance')
|
||
|
||
# from old scripts
|
||
# ---------------------------------------------------------------------------
# PCA followed by k-nearest-neighbours on the numerical feature columns.
#
# `combined_DF_OS` and `combined_df` are not defined in this file ("from old
# scripts"); they must already be in scope when this section runs.
# NOTE(review): presumably fooD['X'] is the feature DataFrame and fooD['y']
# holds the target labels -- confirm against combined_DF_OS.
# ---------------------------------------------------------------------------
fooD = combined_DF_OS(combined_df)
features_df = fooD['X']

# Numerical columns: column labels, the same labels as a plain list, and
# their positional indices within the feature frame.
numerical_ix = features_df.select_dtypes(include=['int64', 'float64']).columns
num_featuresL = list(numerical_ix)
numerical_colind = features_df.columns.get_indexer(num_featuresL)

# Only the numerical columns are fed to PCA below.
numF = features_df[numerical_ix]

# Categorical (object/bool) columns: labels and positional indices.
categorical_ix = features_df.select_dtypes(include=['object', 'bool']).columns
categorical_colind = features_df.columns.get_indexer(list(categorical_ix))

##############

# Hold out 20% of the rows for testing; the fixed seed makes the split (and
# therefore the reported score) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    numF, fooD['y'], test_size=0.2, random_state=42
)

# PCA rejects n_components larger than min(n_samples, n_features), so cap the
# requested 50 components by the shape of the training split.
n_pca_components = min(50, *X_train.shape)
pca = PCA(n_components=n_pca_components)

# Fit the projection on the training rows only, then apply it to the test rows.
X_train_new = pca.fit_transform(X_train)
X_test_new = pca.transform(X_test)
print(X_train.shape)
print(X_train_new.shape)

# Fraction of variance captured by each retained component.
explained_variance_ratio = pca.explained_variance_ratio_
print(explained_variance_ratio)

# Classify in the reduced space with a 5-nearest-neighbours model.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train_new, y_train)
y_pred_new = clf.predict(X_test_new)

# Matthews correlation coefficient on the held-out rows; kept in a variable
# and printed so the result is not lost when run as a script.
mcc = matthews_corrcoef(y_test, y_pred_new)
print(mcc)
|