#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep  2 11:11:49 2022

@author: tanu
"""
# https://towardsdatascience.com/explain-feature-variation-employing-pca-in-scikit-learn-6711e0a5c0b7
import numpy as np
import pandas as pd
import seaborn as sns
#import tensorflow as tf
#from tensorflow import keras

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import matthews_corrcoef

# pca = PCA().fit(X)
# plt.plot(np.cumsum(pca.explained_variance_ratio_))
# plt.xlabel('number of components')
# plt.ylabel('cumulative explained variance')

# from old scripts: combined_DF_OS() returns a dict with the feature
# matrix under 'X' and the target under 'y'
fooD = combined_DF_OS(combined_df)

# Numerical features: column names, positional indices and the sub-frame
numerical_ix = fooD['X'].select_dtypes(include=['int64', 'float64']).columns
numerical_ix
num_featuresL = list(numerical_ix)
numerical_colind = fooD['X'].columns.get_indexer(list(numerical_ix))
numerical_colind
numF = fooD['X'][numerical_ix]

# Categorical features: column names and positional indices
categorical_ix = fooD['X'].select_dtypes(include=['object', 'bool']).columns
categorical_ix
categorical_colind = fooD['X'].columns.get_indexer(list(categorical_ix))
categorical_colind

##############
# Train/test split on the numerical features only
X_train, X_test, y_train, y_test = train_test_split(numF, fooD['y'], test_size=0.2)

# Reduce to 50 principal components; fit PCA on the training set only,
# then apply the same transform to the test set
pca = PCA(n_components=50)
X_train_new = pca.fit_transform(X_train)
X_test_new = pca.transform(X_test)

print(X_train.shape)
print(X_train_new.shape)
pca.explained_variance_ratio_

# KNN on the PCA-transformed features, evaluated with Matthews correlation
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train_new, y_train)
y_pred_new = clf.predict(X_test_new)
matthews_corrcoef(y_test, y_pred_new)
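
# ----------------------------------------------------------------------
# Hedged sketch (added, not part of the original analysis): pick
# n_components from the cumulative explained variance instead of the
# hard-coded 50, following the commented-out plot near the top of the
# script. Assumes matplotlib is available and reuses `numF` from above;
# the 95% variance threshold is an illustrative choice, not from the source.
import matplotlib.pyplot as plt

pca_full = PCA().fit(numF)                               # PCA with all components retained
cum_var = np.cumsum(pca_full.explained_variance_ratio_)  # cumulative explained variance
n_components_95 = int(np.argmax(cum_var >= 0.95)) + 1    # smallest k capturing >= 95% variance
print(n_components_95)

plt.plot(cum_var)
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()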