added combined model FS code and run script

This commit is contained in:
Tanushree Tunstall 2022-09-03 12:28:36 +01:00
parent 78704dec5a
commit 2b953583e2
7 changed files with 1046 additions and 0 deletions

52
scripts/ml/untitled5.py Normal file
View file

@ -0,0 +1,52 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 2 11:11:49 2022
@author: tanu
"""
# https://towardsdatascience.com/explain-feature-variation-employing-pca-in-scikit-learn-6711e0a5c0b7
#import tensorflow as tf
#from tensorflow import keras
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# pca = PCA().fit(X)
# plt.plot(np.cumsum(pca.explained_variance_ratio_))
# plt.xlabel('number of components')
# plt.ylabel('cumulative explained variance')
# from old scripts
# PCA + k-nearest-neighbours experiment on the numerical features only.
#
# NOTE(review): `combined_DF_OS` and `combined_df` are neither defined nor
# imported in this file; they presumably come from the older scripts and must
# already be in the namespace -- confirm before running this file standalone.
fooD = combined_DF_OS(combined_df)
features_df = fooD['X']  # indexed like a DataFrame below (select_dtypes, columns)

# Numerical columns: labels, the same labels as a plain list, and their
# positional indices within the full feature table.
numerical_ix = features_df.select_dtypes(include=['int64', 'float64']).columns
num_featuresL = list(numerical_ix)
numerical_colind = features_df.columns.get_indexer(num_featuresL)
numF = features_df[numerical_ix]
print('numerical columns:', numerical_ix)
print('numerical column indices:', numerical_colind)

# Categorical columns: labels and positional indices (not used by the PCA
# below, kept for inspection).
categorical_ix = features_df.select_dtypes(include=['object', 'bool']).columns
categorical_colind = features_df.columns.get_indexer(list(categorical_ix))
print('categorical columns:', categorical_ix)
print('categorical column indices:', categorical_colind)

##############
RANDOM_STATE = 42         # fixed seed so the split (and the score) is reproducible
MAX_PCA_COMPONENTS = 50   # upper bound on the number of principal components
TEST_FRACTION = 0.2

X_train, X_test, y_train, y_test = train_test_split(
    numF, fooD['y'], test_size=TEST_FRACTION, random_state=RANDOM_STATE)

# PCA raises a ValueError when n_components exceeds min(n_samples, n_features)
# of the data it is fitted on, so cap the requested count at what is available.
n_components = min(MAX_PCA_COMPONENTS, *X_train.shape)

# NOTE(review): the features are not standardised before PCA; PCA is
# scale-sensitive, so consider scaling first -- left as-is to keep results
# comparable with earlier runs.
pca = PCA(n_components=n_components)
X_train_new = pca.fit_transform(X_train)  # fit on the training split only
X_test_new = pca.transform(X_test)        # reuse the training projection

print(X_train.shape)
print(X_train_new.shape)
print('explained variance ratio:', pca.explained_variance_ratio_)

clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train_new, y_train)
y_pred_new = clf.predict(X_test_new)

# Report the score explicitly: a bare expression is silently discarded when
# the file is run as a script rather than line-by-line in a console.
mcc = matthews_corrcoef(y_test, y_pred_new)
print('MCC:', mcc)