saving work

This commit is contained in:
Tanushree Tunstall 2022-03-04 19:15:49 +00:00
parent 51069fdb76
commit 89158bc669

View file

@ -15,6 +15,7 @@ from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import os
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
@ -23,15 +24,30 @@ import pandas as pd
# Script setup: resolve the user's home directory, switch the working
# directory to the test-data folder, build the per-drug data directories and
# load the merged CSV into `my_df`.
# NOTE(review): this text looks like a rendered diff hunk whose +/- markers
# were lost, so superseded and replacement lines may both be present below
# (e.g. `my_df` is read twice) -- confirm against the actual file.
homedir = os.path.expanduser("~")
# Relative paths used below are resolved against this directory.
os.chdir(homedir + "/git/ML_AI_training/test_data")
# this needs to be merged_df2 or merged_df3?
# NOTE(review): the next line is a comment and is missing '='; `gene` is not
# assigned anywhere in the visible lines, yet `gene.lower()` is called further
# down. Unless `gene` is defined elsewhere, that call raises NameError.
#gene 'pncA'
drug = 'pyrazinamide'
#drug = 'pyrazinamide'
# Relative read: depends on the os.chdir() call above.
my_df = pd.read_csv("pnca_merged_df3.csv")
#==============
# directories
#==============
datadir = homedir + '/git/Data/'
# `indir` is not used in the visible lines -- presumably needed later; verify.
indir = datadir + drug + '/input/'
outdir = datadir + drug + '/output/'
#=======
# input
#=======
# this needs to be merged_df2 or merged_df3?
# The input file is built from `outdir` (not `indir`) and the lower-cased
# gene name.
infile_ml1 = outdir + gene.lower() + '_merged_df3.csv'
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'
# Rebinds `my_df`, replacing the frame loaded from the relative path above.
my_df = pd.read_csv(infile_ml1)
# Bare expression: its value is discarded when run as a script; presumably
# kept for interactive inspection.
my_df.dtypes
my_df_cols = my_df.columns
# Lists of lower-case gene names; how they are used is not visible here.
# NOTE(review): `gene_baiscL` is presumably a typo for "basic" -- check for
# other references before renaming.
gene_baiscL = ['pnca']
geneL_naL = ['gid', 'rpob']
geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']
#%%============================================================================
# GET Y
# Y = my_df.loc[:,drug] #has NA
@ -116,7 +132,7 @@ def modelPipeline(X_train, X_test, y_train, y_test):
for clf_name, clf in clfs:
pipeline = Pipeline(steps=[
('scaler', StandardScaler()),
('scaler', MinMaxScaler()),
('classifier', clf)
]
)