saving work

This commit is contained in:
Tanushree Tunstall 2022-03-04 19:15:49 +00:00
parent 51069fdb76
commit 89158bc669

View file

@ -15,6 +15,7 @@ from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import os import os
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
@ -23,15 +24,30 @@ import pandas as pd
homedir = os.path.expanduser("~") homedir = os.path.expanduser("~")
os.chdir(homedir + "/git/ML_AI_training/test_data") os.chdir(homedir + "/git/ML_AI_training/test_data")
# this needs to be merged_df2 or merged_df3?
#gene 'pncA' #gene 'pncA'
drug = 'pyrazinamide' #drug = 'pyrazinamide'
my_df = pd.read_csv("pnca_merged_df3.csv") #==============
# directories
#==============
datadir = homedir + '/git/Data/'
indir = datadir + drug + '/input/'
outdir = datadir + drug + '/output/'
#=======
# input
#=======
# TODO: confirm whether the input should be merged_df2 or merged_df3
infile_ml1 = outdir + gene.lower() + '_merged_df3.csv'
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'
my_df = pd.read_csv(infile_ml1)
my_df.dtypes my_df.dtypes
my_df_cols = my_df.columns my_df_cols = my_df.columns
gene_baiscL = ['pnca']
geneL_naL = ['gid', 'rpob']
geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']
#%%============================================================================ #%%============================================================================
# GET Y # GET Y
# Y = my_df.loc[:,drug] #has NA # Y = my_df.loc[:,drug] #has NA
@ -116,7 +132,7 @@ def modelPipeline(X_train, X_test, y_train, y_test):
for clf_name, clf in clfs: for clf_name, clf in clfs:
pipeline = Pipeline(steps=[ pipeline = Pipeline(steps=[
('scaler', StandardScaler()), ('scaler', MinMaxScaler()),
('classifier', clf) ('classifier', clf)
] ]
) )