saving work
This commit is contained in:
parent
51069fdb76
commit
89158bc669
1 changed files with 20 additions and 4 deletions
24
my_data5.py
24
my_data5.py
|
@ -15,6 +15,7 @@ from sklearn.neural_network import MLPClassifier
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import Pipeline
|
||||||
from xgboost import XGBClassifier
|
from xgboost import XGBClassifier
|
||||||
from sklearn.preprocessing import StandardScaler
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
import os
|
import os
|
||||||
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
|
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
|
||||||
|
@ -23,15 +24,30 @@ import pandas as pd
|
||||||
homedir = os.path.expanduser("~")
|
homedir = os.path.expanduser("~")
|
||||||
os.chdir(homedir + "/git/ML_AI_training/test_data")
|
os.chdir(homedir + "/git/ML_AI_training/test_data")
|
||||||
|
|
||||||
# this needs to be merged_df2 or merged_df3?
|
|
||||||
#gene = 'pncA'
|
#gene = 'pncA'
|
||||||
drug = 'pyrazinamide'
|
#drug = 'pyrazinamide'
|
||||||
|
|
||||||
my_df = pd.read_csv("pnca_merged_df3.csv")
|
#==============
|
||||||
|
# directories
|
||||||
|
#==============
|
||||||
|
datadir = homedir + '/git/Data/'
|
||||||
|
indir = datadir + drug + '/input/'
|
||||||
|
outdir = datadir + drug + '/output/'
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# input
|
||||||
|
#=======
|
||||||
|
# TODO: confirm whether this should read merged_df2 or merged_df3
|
||||||
|
infile_ml1 = outdir + gene.lower() + '_merged_df3.csv'
|
||||||
|
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'
|
||||||
|
|
||||||
|
my_df = pd.read_csv(infile_ml1)
|
||||||
my_df.dtypes
|
my_df.dtypes
|
||||||
my_df_cols = my_df.columns
|
my_df_cols = my_df.columns
|
||||||
|
|
||||||
|
gene_baiscL = ['pnca']
|
||||||
|
geneL_naL = ['gid', 'rpob']
|
||||||
|
geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']
|
||||||
#%%============================================================================
|
#%%============================================================================
|
||||||
# GET Y
|
# GET Y
|
||||||
# Y = my_df.loc[:,drug] #has NA
|
# Y = my_df.loc[:,drug] #has NA
|
||||||
|
@ -116,7 +132,7 @@ def modelPipeline(X_train, X_test, y_train, y_test):
|
||||||
for clf_name, clf in clfs:
|
for clf_name, clf in clfs:
|
||||||
|
|
||||||
pipeline = Pipeline(steps=[
|
pipeline = Pipeline(steps=[
|
||||||
('scaler', StandardScaler()),
|
('scaler', MinMaxScaler()),
|
||||||
('classifier', clf)
|
('classifier', clf)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue