git add UQ_imbalance.py
This commit is contained in:
parent
42c8c47e2d
commit
1da87ba177
4 changed files with 134 additions and 56 deletions
|
@ -27,22 +27,56 @@ from sklearn.model_selection import train_test_split, cross_validate, cross_val_
|
|||
# Metric
|
||||
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
|
||||
|
||||
def run_all_ML(input_pd, target_label):
|
||||
#def run_all_ML(input_pd, target_label, bts_input, bts_target, var_type):
|
||||
def run_all_ML(input_pd, target_label, preprocess = True, var_type = 'numerical'):
|
||||
|
||||
#y = input_pd[target_label]
|
||||
#X = input_pd.drop(target_label,axis=1)
|
||||
y = target_label
|
||||
X = input_pd
|
||||
# determine categorical and numerical features
|
||||
numerical_ix = input_df.select_dtypes(include=['int64', 'float64']).columns
|
||||
numerical_ix
|
||||
categorical_ix = input_df.select_dtypes(include=['object', 'bool']).columns
|
||||
categorical_ix
|
||||
|
||||
# Choose the preprocessing steps according to var_type
|
||||
if var_type == 'numerical':
|
||||
t = [('num', MinMaxScaler(), numerical_ix)]
|
||||
|
||||
if var_type == 'categorical':
|
||||
t = [('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
if var_type == 'mixed':
|
||||
t = [('num', MinMaxScaler(), numerical_ix)
|
||||
, ('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
col_transform = ColumnTransformer(transformers = t
|
||||
, remainder='passthrough')
|
||||
result_pd = pd.DataFrame()
|
||||
for name, algorithm in all_estimators(type_filter="classifier"):
|
||||
try:
|
||||
estmator = algorithm()
|
||||
temp_pd = pd.DataFrame()
|
||||
temp_cm = pd.DataFrame()
|
||||
|
||||
|
||||
# orig
|
||||
pipe = Pipeline([
|
||||
("model", algorithm())
|
||||
("model" , algorithm())
|
||||
])
|
||||
|
||||
# Enable or disable the preprocessing step via the `preprocess` flag
|
||||
if preprocess == True:
|
||||
pipe = Pipeline([
|
||||
('prep' , col_transform),
|
||||
("model" , algorithm())
|
||||
])
|
||||
else:
|
||||
pipe = Pipeline([
|
||||
("model" , algorithm())
|
||||
])
|
||||
|
||||
|
||||
y_pred = cross_val_predict(pipe, X, y, cv = 10, n_jobs=10)
|
||||
_mcc = round(matthews_corrcoef(y_pred, y), 3)
|
||||
_bacc = round(balanced_accuracy_score(y_pred, y), 3)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue