git add UQ_imbalance.py

This commit is contained in:
Tanushree Tunstall 2022-05-27 06:05:34 +01:00
parent 42c8c47e2d
commit 1da87ba177
4 changed files with 134 additions and 56 deletions

View file

@ -27,22 +27,56 @@ from sklearn.model_selection import train_test_split, cross_validate, cross_val_
# Metric
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
def run_all_ML(input_pd, target_label):
#def run_all_ML(input_pd, target_label, bts_input, bts_target, var_type):
def run_all_ML(input_pd, target_label, preprocess = True, var_type = 'numerical'):
#y = input_pd[target_label]
#X = input_pd.drop(target_label,axis=1)
y = target_label
X = input_pd
# determine categorical and numerical features
numerical_ix = input_df.select_dtypes(include=['int64', 'float64']).columns
numerical_ix
categorical_ix = input_df.select_dtypes(include=['object', 'bool']).columns
categorical_ix
# Determine preprocessing steps ~ var_type
if var_type == 'numerical':
t = [('num', MinMaxScaler(), numerical_ix)]
if var_type == 'categorical':
t = [('cat', OneHotEncoder(), categorical_ix)]
if var_type == 'mixed':
t = [('num', MinMaxScaler(), numerical_ix)
, ('cat', OneHotEncoder(), categorical_ix)]
col_transform = ColumnTransformer(transformers = t
, remainder='passthrough')
result_pd = pd.DataFrame()
for name, algorithm in all_estimators(type_filter="classifier"):
try:
estmator = algorithm()
temp_pd = pd.DataFrame()
temp_cm = pd.DataFrame()
# orig
pipe = Pipeline([
("model", algorithm())
("model" , algorithm())
])
# turn on and off preprocessing
if preprocess == True:
pipe = Pipeline([
('prep' , col_transform),
("model" , algorithm())
])
else:
pipe = Pipeline([
("model" , algorithm())
])
y_pred = cross_val_predict(pipe, X, y, cv = 10, n_jobs=10)
_mcc = round(matthews_corrcoef(y_pred, y), 3)
_bacc = round(balanced_accuracy_score(y_pred, y), 3)