Added different scaling options
This commit is contained in:
parent
ebef0c7967
commit
8d831f3613
3 changed files with 99 additions and 31 deletions
|
@ -142,7 +142,9 @@ scoreBT_mapD = {'bts_mcc' : 'MCC'
|
|||
# Run Multiple Classifiers
|
||||
############################
|
||||
# Multiple Classification - Model Pipeline
|
||||
def MultModelsCl(input_df, target, skf_cv
|
||||
def MultModelsCl(input_df, target
|
||||
#, skf_cv
|
||||
, sel_cv
|
||||
, blind_test_df
|
||||
, blind_test_target
|
||||
, tts_split_type
|
||||
|
@ -150,7 +152,8 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
, resampling_type = 'none' # default
|
||||
, add_cm = True # adds confusion matrix based on cross_val_predict
|
||||
, add_yn = True # adds target var class numbers
|
||||
, var_type = ['numerical', 'categorical','mixed']
|
||||
, var_type = ['numerical', 'categorical','mixed']
|
||||
, scale_numeric = ['min_max', 'std', 'min_max_neg', 'none']
|
||||
, run_blind_test = True
|
||||
, return_formatted_output = True):
|
||||
|
||||
|
@ -182,24 +185,52 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
#======================================================
|
||||
# Determine preprocessing steps ~ var_type
|
||||
#======================================================
|
||||
if var_type == 'numerical':
|
||||
t = [('num', MinMaxScaler(), numerical_ix)]
|
||||
|
||||
# if var_type == 'numerical':
|
||||
# t = [('num', MinMaxScaler(), numerical_ix)]
|
||||
|
||||
# if var_type == 'categorical':
|
||||
# t = [('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
# # if var_type == 'mixed':
|
||||
# # t = [('num', MinMaxScaler(), numerical_ix)
|
||||
# # , ('cat', OneHotEncoder(), categorical_ix) ]
|
||||
|
||||
# if var_type == 'mixed':
|
||||
# t = [('cat', OneHotEncoder(), categorical_ix) ]
|
||||
if type(var_type) == list:
|
||||
var_type = str(var_type[0])
|
||||
else:
|
||||
var_type = var_type
|
||||
|
||||
if var_type in ['numerical','mixed']:
|
||||
if scale_numeric == ['none']:
|
||||
t = [('cat', OneHotEncoder(), categorical_ix)]
|
||||
if scale_numeric != ['none']:
|
||||
if scale_numeric == ['min_max']:
|
||||
scaler = MinMaxScaler()
|
||||
if scale_numeric == ['min_max_neg']:
|
||||
scaler = MinMaxScaler(feature_range=(-1, 1))
|
||||
if scale_numeric == ['std']:
|
||||
scaler = StandardScaler()
|
||||
|
||||
t = [('num', scaler, numerical_ix)
|
||||
, ('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
|
||||
if var_type == 'categorical':
|
||||
t = [('cat', OneHotEncoder(), categorical_ix)]
|
||||
|
||||
if var_type == 'mixed':
|
||||
t = [('num', MinMaxScaler(), numerical_ix)
|
||||
, ('cat', OneHotEncoder(), categorical_ix) ]
|
||||
|
||||
|
||||
col_transform = ColumnTransformer(transformers = t
|
||||
, remainder='passthrough')
|
||||
|
||||
|
||||
#======================================================
|
||||
# Specify multiple Classification Models
|
||||
#======================================================
|
||||
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
|
||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
||||
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
||||
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
||||
|
@ -211,18 +242,18 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
, ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||
, ('Multinomial' , MultinomialNB() )
|
||||
#, ('Multinomial' , MultinomialNB() )
|
||||
, ('Naive Bayes' , BernoulliNB() )
|
||||
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
||||
, ('QDA' , QuadraticDiscriminantAnalysis() )
|
||||
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
||||
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||
, n_estimators = 1000
|
||||
, bootstrap = True
|
||||
, oob_score = True
|
||||
, **njobs
|
||||
, **rs
|
||||
, max_features = 'auto') )
|
||||
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||
# , n_estimators = 1000
|
||||
# , bootstrap = True
|
||||
# , oob_score = True
|
||||
# , **njobs
|
||||
# , **rs
|
||||
# , max_features = 'auto') )
|
||||
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
, ('SVC' , SVC(**rs) )
|
||||
|
@ -254,7 +285,7 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
skf_cv_modD = cross_validate(model_pipeline
|
||||
, input_df
|
||||
, target
|
||||
, cv = skf_cv
|
||||
, cv = sel_cv
|
||||
, scoring = scoring_fn
|
||||
, return_train_score = True)
|
||||
#==============================
|
||||
|
@ -283,7 +314,7 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
cmD = {}
|
||||
|
||||
# Calculate cm
|
||||
y_pred = cross_val_predict(model_pipeline, input_df, target, cv = skf_cv, **njobs)
|
||||
y_pred = cross_val_predict(model_pipeline, input_df, target, cv = sel_cv, **njobs)
|
||||
#_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel() # internally
|
||||
tn, fp, fn, tp = confusion_matrix(y_pred, target).ravel()
|
||||
|
||||
|
@ -334,8 +365,9 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
|
||||
bts_mcc_score = round(matthews_corrcoef(blind_test_target, bts_predict),2)
|
||||
print('\nMCC on Blind test:' , bts_mcc_score)
|
||||
print('\nAccuracy on Blind test:', round(accuracy_score(blind_test_target, bts_predict),2))
|
||||
|
||||
#print('\nAccuracy on Blind test:', round(accuracy_score(blind_test_target, bts_predict),2))
|
||||
print('\nMCC on Training:' , mm_skf_scoresD[model_name]['test_mcc'] )
|
||||
|
||||
mm_skf_scoresD[model_name]['bts_mcc'] = bts_mcc_score
|
||||
mm_skf_scoresD[model_name]['bts_fscore'] = round(f1_score(blind_test_target, bts_predict),2)
|
||||
mm_skf_scoresD[model_name]['bts_precision'] = round(precision_score(blind_test_target, bts_predict),2)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue