working on dissected model, testing diff feature groups
This commit is contained in:
parent
135efcee41
commit
e68a153883
4 changed files with 270 additions and 161 deletions
|
@ -78,10 +78,10 @@ rs = {'random_state': 42}
|
|||
njobs = {'n_jobs': 10}
|
||||
|
||||
scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef)
|
||||
, 'accuracy' : make_scorer(accuracy_score)
|
||||
, 'fscore' : make_scorer(f1_score)
|
||||
, 'precision' : make_scorer(precision_score)
|
||||
, 'recall' : make_scorer(recall_score)
|
||||
, 'accuracy' : make_scorer(accuracy_score)
|
||||
, 'roc_auc' : make_scorer(roc_auc_score)
|
||||
, 'jcc' : make_scorer(jaccard_score)
|
||||
})
|
||||
|
@ -103,7 +103,6 @@ def MultModelsCl_dissected(input_df, target, skf_cv
|
|||
, blind_test_target
|
||||
, add_cm = True # adds confusion matrix based on cross_val_predict
|
||||
, add_yn = True # adds target var class numbers
|
||||
, feature_groups = ['']
|
||||
, var_type = ['numerical', 'categorical','mixed']):
|
||||
|
||||
'''
|
||||
|
@ -122,14 +121,18 @@ def MultModelsCl_dissected(input_df, target, skf_cv
|
|||
returns
|
||||
Dict containing multiple classification scores for each model and mean of each Stratified Kfold including training
|
||||
'''
|
||||
|
||||
|
||||
#======================================================
|
||||
# Determine categorical and numerical features
|
||||
#======================================================
|
||||
numerical_ix = input_df.select_dtypes(include=['int64', 'float64']).columns
|
||||
numerical_ix
|
||||
categorical_ix = input_df.select_dtypes(include=['object', 'bool']).columns
|
||||
categorical_ix
|
||||
|
||||
#======================================================
|
||||
# Determine preprocessing steps ~ var_type
|
||||
#======================================================
|
||||
if var_type == 'numerical':
|
||||
t = [('num', MinMaxScaler(), numerical_ix)]
|
||||
|
||||
|
@ -143,7 +146,9 @@ def MultModelsCl_dissected(input_df, target, skf_cv
|
|||
col_transform = ColumnTransformer(transformers = t
|
||||
, remainder='passthrough')
|
||||
|
||||
# Specify multiple Classification models
|
||||
#======================================================
|
||||
# Specify multiple Classification Models
|
||||
#======================================================
|
||||
models = [('Logistic Regression' , LogisticRegression(**rs) )
|
||||
, ('Logistic RegressionCV' , LogisticRegressionCV(**rs) )
|
||||
, ('Gaussian NB' , GaussianNB() )
|
||||
|
@ -206,7 +211,7 @@ def MultModelsCl_dissected(input_df, target, skf_cv
|
|||
|
||||
#######################################################################
|
||||
#======================================================
|
||||
# Option 1: Add confusion matrix from cross_val_predict
|
||||
# Option: Add confusion matrix from cross_val_predict
|
||||
# Understand and USE with caution
|
||||
# cross_val_score, cross_val_predict, "Passing these predictions into an evaluation metric may not be a valid way to measure generalization performance. Results can differ from cross_validate and cross_val_score unless all tests sets have equal size and the metric decomposes over samples."
|
||||
# https://stackoverflow.com/questions/65645125/producing-a-confusion-matrix-with-cross-validate
|
||||
|
@ -237,7 +242,7 @@ def MultModelsCl_dissected(input_df, target, skf_cv
|
|||
skf_cv_modD = skf_cv_modD
|
||||
#######################################################################
|
||||
#=============================================
|
||||
# Option 2: Add target (y) numbers for data
|
||||
# Option: Add target (y) numbers for data
|
||||
#=============================================
|
||||
if add_yn:
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue