Added option to include the confusion matrix and target numbers in the mult function

This commit is contained in:
Tanushree Tunstall 2022-06-20 17:08:22 +01:00
parent 905327bf4e
commit 135efcee41
3 changed files with 144 additions and 140 deletions

View file

@ -137,95 +137,76 @@ def MultModelsCl(input_df, target, skf_cv
col_transform = ColumnTransformer(transformers = t
, remainder='passthrough')
# Specify multiple Classification models
lr = LogisticRegression(**rs)
lrcv = LogisticRegressionCV(**rs)
gnb = GaussianNB()
nb = BernoulliNB()
knn = KNeighborsClassifier()
svc = SVC(**rs)
mlp = MLPClassifier(max_iter = 500, **rs)
dt = DecisionTreeClassifier(**rs)
ets = ExtraTreesClassifier(**rs)
et = ExtraTreeClassifier(**rs)
rf = RandomForestClassifier(**rs, n_estimators = 1000 )
rf2 = RandomForestClassifier(
min_samples_leaf = 5
, n_estimators = 1000
, bootstrap = True
, oob_score = True
, **njobs
, **rs
, max_features = 'auto')
xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder =False)
lda = LinearDiscriminantAnalysis()
mnb = MultinomialNB()
pa = PassiveAggressiveClassifier(**rs, **njobs)
sgd = SGDClassifier(**rs, **njobs)
abc = AdaBoostClassifier(**rs)
bc = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
gpc = GaussianProcessClassifier(**rs)
gbc = GradientBoostingClassifier(**rs)
qda = QuadraticDiscriminantAnalysis()
rc = RidgeClassifier(**rs)
rccv = RidgeClassifierCV(cv = 10)
models = [('Logistic Regression' , lr)
, ('Logistic RegressionCV' , lrcv)
, ('Gaussian NB' , gnb)
, ('Naive Bayes' , nb)
, ('K-Nearest Neighbors' , knn)
, ('SVC' , svc)
, ('MLP' , mlp)
, ('Decision Tree' , dt)
, ('Extra Trees' , ets)
, ('Extra Tree' , et)
, ('Random Forest' , rf)
, ('Random Forest2' , rf2)
, ('XGBoost' , xgb)
, ('LDA' , lda)
, ('Multinomial' , mnb)
, ('Passive Aggresive' , pa)
, ('Stochastic GDescent' , sgd)
, ('AdaBoost Classifier' , abc)
, ('Bagging Classifier' , bc)
, ('Gaussian Process' , gpc)
, ('Gradient Boosting' , gbc)
, ('QDA' , qda)
, ('Ridge Classifier' , rc)
, ('Ridge ClassifierCV' , rccv)
# Specify multiple Classification models
models = [('Logistic Regression' , LogisticRegression(**rs) )
, ('Logistic RegressionCV' , LogisticRegressionCV(**rs) )
, ('Gaussian NB' , GaussianNB() )
, ('Naive Bayes' , BernoulliNB() )
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
, ('SVC' , SVC(**rs) )
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000
, bootstrap = True
, oob_score = True
, **njobs
, **rs
, max_features = 'auto') )
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
, ('LDA' , LinearDiscriminantAnalysis() )
, ('Multinomial' , MultinomialNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
, ('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
, ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Ridge Classifier' , RidgeClassifier(**rs) )
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 10) )
]
mm_skf_scoresD = {}
for model_name, model_fn in models:
print('\nModel_name:', model_name
, '\nModel func:' , model_fn
, '\nList of models:', models)
print('\n==============================================================\n'
, '\nRunning several classification models (n):', len(models)
,'\nList of models:')
for m in models:
print(m)
print('\n================================================================\n')
index = 1
for model_name, model_fn in models:
print('\nRunning classifier:', index
, '\nModel_name:' , model_name
, '\nModel func:' , model_fn)
index = index+1
model_pipeline = Pipeline([
('prep' , col_transform)
, ('model' , model_fn)])
print('Running model pipeline:', model_pipeline)
skf_cv_mod = cross_validate(model_pipeline
print('\nRunning model pipeline:', model_pipeline)
skf_cv_modD = cross_validate(model_pipeline
, input_df
, target
, cv = skf_cv
, scoring = scoring_fn
, return_train_score = True)
, return_train_score = True)
#==============================
# Extract mean values for CV
#==============================
mm_skf_scoresD[model_name] = {}
for key, value in skf_cv_mod.items():
for key, value in skf_cv_modD.items():
print('\nkey:', key, '\nvalue:', value)
print('\nmean value:', mean(value))
mm_skf_scoresD[model_name][key] = round(mean(value),2)
#pp.pprint(mm_skf_scoresD)
#cvtrain_mcc = mm_skf_scoresD[model_name]['test_mcc']
#return(mm_skf_scoresD)
#%%