Moved the logo_skf function to del, as MultClfs is now used for the combined data
This commit is contained in:
parent
a6532ddfa3
commit
2c50124b1b
8 changed files with 71 additions and 1735 deletions
|
@ -77,9 +77,11 @@ import re
|
|||
import itertools
|
||||
from sklearn.model_selection import LeaveOneGroupOut
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.naive_bayes import ComplementNB
|
||||
from sklearn.dummy import DummyClassifier
|
||||
|
||||
#%% GLOBALS
|
||||
#rs = {'random_state': 42}
|
||||
#rs = {'random_state': 42} # INSIDE FUNCTION CALL NOW
|
||||
#njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
|
||||
|
||||
scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef)
|
||||
|
@ -90,8 +92,7 @@ scoring_fn = ({ 'mcc' : make_scorer(matthews_corrcoef)
|
|||
, 'roc_auc' : make_scorer(roc_auc_score)
|
||||
, 'jcc' : make_scorer(jaccard_score)
|
||||
})
|
||||
|
||||
# for sel_cv
|
||||
# for sel_cv INSIDE FUNCTION CALL NOW
|
||||
#skf_cv = StratifiedKFold(n_splits = 10
|
||||
# #, shuffle = False, random_state= None)
|
||||
# , shuffle = True, **rs)
|
||||
|
@ -149,25 +150,25 @@ scoreBT_mapD = {'bts_mcc' : 'MCC'
|
|||
############################
|
||||
# Multiple Classification - Model Pipeline
|
||||
def MultModelsCl_logo_skf(input_df
|
||||
, target
|
||||
, sel_cv
|
||||
, tts_split_type
|
||||
, resampling_type
|
||||
#, group = None
|
||||
|
||||
, add_cm = True # adds confusion matrix based on cross_val_predict
|
||||
, add_yn = True # adds target var class numbers
|
||||
, var_type = ['numerical', 'categorical','mixed']
|
||||
, scale_numeric = ['min_max', 'std', 'min_max_neg', 'none']
|
||||
, target
|
||||
, sel_cv
|
||||
, tts_split_type
|
||||
, resampling_type
|
||||
#, group = None
|
||||
|
||||
, add_cm = True # adds confusion matrix based on cross_val_predict
|
||||
, add_yn = True # adds target var class numbers
|
||||
, var_type = ['numerical', 'categorical','mixed']
|
||||
, scale_numeric = ['min_max', 'std', 'min_max_neg', 'none']
|
||||
|
||||
, run_blind_test = True
|
||||
, blind_test_df = pd.DataFrame()
|
||||
, blind_test_target = pd.Series(dtype = int)
|
||||
, return_formatted_output = True
|
||||
, run_blind_test = True
|
||||
, blind_test_df = pd.DataFrame()
|
||||
, blind_test_target = pd.Series(dtype = int)
|
||||
, return_formatted_output = True
|
||||
|
||||
, random_state = 42
|
||||
, n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
|
||||
):
|
||||
, random_state = 42
|
||||
, n_jobs = os.cpu_count() # the number of jobs should equal the number of CPU cores
|
||||
):
|
||||
|
||||
'''
|
||||
@ param input_df: input features
|
||||
|
@ -189,7 +190,15 @@ def MultModelsCl_logo_skf(input_df
|
|||
#%% Func globals
|
||||
rs = {'random_state': random_state}
|
||||
njobs = {'n_jobs': n_jobs}
|
||||
|
||||
skf_cv = StratifiedKFold(n_splits = 10
|
||||
#, shuffle = False, random_state= None)
|
||||
, shuffle = True,**rs)
|
||||
|
||||
rskf_cv = RepeatedStratifiedKFold(n_splits = 10
|
||||
, n_repeats = 3
|
||||
, **rs)
|
||||
logo = LeaveOneGroupOut()
|
||||
|
||||
# select CV type:
|
||||
# if group == None:
|
||||
|
@ -252,8 +261,10 @@ def MultModelsCl_logo_skf(input_df
|
|||
#======================================================
|
||||
# Specify multiple Classification Models
|
||||
#======================================================
|
||||
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||
#, ('Bernoulli NB' , BernoulliNB() ) # pks Naive Bayes, CAUTION
|
||||
, ('Complement NB' , ComplementNB() )
|
||||
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
||||
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
||||
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
||||
|
@ -265,23 +276,23 @@ def MultModelsCl_logo_skf(input_df
|
|||
, ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||
, ('Multinomial' , MultinomialNB() )
|
||||
, ('Naive Bayes' , BernoulliNB() )
|
||||
, ('Multinomial NB' , MultinomialNB() )
|
||||
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
||||
, ('QDA' , QuadraticDiscriminantAnalysis() )
|
||||
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
||||
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||
, n_estimators = 1000
|
||||
, bootstrap = True
|
||||
, oob_score = True
|
||||
, **njobs
|
||||
, **rs
|
||||
, max_features = 'auto') )
|
||||
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
, ('SVC' , SVC(**rs) )
|
||||
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder = False, **njobs) )
|
||||
, n_estimators = 1000
|
||||
, bootstrap = True
|
||||
, oob_score = True
|
||||
, **njobs
|
||||
, **rs
|
||||
, max_features = 'auto') )
|
||||
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
, ('SVC' , SVC(**rs) )
|
||||
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder = False, **njobs) )
|
||||
, ('Dummy Classifier' , DummyClassifier(strategy = 'most_frequent') )
|
||||
]
|
||||
|
||||
mm_skf_scoresD = {}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue