saving work from thinkpad
This commit is contained in:
parent
5577f5b195
commit
23799275a0
3 changed files with 49 additions and 31 deletions
|
@ -146,8 +146,8 @@ scoreBT_mapD = {'bts_mcc' : 'MCC'
|
|||
def MultModelsCl(input_df, target
|
||||
#, skf_cv
|
||||
, sel_cv
|
||||
, blind_test_df
|
||||
, blind_test_target
|
||||
#, blind_test_df
|
||||
#, blind_test_target
|
||||
, tts_split_type
|
||||
|
||||
, resampling_type = 'none' # default
|
||||
|
@ -231,35 +231,36 @@ def MultModelsCl(input_df, target
|
|||
# Specify multiple Classification Models
|
||||
#======================================================
|
||||
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
|
||||
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
||||
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
||||
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
||||
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
|
||||
, ('Gaussian NB' , GaussianNB() )
|
||||
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
|
||||
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
|
||||
, ('LDA' , LinearDiscriminantAnalysis() )
|
||||
, ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||
# , ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
|
||||
# , ('Decision Tree' , DecisionTreeClassifier(**rs) )
|
||||
# , ('Extra Tree' , ExtraTreeClassifier(**rs) )
|
||||
# , ('Extra Trees' , ExtraTreesClassifier(**rs) )
|
||||
# , ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
|
||||
# , ('Gaussian NB' , GaussianNB() )
|
||||
# , ('Gaussian Process' , GaussianProcessClassifier(**rs) )
|
||||
# , ('K-Nearest Neighbors' , KNeighborsClassifier() )
|
||||
, ('LDA' , LinearDiscriminantAnalysis() )
|
||||
# , ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
# , ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||
# , ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||
#, ('Multinomial' , MultinomialNB() )
|
||||
, ('Naive Bayes' , BernoulliNB() )
|
||||
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
||||
, ('QDA' , QuadraticDiscriminantAnalysis() )
|
||||
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
||||
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||
# , ('Naive Bayes' , BernoulliNB() )
|
||||
# , ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
|
||||
# , ('QDA' , QuadraticDiscriminantAnalysis() )
|
||||
# , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) )
|
||||
# # , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
|
||||
# , n_estimators = 1000
|
||||
# , bootstrap = True
|
||||
# , oob_score = True
|
||||
# , **njobs
|
||||
# , **rs
|
||||
# , max_features = 'auto') )
|
||||
, ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
, ('SVC' , SVC(**rs) )
|
||||
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
||||
# , ('Ridge Classifier' , RidgeClassifier(**rs) )
|
||||
# , ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
# , ('SVC' , SVC(**rs) )
|
||||
# , ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||
# , ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
||||
#
|
||||
]
|
||||
|
||||
mm_skf_scoresD = {}
|
||||
|
@ -308,7 +309,7 @@ def MultModelsCl(input_df, target
|
|||
# ADD more info: meta data related to input df
|
||||
mm_skf_scoresD[model_name]['resampling'] = resampling_type
|
||||
mm_skf_scoresD[model_name]['n_training_size'] = len(input_df)
|
||||
mm_skf_scoresD[model_name]['n_trainingY_ratio'] = round(Counter(target)[0]/Counter(target)[1], 2)
|
||||
#mm_skf_scoresD[model_name]['n_trainingY_ratio'] = round(Counter(target)[0]/Counter(target)[1], 2)
|
||||
mm_skf_scoresD[model_name]['n_features'] = len(input_df.columns)
|
||||
mm_skf_scoresD[model_name]['tts_split'] = tts_split_type
|
||||
|
||||
|
@ -357,7 +358,7 @@ def MultModelsCl(input_df, target
|
|||
# Build bts numbers dict
|
||||
btD = {'n_blindY_neg' : Counter(blind_test_target)[0]
|
||||
, 'n_blindY_pos' : Counter(blind_test_target)[1]
|
||||
, 'n_testY_ratio' : round(Counter(blind_test_target)[0]/Counter(blind_test_target)[1], 2)
|
||||
#, 'n_testY_ratio' : round(Counter(blind_test_target)[0]/Counter(blind_test_target)[1], 2)
|
||||
, 'n_test_size' : len(blind_test_df) }
|
||||
|
||||
# Update cmD+tnD dicts with btD
|
||||
|
|
|
@ -58,8 +58,8 @@ all(df.columns.isin(['gene_name'])) # should be False
|
|||
|
||||
|
||||
spl_type = '70_30'
|
||||
spl_type = '80_20'
|
||||
spl_type = 'sl'
|
||||
#spl_type = '80_20'
|
||||
#spl_type = 'sl'
|
||||
|
||||
df2 = split_tts(df
|
||||
, data_type = 'actual'
|
||||
|
@ -84,7 +84,6 @@ fooD = MultModelsCl(input_df = df2['X']
|
|||
, var_type = ['mixed']
|
||||
, scale_numeric = ['min_max']
|
||||
, return_formatted_output = False
|
||||
|
||||
)
|
||||
|
||||
for k, v in fooD.items():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue