tidying script to run from cmd and via ssh

This commit is contained in:
Tanushree Tunstall 2022-05-28 09:40:24 +01:00
parent 0a84a4b4dc
commit b6f0308e42
4 changed files with 271 additions and 76 deletions

View file

@ -6,17 +6,7 @@
# autosklearn --> pipeline --> components --> classification
# https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/classification
# TOADD:
# LDA
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/lda.py
# Multinomial_nb
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/multinomial_nb.py
# passive_aggressive
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/passive_aggressive.py
# SGD
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/sgd.py
# ADDED 27/05/2022: Extra Tree + LRCV and RCCV
######https://scikit-learn.org/stable/supervised_learning.html
########################################################################
@ -57,7 +47,7 @@ param_grid_abc = [
#https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/extra_trees.py
#https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
#======================
estimator = ExtraTreesClassifier**rs)
estimator = ExtraTreesClassifier(**rs)
# Define pipeline with steps
pipe_abc = Pipeline([
@ -85,6 +75,40 @@ param_grid_abc = [
}
]
#======================
# ExtraTreeClassifier()
# https://scikit-learn.org/stable/modules/generated/sklearn.tree.ExtraTreeClassifier.html
#======================
# Define estimator
estimator = ExtraTreeClassifier(**rs)

# Define pipeline with steps:
#   'pre' : scale the features with MinMaxScaler
#   'fs'  : recursive feature elimination (RFECV) driven by a DecisionTreeClassifier,
#           scored with 'matthews_corrcoef'
#   'clf' : the estimator defined above
# NOTE(review): pipe_abc / param_grid_abc are also assigned by the
# ExtraTreesClassifier section earlier in this file; confirm that overwriting
# them here is intended.
pipe_abc = Pipeline([
    ('pre', MinMaxScaler())
    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
#   , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
#   , ('clf', ExtraTreesClassifier(**rs))])
    , ('clf', estimator)
])

# Define hyperparameter space to search for.
# The list holds two separate dicts: one for the feature-selection step and one
# for the classifier step. The classifier dict needs its own opening brace,
# otherwise the 'clf__*' entries are bare key/value pairs inside a list, which
# is a syntax error.
param_grid_abc = [
    {
        'fs__min_features_to_select' : [1,2]
#       , 'fs__cv': [cv]
    },
    {
#       'clf': [ExtraTreeClassifier(**rs)],
        'clf__max_depth': [None],
        'clf__criterion': ['gini', 'entropy'],
        'clf__max_features': [None, 'sqrt', 'log2', 0.5, 1],
        'clf__min_samples_leaf': [1, 5, 10, 15, 20],
        'clf__min_samples_split': [2, 5, 10, 15, 20]
    }
]
#===========================
# DecisionTreeClassifier()
#https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/decision_tree.py
@ -304,8 +328,8 @@ param_grid_gbc = [
#########################################################################
#===========================
# GaussianNB()
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/gaussian_nb.py
https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html
#https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/gaussian_nb.py
#https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html
#===========================
# Define estimator
estimator = GaussianNB()
@ -439,12 +463,58 @@ param_grid_lr = [
'clf__solver': ['liblinear']
}
]
#########################################################################
#===========================
# LogisticRegressionCV () *
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html
#===========================
# Define estimator
# LogisticRegressionCV runs its own internal cross-validation (cv = 10) over the
# candidate regularisation strengths given in `Cs`.
estimator = LogisticRegressionCV(cv = 10, **rs)

# Define pipeline with steps:
#   'pre' : scale the features with MinMaxScaler
#   'fs'  : recursive feature elimination (RFECV) driven by a plain
#           LogisticRegression, scored with 'matthews_corrcoef'
#   'clf' : the estimator defined above
# NOTE(review): pipe_lr / param_grid_lr are also assigned by the
# LogisticRegression section earlier in this file; confirm that overwriting
# them here is intended.
pipe_lr = Pipeline([
    ('pre', MinMaxScaler())
    , ('fs', RFECV(LogisticRegression(**rs), cv = rskf_cv, scoring = 'matthews_corrcoef'))
#   , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
    , ('clf', estimator)])

# Define hyperparameter space to search for.
# Differences from the LogisticRegression grid this was copied from:
#   * LogisticRegressionCV has no `C` parameter; the candidate strengths go in
#     `Cs`. The whole array is one grid value (hence the extra list) and the
#     estimator's internal CV picks among them.
#   * penalty 'none' is not accepted by LogisticRegressionCV, so it is dropped.
#   * penalty 'elasticnet' requires `l1_ratios`, so it gets its own sub-grid.
param_grid_lr = [
    {
        'fs__min_features_to_select' : [1,2]
#       , 'fs__cv': [rskf_cv]
    },
    {
#       'clf': [LogisticRegressionCV(cv = 10, **rs)],
        'clf__Cs': [np.logspace(0, 4, 10)],
        'clf__penalty': ['l1', 'l2'],
        'clf__max_iter': list(range(100,800,100)),
        'clf__solver': ['saga']
    },
    {
#       'clf': [LogisticRegressionCV(cv = 10, **rs)],
        'clf__Cs': [np.logspace(0, 4, 10)],
        'clf__penalty': ['elasticnet'],
        # NOTE(review): placeholder mixing ratios -- adjust to the values you want searched.
        'clf__l1_ratios': [[0.25, 0.5, 0.75]],
        'clf__max_iter': list(range(100,800,100)),
        'clf__solver': ['saga']
    },
    {
#       'clf': [LogisticRegressionCV(cv = 10, **rs)],
        'clf__Cs': [np.logspace(0, 4, 10)],
        'clf__penalty': ['l2'],
        'clf__max_iter': list(range(100,800,100)),
        'clf__solver': ['newton-cg', 'lbfgs', 'sag']
    },
    {
#       'clf': [LogisticRegressionCV(cv = 10, **rs)],
        'clf__Cs': [np.logspace(0, 4, 10)],
        'clf__penalty': ['l1', 'l2'],
        'clf__max_iter': list(range(100,800,100)),
        'clf__solver': ['liblinear']
    }
]
#########################################################################
#==================
# MLPClassifier()
https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/mlp.py
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
#https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/mlp.py
#https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
#==================
# Define estimator
estimator = MLPClassifier(**rs)
@ -531,6 +601,35 @@ param_grid_rc = [
'clf__alpha': [0.1, 0.2, 0.5, 0.8, 1.0]
}
]
#######################################################################
#====================
# RidgeClassifierCV() *
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeClassifierCV.html
#====================
# Define estimator
# NOTE(review): **rs is not passed here. RidgeClassifierCV has no random_state
# argument, and rs presumably carries random_state (this file calls GaussianNB()
# without it too) -- confirm against the definition of rs.
estimator = RidgeClassifierCV(cv = 10)

# Define pipeline with steps:
#   'pre' : scale the features with MinMaxScaler
#   'fs'  : recursive feature elimination (RFECV) driven by a DecisionTreeClassifier,
#           scored with 'matthews_corrcoef'
#   'clf' : the estimator defined above
# NOTE(review): pipe_rc / param_grid_rc are also assigned by the RidgeClassifier
# section earlier in this file; confirm that overwriting them here is intended.
pipe_rc = Pipeline([
    ('pre', MinMaxScaler())
    , ('fs', RFECV(DecisionTreeClassifier(**rs), cv = cv, scoring = 'matthews_corrcoef'))
#   , ('fs', RFECV(estimator, cv = cv, scoring = 'matthews_corrcoef'))
    , ('clf', estimator)
])

# Define hyperparameter space to search for.
# RidgeClassifierCV takes the candidate strengths as `alphas` (plural), not
# `alpha`. The full list is a single grid value (hence the nested list) and the
# estimator's internal CV chooses among them.
param_grid_rc = [
    {
        'fs__min_features_to_select' : [1,2]
#       , 'fs__cv': [cv]
    },
    {
#       'clf' : [RidgeClassifierCV(cv = 10)],
        'clf__alphas': [[0.1, 0.2, 0.5, 0.8, 1.0]]
    }
]
#######################################################################
#========
# SVC()