modified loopity and multclass3 to have skf_cv as a parameter for cv
This commit is contained in:
parent
97620c1bb0
commit
d0c329a1d9
8 changed files with 161 additions and 127 deletions
41
imports.py
41
imports.py
|
@ -17,8 +17,12 @@ from sklearn.neighbors import KNeighborsClassifier
|
|||
from sklearn.svm import SVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
|
||||
from sklearn.ensemble import AdaBoostClassifier
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from xgboost import XGBClassifier
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
|
||||
|
||||
from sklearn.compose import ColumnTransformer
|
||||
|
@ -52,11 +56,29 @@ from imblearn.over_sampling import RandomOverSampler
|
|||
from imblearn.over_sampling import SMOTE
|
||||
from imblearn.pipeline import Pipeline
|
||||
#from sklearn.datasets import make_classification
|
||||
from sklearn.model_selection import cross_validate
|
||||
from sklearn.model_selection import cross_validate, cross_val_score
|
||||
from sklearn.model_selection import RepeatedStratifiedKFold
|
||||
from sklearn.ensemble import AdaBoostClassifier
|
||||
from imblearn.combine import SMOTEENN
|
||||
from imblearn.under_sampling import EditedNearestNeighbours
|
||||
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
|
||||
, 'fscore' : make_scorer(f1_score)
|
||||
, 'mcc' : make_scorer(matthews_corrcoef)
|
||||
, 'precision' : make_scorer(precision_score)
|
||||
, 'recall' : make_scorer(recall_score)
|
||||
, 'roc_auc' : make_scorer(roc_auc_score)
|
||||
})
|
||||
|
||||
rs = {'random_state': 42}
|
||||
njobs = {'n_jobs': 10}
|
||||
skf_cv = StratifiedKFold(n_splits = 10
|
||||
#, shuffle = False, random_state= None)
|
||||
, shuffle = True,**rs)
|
||||
|
||||
#%%
|
||||
homedir = os.path.expanduser("~")
|
||||
os.chdir(homedir + "/git/ML_AI_training/")
|
||||
|
@ -64,8 +86,8 @@ os.chdir(homedir + "/git/ML_AI_training/")
|
|||
# my function
|
||||
from MultClassPipe import MultClassPipeline
|
||||
from MultClassPipe2 import MultClassPipeline2
|
||||
from loopity_loop import MultClassPipeSKF
|
||||
from MultClassPipe3 import MultClassPipelineCV
|
||||
from loopity_loop import MultClassPipeSKFLoop
|
||||
from MultClassPipe3 import MultClassPipeSKFCV
|
||||
|
||||
|
||||
gene = 'pncA'
|
||||
|
@ -199,3 +221,16 @@ cat_df_wtgt.shape
|
|||
|
||||
all_df_wtgt = my_df[numerical_FN + categorical_FN + ['mutation_class']]
|
||||
all_df_wtgt.shape
|
||||
|
||||
#%%
|
||||
#%% Get train-test split and scoring functions
|
||||
X = num_df_wtgt[numerical_FN]
|
||||
y = num_df_wtgt['mutation_class']
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X
|
||||
,y
|
||||
, test_size = 0.33
|
||||
, random_state = 2
|
||||
, shuffle = True
|
||||
, stratify = y)
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue