modified loopity and multclass3 to take skf_cv as a parameter for cv

This commit is contained in:
Tanushree Tunstall 2022-03-17 18:17:58 +00:00
parent 97620c1bb0
commit d0c329a1d9
8 changed files with 161 additions and 127 deletions

View file

@ -17,8 +17,12 @@ from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
@ -52,11 +56,29 @@ from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
#from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_validate, cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import AdaBoostClassifier
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator
# Metric name -> scorer object. The keys are the labels under which each
# metric is reported by whatever consumes this mapping.
# NOTE(review): with make_scorer defaults the wrapped metric is fed predicted
# labels, so 'roc_auc' here is computed from hard predictions rather than
# scores/probabilities - confirm this is intended.
scoring_fn = {
    'accuracy': make_scorer(accuracy_score),
    'fscore': make_scorer(f1_score),
    'mcc': make_scorer(matthews_corrcoef),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'roc_auc': make_scorer(roc_auc_score),
}

# Shared keyword-argument bundles, meant to be splatted into calls (**rs, **njobs).
rs = {'random_state': 42}
njobs = {'n_jobs': 10}

# 10-fold stratified splitter; shuffling is enabled and seeded through `rs`.
skf_cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    **rs,
)
#%%
# Resolve the current user's home directory and make the project checkout
# beneath it the working directory.
# NOTE(review): presumably done so the project-local modules imported below
# resolve from the working directory - confirm.
homedir = os.path.expanduser("~")
os.chdir(homedir + "/git/ML_AI_training/")
@ -64,8 +86,8 @@ os.chdir(homedir + "/git/ML_AI_training/")
# my function
from MultClassPipe import MultClassPipeline
from MultClassPipe2 import MultClassPipeline2
from loopity_loop import MultClassPipeSKF
from MultClassPipe3 import MultClassPipelineCV
from loopity_loop import MultClassPipeSKFLoop
from MultClassPipe3 import MultClassPipeSKFCV
# Gene identifier; presumably selects which dataset is analysed further
# down - TODO confirm against the code that reads `gene`.
gene = 'pncA'
@ -199,3 +221,16 @@ cat_df_wtgt.shape
# Column subset of my_df: every name in numerical_FN and categorical_FN
# (presumably the numerical and categorical feature-name lists - confirm)
# followed by the 'mutation_class' column.
all_df_wtgt = my_df[numerical_FN + categorical_FN + ['mutation_class']]
# Bare expression: the shape is only displayed when the cell is run
# interactively; it has no effect when executed as a script.
all_df_wtgt.shape
#%%
#%% Get train-test split and scoring functions
# Feature matrix: the numerical_FN columns; target: the 'mutation_class' column.
X = num_df_wtgt[numerical_FN]
y = num_df_wtgt['mutation_class']

# Shuffled hold-out split with a fixed seed: 33% of the rows go to the test
# set, stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=2,
    shuffle=True,
    stratify=y,
)