diff --git a/MultClassPipe3.py b/MultClassPipe3.py
index 4dfdc5b..aa161ab 100644
--- a/MultClassPipe3.py
+++ b/MultClassPipe3.py
@@ -61,23 +61,39 @@ from imblearn.combine import SMOTEENN
 from imblearn.under_sampling import EditedNearestNeighbours
 
 #%%
-rs = {'random_state': 42}
-# Done: add preprocessing step with one hot encoder
-# Done: get accuracy and other scores through K-fold stratified cv
+rs = {'random_state': 42}  # module-level: the model constructors below expand **rs
+njobs = {'n_jobs': 10}     # module-level: the rf2 constructor below expands **njobs
 
-scoring_fn =  ({ 'fscore'     : make_scorer(f1_score)
-                 , 'mcc'        : make_scorer(matthews_corrcoef)
-                 , 'precision' : make_scorer(precision_score)
-                 , 'recall'    : make_scorer(recall_score)
-                 , 'accuracy'      : make_scorer(accuracy_score)
-                 ,  'roc_auc'   : make_scorer(roc_auc_score)
-                 #,  'jaccard'   : make_scorer(jaccard_score)
+scoring_fn =  ({ 'fscore'       : make_scorer(f1_score)
+                  , 'mcc'        : make_scorer(matthews_corrcoef)
+                  , 'precision'  : make_scorer(precision_score)
+                  , 'recall'     : make_scorer(recall_score)
+                  , 'accuracy'   : make_scorer(accuracy_score)
+                  ,  'roc_auc'   : make_scorer(roc_auc_score)
+                  #,  'jaccard'   : make_scorer(jaccard_score)
             })    
 
 
 # Multiple Classification - Model Pipeline
-def MultClassPipelineCV(X_train, X_test, y_train, y_test, input_df, var_type = ['numerical', 'categorical','mixed']):
+def MultClassPipeSKFCV(input_df, target, skf_cv, var_type = ['numerical', 'categorical','mixed']):
 
+    '''
+    @ param input_df: input features 
+    @ type: df with input features WITHOUT the target variable
+    
+    @param target: target (or output) feature
+    @type: df or np.array or Series
+    
+    @param skf_cv: stratified K-fold setting passed to cross_validate as cv; use a StratifiedKFold object to control shuffle and random state
+    @type: int or StratifiedKFold()
+    
+    @param var_type: numerical, categorical or mixed; determines which col_transform to apply (MinMaxScaler and/or one-hot encoder)
+    @type: list
+
+    returns
+    Dict containing multiple classification scores for each model and mean of each Stratified Kfold including training
+       
+    '''
     # determine categorical and numerical features
     numerical_ix = input_df.select_dtypes(include=['int64', 'float64']).columns
     numerical_ix
@@ -98,66 +114,61 @@ def MultClassPipelineCV(X_train, X_test, y_train, y_test, input_df, var_type = [
     col_transform = ColumnTransformer(transformers = t
                                        , remainder='passthrough')
     
-    #%%
+    #%% Specify multiple Classification models
     log_reg = LogisticRegression(**rs)
-    nb = BernoulliNB()
-    knn = KNeighborsClassifier()
-    svm = SVC(**rs)
-    mlp = MLPClassifier(max_iter=500, **rs)
-    dt = DecisionTreeClassifier(**rs)
-    et = ExtraTreesClassifier(**rs)
-    rf = RandomForestClassifier(**rs)
-    rf2 = RandomForestClassifier(
-                          min_samples_leaf=50,
-                          n_estimators=150,
-                          bootstrap=True,
-                          oob_score=True,
-                          n_jobs=-1,
-                          random_state=42,
-                          max_features='auto')
-    
-    xgb = XGBClassifier(**rs, verbosity=0)
+    nb      = BernoulliNB()
+    knn     = KNeighborsClassifier()
+    svm     = SVC(**rs)
+    mlp     = MLPClassifier(max_iter = 500, **rs)
+    dt      = DecisionTreeClassifier(**rs)
+    et      = ExtraTreesClassifier(**rs)
+    rf      = RandomForestClassifier(**rs)
+    rf2     = RandomForestClassifier(
+                          min_samples_leaf = 50
+                          , n_estimators     = 150
+                          , bootstrap        = True
+                          , oob_score        = True
+                          , **njobs
+                          , **rs
+                          , max_features     = 'auto')
+    xgb = XGBClassifier(**rs
+                        , verbosity = 0, use_label_encoder =False)
 
-    models = [
-            ('Logistic Regression', log_reg), 
-            ('Naive Bayes', nb),
-            ('K-Nearest Neighbors', knn), 
-            ('SVM', svm), 
-            ('MLP', mlp), 
-            ('Decision Tree', dt), 
-            ('Extra Trees', et), 
-            ('Random Forest', rf), 
-            ('Random Forest2', rf2), 
-            #('XGBoost', xgb)
-            ]
-            
-    skf_cv_scores = {}
+    # Each name is also a key of the returned dict, so every model is listed exactly once.
+    models = [('Logistic Regression', log_reg)
+            , ('Naive Bayes'        , nb)
+            , ('K-Nearest Neighbors', knn)
+            , ('SVM'                , svm)
+            , ('MLP'                , mlp)
+            , ('Decision Tree'      , dt)
+            , ('Extra Trees'        , et)
+            , ('Random Forest'      , rf)
+            , ('Random Forest2'     , rf2)
+            , ('XGBoost'            , xgb)]
+        
+    mm_skf_scoresD = {}
      
     for model_name, model_fn in models:
         print('\nModel_name:', model_name
         , '\nModel func:'    , model_fn
         , '\nList of models:', models)
     
-    #    model_pipeline = Pipeline([
-    #        ('pre'     , MinMaxScaler())
-    #        , ('model'  , model_fn)])
-            
         model_pipeline = Pipeline([
             ('prep'     , col_transform)
-            , ('model' , model_fn)])
+            , ('model'  , model_fn)])
             
         print('Running model pipeline:', model_pipeline)
-        skf_cv = cross_validate(model_pipeline
-                              , X_train
-                              , y_train
-                              , cv = 10
+        skf_cv_mod = cross_validate(model_pipeline
+                              , input_df
+                              , target
+                              , cv = skf_cv
                               , scoring = scoring_fn
                               , return_train_score = True)
-        skf_cv_scores[model_name] = {}
-        for key, value in skf_cv.items():
+        mm_skf_scoresD[model_name] = {}
+        for key, value in skf_cv_mod.items():
             print('\nkey:', key, '\nvalue:', value)
             print('\nmean value:', mean(value))
-            skf_cv_scores[model_name][key] = round(mean(value),2)
-            #pp.pprint(skf_cv_scores)
-    return(skf_cv_scores)
+            mm_skf_scoresD[model_name][key] = round(mean(value),2)
+            #pp.pprint(mm_skf_scoresD)
+    return(mm_skf_scoresD)
 
diff --git a/MultClassPipe3_CALL.py b/MultClassPipe3_CALL.py
index 6699707..c1d3808 100644
--- a/MultClassPipe3_CALL.py
+++ b/MultClassPipe3_CALL.py
@@ -5,29 +5,19 @@ Created on Tue Mar 15 11:09:50 2022
 
 @author: tanu
 """
-# stratified shuffle split
-X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
-                                                    , num_df_wtgt['mutation_class']
-                                                    , test_size = 0.33
-                                                    , **rs
-                                                    , shuffle = True
-                                                    , stratify = num_df_wtgt['mutation_class'])
+#%% Data
+X = all_df_wtgt[numerical_FN+categorical_FN]
+y = all_df_wtgt['mutation_class']
+#%% variables
 
-y_train.to_frame().value_counts().plot(kind = 'bar')
-y_test.to_frame().value_counts().plot(kind = 'bar')
-
-MultClassPipelineCV(X_train, X_test, y_train, y_test
-         , input_df = num_df_wtgt[numerical_FN]
-         , var_type = 'numerical')
+#%% MultClassPipeSKFCV: function call()
+mm_skf_scoresD = MultClassPipeSKFCV(input_df = X
+                                        , target = y
+                                        , var_type = 'mixed'
+                                        , skf_cv = skf_cv)
 
 
-skf_cv_scores = MultClassPipelineCV(X_train, X_test, y_train, y_test
-         , input_df = num_df_wtgt[numerical_FN]
-         , var_type = 'numerical')
-
-pp.pprint(skf_cv_scores)
-# construct a df
-skf_cv_scores_df = pd.DataFrame(skf_cv_scores)
-skf_cv_scores_df
-skf_cv_scores_df_test = skf_cv_scores_df.filter(like='test_', axis=0)
-skf_cv_scores_df_train = skf_cv_scores_df.filter(like='train_', axis=0)
+mm_skf_scores_df_all = pd.DataFrame(mm_skf_scoresD)
+mm_skf_scores_df_all
+mm_skf_scores_df_test = mm_skf_scores_df_all.filter(like='test_', axis=0)
+mm_skf_scores_df_train = mm_skf_scores_df_all.filter(like='train_', axis=0) # helps to see if you trust the results
diff --git a/__pycache__/MultClassPipe3.cpython-37.pyc b/__pycache__/MultClassPipe3.cpython-37.pyc
index f6693e8..c225213 100644
Binary files a/__pycache__/MultClassPipe3.cpython-37.pyc and b/__pycache__/MultClassPipe3.cpython-37.pyc differ
diff --git a/__pycache__/loopity_loop.cpython-37.pyc b/__pycache__/loopity_loop.cpython-37.pyc
index 5439565..f1efd32 100644
Binary files a/__pycache__/loopity_loop.cpython-37.pyc and b/__pycache__/loopity_loop.cpython-37.pyc differ
diff --git a/base_estimator.py b/base_estimator.py
index de9ddbb..275bb50 100644
--- a/base_estimator.py
+++ b/base_estimator.py
@@ -138,6 +138,14 @@ parameters = [
         #'tfidf__stop_words': [None],
         'clf__estimator__alpha': (1e-2, 1e-3, 1e-1),
     },
+    
+    {   # LogisticRegression grid: hyper-parameter keys carry the clf__estimator__ prefix, like the entry above
+        'clf__estimator': [LogisticRegression()],
+        'clf__estimator__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
+        'clf__estimator__penalty': ['none', 'l1', 'l2', 'elasticnet'],  # NOTE(review): not every penalty/solver pair is supported -- confirm how failed fits should be scored
+        'clf__estimator__max_iter': list(range(100,800,100)),
+        'clf__estimator__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
+    },
 ]
 
 pipeline = Pipeline([
diff --git a/imports.py b/imports.py
index 928f59e..62ba294 100644
--- a/imports.py
+++ b/imports.py
@@ -17,8 +17,12 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import SVC
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.neural_network import MLPClassifier
 from xgboost import XGBClassifier
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.linear_model import SGDClassifier
 from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
 
 from sklearn.compose import ColumnTransformer
@@ -52,11 +56,29 @@ from imblearn.over_sampling import RandomOverSampler
 from imblearn.over_sampling import SMOTE
 from imblearn.pipeline import Pipeline
 #from sklearn.datasets import make_classification
-from sklearn.model_selection import cross_validate
+from sklearn.model_selection import cross_validate, cross_val_score
 from sklearn.model_selection import RepeatedStratifiedKFold
 from sklearn.ensemble import AdaBoostClassifier
 from imblearn.combine import SMOTEENN
 from imblearn.under_sampling import EditedNearestNeighbours
+
+from sklearn.model_selection import GridSearchCV
+from sklearn.base import BaseEstimator
+
+scoring_fn =  ({'accuracy'      : make_scorer(accuracy_score)
+                 , 'fscore'     : make_scorer(f1_score)
+                 , 'mcc'        : make_scorer(matthews_corrcoef)
+                 ,  'precision' : make_scorer(precision_score)
+                 ,  'recall'    : make_scorer(recall_score)
+                 ,  'roc_auc'   : make_scorer(roc_auc_score)
+            }) 
+  
+rs = {'random_state': 42}
+njobs = {'n_jobs': 10}
+skf_cv = StratifiedKFold(n_splits = 10
+                          #, shuffle = False, random_state= None)
+                           , shuffle = True,**rs)
+
 #%%
 homedir = os.path.expanduser("~")
 os.chdir(homedir + "/git/ML_AI_training/")
@@ -64,8 +86,8 @@ os.chdir(homedir + "/git/ML_AI_training/")
 # my function
 from MultClassPipe import MultClassPipeline
 from MultClassPipe2 import MultClassPipeline2
-from loopity_loop import MultClassPipeSKF
-from MultClassPipe3 import MultClassPipelineCV
+from loopity_loop import MultClassPipeSKFLoop
+from MultClassPipe3 import MultClassPipeSKFCV
 
 
 gene = 'pncA'
@@ -199,3 +221,16 @@ cat_df_wtgt.shape
 
 all_df_wtgt = my_df[numerical_FN + categorical_FN + ['mutation_class']]
 all_df_wtgt.shape
+
+#%%
+#%% Get train-test split and scoring functions
+X = num_df_wtgt[numerical_FN]
+y = num_df_wtgt['mutation_class']
+
+X_train, X_test, y_train, y_test = train_test_split(X
+                                            ,y
+                                            , test_size    = 0.33
+                                            , random_state = 2
+                                            , shuffle      = True
+                                            , stratify     = y)
+ 
\ No newline at end of file
diff --git a/loopity_loop.py b/loopity_loop.py
index b4f00e7..a0afc35 100644
--- a/loopity_loop.py
+++ b/loopity_loop.py
@@ -33,23 +33,30 @@ from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoe
 from statistics import mean, stdev, median, mode
 #%%
 rs = {'random_state': 42}
+njobs = {'n_jobs': 10}
+   
 # Done: add preprocessing step with one hot encoder
-# TODO: supply stratified K-fold cv train and test data
+# TODO: supply stratified K-fold cv train and test data
 # TODO: get accuracy and other scores through K-fold cv
 
 # Multiple Classification - Model Pipeline
-def MultClassPipeSKF(input_df, y_targetF, var_type = ['numerical', 'categorical','mixed'], skf_splits = 10):
+def MultClassPipeSKFLoop(input_df, target, skf_cv, var_type = ['numerical','categorical','mixed']):
 
     '''
     @ param input_df: input features 
-    @ type: df (gets converted to np.array for stratified Kfold, and helps identify names to apply column transformation)
+    @ type: df with input features WITHOUT the target variable
     
-    @param y_outputF: target (or output) feature
-    @type: df or np.array
+    @param target: target (or output) feature
+    @type: df or np.array or Series
     
+    @param skf_cv: stratified K-fold splitter; its .split() is called here, so pass an object (shuffle and random state are set on it), not a plain int
+    @type: StratifiedKFold()
+    
+    @param var_type: numerical, categorical or mixed; determines which col_transform to apply (MinMaxScaler and/or one-hot encoder)
+    @type: list
 
     returns
-    multiple classification model scores
+    Dict containing multiple classification scores for each model and each Stratified Kfold
        
     '''
     # Determine categorical and numerical features
@@ -86,17 +93,17 @@ def MultClassPipeSKF(input_df, y_targetF, var_type = ['numerical', 'categorical'
                           , n_estimators     = 150
                           , bootstrap        = True
                           , oob_score        = True
-                          , n_jobs           = -1
+                          , **njobs
                           , **rs
                           , max_features     = 'auto')
     
-    xgb = XGBClassifier(**rs, verbosity = 0)
+    xgb = XGBClassifier(**rs, verbosity = 0, use_label_encoder = False)
     classification_metrics = {
         'F1_score': []
         ,'MCC': []
         ,'Precision': []
         ,'Recall': []
-        ,'Accuracy': []
+        , 'Accuracy': []
         ,'ROC_AUC': []
         }
     models = [
@@ -109,33 +116,29 @@ def MultClassPipeSKF(input_df, y_targetF, var_type = ['numerical', 'categorical'
              , ('Extra Trees'        , et) 
              , ('Random Forest'      , rf) 
              , ('Naive Bayes'        , nb)
-
-            , ('Random Forest2'     , rf2) 
-            #, ('XGBoost'            , xgb)
+             , ('Random Forest2'     , rf2) 
+             , ('XGBoost'            , xgb)
             ]
 
-    skf = StratifiedKFold(n_splits = skf_splits
-                          , shuffle = True
-                          , **rs)
+    # skf = StratifiedKFold(n_splits = 10
+    #                       #, shuffle = False, random_state= None)
+    #                       , shuffle = True,**rs)
 
-#    skf_dict = {}
     fold_no = 1
     fold_dict={}
 
-
     for model_name, model in models:
         fold_dict.update({ model_name: {}})
 
     #scores_df = pd.DataFrame()
-    for train_index, test_index in skf.split(input_df, y_targetF):
+    for train_index, test_index in skf_cv.split(input_df, target):
         x_train_fold, x_test_fold = input_df.iloc[train_index], input_df.iloc[test_index]
-        y_train_fold, y_test_fold = y_targetF.iloc[train_index], y_targetF.iloc[test_index]
+        y_train_fold, y_test_fold = target.iloc[train_index], target.iloc[test_index]
         #print("Fold: ", fold_no, len(train_index), len(test_index))
 
         for model_name, model in models:
             print("\nStart of model", model_name, "\nLoop no.", fold_no)
-            #skf_dict.update({model_name: classification_metrics })
-            model_pipeline = Pipeline(steps=[('prep'         , col_transform)
+            model_pipeline = Pipeline(steps=[('prep'          , col_transform)
                                               , ('classifier' , model)])
             model_pipeline.fit(x_train_fold, y_train_fold)
             y_pred_fold  = model_pipeline.predict(x_test_fold)
@@ -168,14 +171,4 @@ def MultClassPipeSKF(input_df, y_targetF, var_type = ['numerical', 'categorical'
             fold_dict[model_name][fold].update({'ROC_AUC'   : roc_auc})
             
         fold_no +=1
-        #pp.pprint(skf_dict)
-
-    return(fold_dict)
-
-#%% CAll function 
-# t3_res = MultClassPipeSKF(input_df = numerical_features_df
-#                           , y_targetF = target1
-#                           , var_type = 'numerical'
-#                           , skf_splits = 10)
-# pp.pprint(t3_res)
-# #print(t3_res)
+    return(fold_dict)
\ No newline at end of file
diff --git a/loopity_loop_CALL.py b/loopity_loop_CALL.py
index 00e33b1..e70763e 100644
--- a/loopity_loop_CALL.py
+++ b/loopity_loop_CALL.py
@@ -5,22 +5,19 @@ Created on Fri Mar 11 11:15:50 2022
 
 @author: tanu
 """
-#%%
-del(t3_res)
-# t3_res = MultClassPipeSKF(input_df = numerical_features_df
-#                           , y_targetF = target1
-#                           , var_type = 'numerical'
-#                           , skf_splits = 10)
-# pp.pprint(t3_res)
-# #print(t3_res)
+#%% variables
+rs = {'random_state': 42}
 
-t3_res = MultClassPipeSKF(input_df = num_df_wtgt[numerical_FN]
-                          , y_targetF = num_df_wtgt['mutation_class']
+skf_cv = StratifiedKFold(n_splits = 10
+                          #, shuffle = False, random_state= None)
+                          , shuffle = True,**rs)
+#%% MultClassPipeSKFLoop: function call()
+t3_res = MultClassPipeSKFLoop(input_df = num_df_wtgt[numerical_FN]
+                          , target = num_df_wtgt['mutation_class']
                           , var_type = 'numerical'
-                          , skf_splits = 10)
+                          , skf_cv = skf_cv)
 pp.pprint(t3_res)
 #print(t3_res)
-
 ################################################################
 # extract items from wwithin a nested dict
 #%% Classification Metrics we need to mean()