diff --git a/scripts/ml/dummy_classifier.py b/scripts/ml/dummy_classifier.py
index bbc4015..9ec1e92 100644
--- a/scripts/ml/dummy_classifier.py
+++ b/scripts/ml/dummy_classifier.py
@@ -62,6 +62,7 @@ X.columns
 
 y = df_clean.iloc[:,171] # dst
 y.value_counts()
+#########################
 
 y2 = df_clean.iloc[:,172] #dst_mode
 y2.value_counts()
@@ -107,3 +108,34 @@ acccuracy:
 TP+TN/TP+TN+FP+FN
 
 114/71
+
+######################################
+# Baseline check: fit a most-frequent DummyClassifier on toy data, then cross-validate it
+
+X_eg = np.array([-1, 1, 1, 1, -2, 9, 4, 4, 1, -1, 3, 0])  # 12 toy feature values (1-D array)
+y_eg = np.array([0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])  # 12 toy labels: seven 0s and five 1s
+dummy_clf = DummyClassifier(strategy="most_frequent")
+dummy_clf.fit(X_eg, y_eg)
+#DummyClassifier(strategy='most_frequent')
+dummy_clf.predict(X_eg)
+#dummy_clf.predict(np.array([1,1,1,1,1,1,1,1,1,1]))
+#dummy_clf.predict_proba(X_eg)
+
+dummy_clf.score(X_eg, y_eg)  # scored on the same data the classifier was fitted on
+
+
+cv_DummyD = cross_validate(dummy_clf
+                      , X_eg
+                      , y_eg
+                      , cv = 5
+                      #, groups = group
+                      , scoring = scoring_fn  # NOTE(review): scoring_fn is not defined in the visible part of this script - confirm it is in scope
+                      , return_train_score = True)
+
+cv_dummyD_ALL= {}
+cv_dummyD_ALL['DUMMY'] = {}  # nested dict: model label -> {cross_validate key: mean value}
+
+for key, value in cv_DummyD.items():  # walk every entry returned by cross_validate
+    print('\nkey:', key, '\nvalue:', value)
+    print('\nmean value:', np.mean(value))
+    cv_dummyD_ALL['DUMMY'][key] = round(np.mean(value),2)  # keep only the mean, rounded to 2 decimals
\ No newline at end of file
diff --git a/scripts/ml/ml_functions/MultClfs.py b/scripts/ml/ml_functions/MultClfs.py
index dafe756..c22e41b 100755
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@@ -78,9 +78,10 @@ import itertools
 from sklearn.model_selection import LeaveOneGroupOut
 from sklearn.decomposition import PCA
 from sklearn.naive_bayes import ComplementNB
+from sklearn.dummy import DummyClassifier
 
 #%% GLOBALS
-#rs = {'random_state': 42}
+#rs = {'random_state': 42} # no longer a global here: now set inside the function call
 #njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
 
 scoring_fn =  ({ 'mcc'        : make_scorer(matthews_corrcoef)
@@ -261,37 +262,36 @@ def MultModelsCl(input_df, target
     #======================================================
     models = [('AdaBoost Classifier'         , AdaBoostClassifier(**rs) )
               , ('Bagging Classifier'        , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True, verbose = 3, n_estimators = 100) )
-              #, ('Bernoulli NB'               , BernoulliNB() ) # pks Naive Bayes, CAUTION
-              , ('Complement NB'             , ComplementNB() )
-              , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
-              , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
-              , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
-              , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
-              , ('Gaussian NB'               , GaussianNB() )
-              , ('Gaussian Process'          , GaussianProcessClassifier(**rs) )
-              , ('K-Nearest Neighbors'       , KNeighborsClassifier() ) 
-              , ('LDA'                       , LinearDiscriminantAnalysis() )
-              , ('Logistic Regression'       , LogisticRegression(**rs) )
-              , ('Logistic RegressionCV'     , LogisticRegressionCV(cv = 3, **rs))
-              , ('MLP'                       , MLPClassifier(max_iter = 500, **rs) ) 
-              , ('Multinomial NB'               , MultinomialNB() )
-
-              , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
-              , ('QDA'                       , QuadraticDiscriminantAnalysis() )
-              , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) ) 
-              , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
-                                                                         , n_estimators     = 1000
-                                                                         , bootstrap        = True
-                                                                         , oob_score        = True
-                                                                         , **njobs
-                                                                         , **rs
-                                                                         , max_features     = 'auto') ) 
-              , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
-              , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 3) )          
-              , ('SVC'                       , SVC(**rs) ) 
-              , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
-              , ('XGBoost'                   , XGBClassifier(**rs, verbosity = 0, use_label_encoder = False, **njobs) )
-              
+               #, ('Bernoulli NB'               , BernoulliNB() ) # pks Naive Bayes, CAUTION
+               , ('Complement NB'             , ComplementNB() )
+               , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
+               , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
+               , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
+               , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
+               , ('Gaussian NB'               , GaussianNB() )
+               , ('Gaussian Process'          , GaussianProcessClassifier(**rs) )
+               , ('K-Nearest Neighbors'       , KNeighborsClassifier() ) 
+               , ('LDA'                       , LinearDiscriminantAnalysis() )
+               , ('Logistic Regression'       , LogisticRegression(**rs) )
+               , ('Logistic RegressionCV'     , LogisticRegressionCV(cv = 3, **rs))
+               , ('MLP'                       , MLPClassifier(max_iter = 500, **rs) ) 
+               , ('Multinomial NB'            , MultinomialNB() )
+               , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
+               , ('QDA'                       , QuadraticDiscriminantAnalysis() )
+               , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000, **njobs ) ) 
+               , ('Random Forest2'            , RandomForestClassifier(min_samples_leaf = 5
+                                                                          , n_estimators     = 1000
+                                                                          , bootstrap        = True
+                                                                          , oob_score        = True
+                                                                          , **njobs
+                                                                          , **rs
+                                                                          , max_features     = 'auto') ) 
+               , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
+               , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 3) )          
+               , ('SVC'                       , SVC(**rs) ) 
+               , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
+               , ('XGBoost'                   , XGBClassifier(**rs, verbosity = 0, use_label_encoder = False, **njobs) )
+               , ('Dummy Classifier'          , DummyClassifier(strategy = 'most_frequent') )
              ]
                 
     mm_skf_scoresD = {}
diff --git a/scripts/ml/ml_functions/test_func_singlegene.py b/scripts/ml/ml_functions/test_func_singlegene.py
index 707b188..18267dd 100644
--- a/scripts/ml/ml_functions/test_func_singlegene.py
+++ b/scripts/ml/ml_functions/test_func_singlegene.py
@@ -14,10 +14,11 @@ sys.path
 # import
 from GetMLData import *
 from SplitTTS import *
-#from MultClfs import *
-from MultClfs_SIMPLE import *
+from MultClfs import *
+#from MultClfs_SIMPLE import *
 
 #%%
+rs = {'random_state': 42}  # seed kwargs; unpacked via **rs into StratifiedKFold just below
 skf_cv = StratifiedKFold(n_splits = 10
                             , shuffle = True,**rs)
 #sel_cv = logo
@@ -28,12 +29,12 @@ skf_cv = StratifiedKFold(n_splits = 10
 gene_model_paramD = {'data_combined_model'       : False
                     , 'use_or'                   : False
                     , 'omit_all_genomic_features': False
-                    , 'write_maskfile'           : True
+                    , 'write_maskfile'           : False
                     , 'write_outfile'            : False }
 
 #df = getmldata(gene, drug, **gene_model_paramD)
-df = getmldata('pncA', 'pyrazinamide', **gene_model_paramD)
-#df = getmldata('embB', 'ethambutol'   , **gene_model_paramD)
+#df = getmldata('pncA', 'pyrazinamide', **gene_model_paramD)
+df = getmldata('embB', 'ethambutol'   , **gene_model_paramD)
 #df = getmldata('katG', 'isoniazid'    , **gene_model_paramD)
 #df = getmldata('rpoB', 'rifampicin'   , **gene_model_paramD)
 #df  = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
@@ -68,9 +69,8 @@ len(df)
 Counter(df2['y'])
 Counter(df2['y_bts'])
 
-
-fooD = MultModelsCl(input_df = df2['X_ros']
-                , target = df2['y_ros']
+fooD = MultModelsCl(input_df = df2['X']
+                , target = df2['y']
                 , sel_cv = skf_cv
                 , run_blind_test = True
                 , blind_test_df =  df2['X_bts']
@@ -87,7 +87,12 @@ for k, v in fooD.items():
           , '\nTRAIN MCC:', fooD[k]['test_mcc']
           , '\nBTS MCC:' , fooD[k]['bts_mcc']
           , '\nDIFF:',fooD[k]['bts_mcc'] - fooD[k]['test_mcc'] )
-    
+
+for k, v in fooD.items():  # per-model accuracy summary, mirroring the MCC report above
+    print('\nModel:', k
+          , '\nTRAIN ACCURACY:', fooD[k]['test_accuracy']  # NOTE(review): labelled TRAIN but reads the 'test_accuracy' key - confirm this is intended
+          , '\nBTS ACCURACY:' , fooD[k]['bts_accuracy']
+          , '\nDIFF:',fooD[k]['bts_accuracy'] - fooD[k]['test_accuracy'] )  # BTS accuracy minus the 'test_accuracy' value
 #%% CHECK SCALING
 embb_df = getmldata('embB', 'ethambutol'   , **combined_model_paramD)
 all(embb_df.columns.isin(['gene_name'])) # should be False
diff --git a/scripts/ml/ml_iterator.py b/scripts/ml/ml_iterator.py
index ea9002c..35b3c9d 100755
--- a/scripts/ml/ml_iterator.py
+++ b/scripts/ml/ml_iterator.py
@@ -82,22 +82,26 @@ for gene, drug in ml_gene_drugD.items():
                                         , 'target'         : tempD['y']
                                         , 'var_type'       : 'mixed'
                                         , 'resampling_type': 'none'}
-                    , 'smnc_paramD': { 'input_df'          : tempD['X_smnc']
-                                      , 'target'           : tempD['y_smnc']
+                    
+                    , 'smnc_paramD'  : { 'input_df'        : tempD['X_smnc']
+                                       , 'target'          : tempD['y_smnc']
+                                       , 'var_type'        : 'mixed'
+                                       , 'resampling_type' : 'smnc'}
+                    
+                    , 'ros_paramD'   : { 'input_df'        : tempD['X_ros']
+                                      , 'target'           : tempD['y_ros']
                                       , 'var_type'         : 'mixed'
-                                      , 'resampling_type'  : 'smnc'}
-                    , 'ros_paramD': { 'input_df'           : tempD['X_ros']
-                                    , 'target'             : tempD['y_ros']
-                                    , 'var_type'           : 'mixed'
-                                    , 'resampling_type'    : 'ros'}
-                    , 'rus_paramD' : { 'input_df'          : tempD['X_rus']
+                                      , 'resampling_type'  : 'ros'}
+                    
+                    , 'rus_paramD'   : { 'input_df'        : tempD['X_rus']
                                       , 'target'           : tempD['y_rus']
                                       , 'var_type'         : 'mixed'
                                       , 'resampling_type'  : 'rus'}
-                    , 'rouC_paramD' : { 'input_df'         : tempD['X_rouC']
-                                        , 'target'         : tempD['y_rouC']
-                                        , 'var_type'       : 'mixed'
-                                        , 'resampling_type': 'rouC'}
+                    
+                    , 'rouC_paramD'  : { 'input_df'        : tempD['X_rouC']
+                                      , 'target'           : tempD['y_rouC']
+                                      , 'var_type'         : 'mixed'
+                                      , 'resampling_type'  : 'rouC'}
                     }
             
             mmDD = {}