changed ml output dirs and ready to run fs

2022-07-01 21:40:14 +01:00 · 2022-07-01 21:40:14 +01:00 · 11af00f1db
commit 11af00f1db
parent 57348f1874
5 changed files with 67 additions and 152 deletions
--- a/scripts/ml/ml_iterator_fs.py
+++ b/scripts/ml/ml_iterator_fs.py
@@ -15,6 +15,8 @@ homedir = os.path.expanduser("~")
 sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml/ml_functions')
 sys.path
 ###############################################################################
+outdir = homedir + '/git/LSHTM_ML/output/fs/'
+
 #====================
 # Import ML functions 
 #====================
@@ -31,7 +33,8 @@ combined_model_paramD = {'data_combined_model'   : False
                    , 'write_outfile'            : False }
 ###############################################################################
 #ml_genes = ["pncA", "embB", "katG", "rpoB", "gid"]
-outdir = homedir + '/git/Data/ml_combined/fs/'
+# outdir = homedir + '/git/Data/ml_combined/fs/'
+
 ml_gene_drugD = {'pncA'   : 'pyrazinamide'
                 # , 'embB' : 'ethambutol'
                 # , 'katG' : 'isoniazid'
@@ -39,26 +42,27 @@ ml_gene_drugD = {'pncA'   : 'pyrazinamide'
                 # , 'gid'  : 'streptomycin'
                 }
 gene_dataD={}
-#split_types = ['70_30', '80_20', 'sl']
-#split_data_types = ['actual', 'complete']
-split_types = ['70_30']
+split_types = ['70_30', '80_20', 'sl']
 split_data_types = ['actual', 'complete']
+#split_types = ['70_30']
+#split_data_types = ['actual', 'complete']

-fs_models  = [('Logistic Regression'       , LogisticRegression(**rs) )]
-# fs_models = [('AdaBoost Classifier'     , AdaBoostClassifier(**rs) )
-#           , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
-#           , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
-#           , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
-#           , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
-#           , ('LDA'                       , LinearDiscriminantAnalysis() )
-#           , ('Logistic Regression'       , LogisticRegression(**rs) )
-#           , ('Logistic RegressionCV'     , LogisticRegressionCV(cv = 3, **rs))
-#           , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
-#           , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
-#           , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
-#           , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 3) )          
-#           , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
-#           ]
+#fs_models  = [('Logistic Regression'       , LogisticRegression(**rs) )]
+
+fs_models = [('AdaBoost Classifier'     , AdaBoostClassifier(**rs) )
+          , ('Decision Tree'             , DecisionTreeClassifier(**rs) ) 
+          , ('Extra Tree'                , ExtraTreeClassifier(**rs) )
+          , ('Extra Trees'               , ExtraTreesClassifier(**rs) ) 
+          , ('Gradient Boosting'         , GradientBoostingClassifier(**rs) )
+          , ('LDA'                       , LinearDiscriminantAnalysis() )
+          , ('Logistic Regression'       , LogisticRegression(**rs) )
+          , ('Logistic RegressionCV'     , LogisticRegressionCV(cv = 3, **rs))
+          , ('Passive Aggresive'         , PassiveAggressiveClassifier(**rs, **njobs) )
+          , ('Random Forest'             , RandomForestClassifier(**rs, n_estimators = 1000 ) ) 
+          , ('Ridge Classifier'          , RidgeClassifier(**rs)  )
+          , ('Ridge ClassifierCV'        , RidgeClassifierCV(cv = 3) )          
+          , ('Stochastic GDescent'       , SGDClassifier(**rs, **njobs) )
+          ]

 for gene, drug in ml_gene_drugD.items():
    print ('\nGene:', gene
@@ -88,26 +92,28 @@ for gene, drug in ml_gene_drugD.items():
                                        , 'target'         : tempD['y']
                                        , 'var_type'       : 'mixed'
                                        , 'resampling_type': 'none'}
-                    ,'smnc_paramD': { 'input_df'          : tempD['X_smnc']
-                                     , 'target'           : tempD['y_smnc']
-                                     , 'var_type'         : 'mixed'
-                                     , 'resampling_type'  : 'smnc'}
-                    # , 'ros_paramD': { 'input_df'           : tempD['X_ros']
-                    #                 , 'target'             : tempD['y_ros']
-                    #                 , 'var_type'           : 'mixed'
-                    #                 , 'resampling_type'    : 'ros'}
-                    # , 'rus_paramD' : { 'input_df'          : tempD['X_rus']
-                    #                   , 'target'           : tempD['y_rus']
-                    #                   , 'var_type'         : 'mixed'
-                    #                   , 'resampling_type'  : 'rus'}
-                    # , 'rouC_paramD' : { 'input_df'         : tempD['X_rouC']
-                    #                     , 'target'         : tempD['y_rouC']
-                    #                     , 'var_type'       : 'mixed'
-                    #                     , 'resampling_type': 'rouC'}
+                    
+                    , 'smnc_paramD'  : { 'input_df'          : tempD['X_smnc']
+                                        , 'target'           : tempD['y_smnc']
+                                        , 'var_type'         : 'mixed'
+                                        , 'resampling_type'  : 'smnc'}
+
+                    , 'ros_paramD'   : { 'input_df'           : tempD['X_ros']
+                                        , 'target'             : tempD['y_ros']
+                                        , 'var_type'           : 'mixed'
+                                        , 'resampling_type'    : 'ros'}
+
+                    , 'rus_paramD'   : { 'input_df'          : tempD['X_rus']
+                                        , 'target'           : tempD['y_rus']
+                                        , 'var_type'         : 'mixed'
+                                        , 'resampling_type'  : 'rus'}
+
+                    , 'rouC_paramD'  : { 'input_df'         : tempD['X_rouC']
+                                        , 'target'         : tempD['y_rouC']
+                                        , 'var_type'       : 'mixed'
+                                        , 'resampling_type': 'rouC'}
                    }
-            #for m in fs_models:
-            #    print(m)
-            
+
            out_fsD = {}
            index = 1
            for model_name, model_fn in fs_models: