trying under and oversampling

2022-05-26 07:38:21 +01:00 · 2022-05-26 07:38:21 +01:00 · 5779331981
commit 5779331981
parent 8f8306d948
5 changed files with 129 additions and 16 deletions
--- a/uq_ml_models_FS/scriptfsycm.py
+++ b/uq_ml_models_FS/scriptfsycm.py
@ -0,0 +1,60 @@
+import pandas as pd
+import numpy as np
+import scipy as sp
+import time
+import sys
+import os
+import re
+import argparse
+from math import sqrt
+from scipy import stats
+import joblib
+# Algorithm
+from xgboost.sklearn import XGBClassifier
+from sklearn import svm
+from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neural_network import MLPRegressor
+from sklearn.utils import all_estimators
+# Pre-processing
+from sklearn import preprocessing
+from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import make_classification
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict
+# Metric
+from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
+
+def run_all_ML(input_pd, target_label, cv=10, n_jobs=10):
+    """Cross-validate every scikit-learn classifier and tabulate the scores.
+
+    Each class yielded by ``all_estimators(type_filter="classifier")`` is
+    instantiated with its default arguments, wrapped in a one-step
+    ``Pipeline`` and scored from the out-of-fold predictions returned by
+    ``cross_val_predict``.
+
+    Args:
+        input_pd: Feature matrix, passed to ``cross_val_predict`` as ``X``.
+        target_label: Target values, passed to ``cross_val_predict`` as ``y``
+            (the labels themselves, not a column name). The four-way
+            unpacking of the confusion matrix assumes a binary target.
+        cv: Cross-validation setting forwarded to ``cross_val_predict``.
+        n_jobs: Number of parallel jobs forwarded to ``cross_val_predict``.
+
+    Returns:
+        A ``pandas.DataFrame`` with one row per estimator that ran without
+        error and the columns ``estimator, TP, TN, FP, FN, roc_auc, matthew,
+        bacc, f1``. Counts are ints and scores are floats rounded to three
+        decimals. The frame has these columns even when no estimator
+        succeeded.
+
+    Estimators that raise (for example those that need constructor arguments
+    or reject the data) are reported on stdout and skipped.
+    """
+    y = target_label
+    X = input_pd
+
+    columns = ['estimator', 'TP', 'TN', 'FP', 'FN',
+               'roc_auc', 'matthew', 'bacc', 'f1']
+    rows = []
+    for name, algorithm in all_estimators(type_filter="classifier"):
+        try:
+            pipe = Pipeline([
+                ("model", algorithm())
+            ])
+            y_pred = cross_val_predict(pipe, X, y, cv=cv, n_jobs=n_jobs)
+
+            # scikit-learn metrics take (y_true, y_pred); the order matters
+            # for the confusion matrix, balanced accuracy and ROC AUC.
+            _mcc = round(matthews_corrcoef(y, y_pred), 3)
+            _bacc = round(balanced_accuracy_score(y, y_pred), 3)
+            _f1 = round(f1_score(y, y_pred), 3)
+            # NOTE(review): ROC AUC is computed from hard class predictions,
+            # not probabilities or decision scores.
+            _roc_auc = round(roc_auc_score(y, y_pred), 3)
+            _tn, _fp, _fn, _tp = confusion_matrix(y, y_pred).ravel()
+
+            # Collect plain Python numbers so the result columns stay numeric
+            # and can be sorted or compared directly.
+            rows.append({
+                'estimator': name,
+                'TP': int(_tp),
+                'TN': int(_tn),
+                'FP': int(_fp),
+                'FN': int(_fn),
+                'roc_auc': float(_roc_auc),
+                'matthew': float(_mcc),
+                'bacc': float(_bacc),
+                'f1': float(_f1),
+            })
+        except Exception as e:
+            # Deliberate best-effort: one failing estimator must not abort
+            # the whole sweep.
+            print("Got an error while running {}".format(name))
+            print(e)
+
+    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
+    return pd.DataFrame(rows, columns=columns)