trying under and oversampling
This commit is contained in:
parent
8f8306d948
commit
5779331981
5 changed files with 129 additions and 16 deletions
60
uq_ml_models_FS/scriptfsycm.py
Normal file
60
uq_ml_models_FS/scriptfsycm.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
from math import sqrt
|
||||
from scipy import stats
|
||||
import joblib
|
||||
# Algorithm
|
||||
from xgboost.sklearn import XGBClassifier
|
||||
from sklearn import svm
|
||||
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
|
||||
from sklearn.gaussian_process import GaussianProcessClassifier
|
||||
from sklearn.ensemble import AdaBoostClassifier
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.neural_network import MLPRegressor
|
||||
from sklearn.utils import all_estimators
|
||||
# Pre-processing
|
||||
from sklearn import preprocessing
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.pipeline import Pipeline, make_pipeline
|
||||
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict
|
||||
# Metric
|
||||
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
|
||||
|
||||
def run_all_ML(input_pd, target_label, cv=10, n_jobs=10):
    """Cross-validate every scikit-learn classifier and tabulate its scores.

    Each estimator returned by ``all_estimators(type_filter="classifier")``
    is instantiated with its default arguments, wrapped in a single-step
    ``Pipeline`` and evaluated with ``cross_val_predict``. The out-of-fold
    predictions are then scored against the true labels.

    Args:
        input_pd: Feature matrix, used as ``X`` as-is (the target column is
            expected to have been removed by the caller already).
        target_label: Label vector, used as ``y`` as-is. The confusion matrix
            is unpacked into four cells and ``f1_score`` is called with its
            default averaging, so the labels must be binary.
        cv: Cross-validation strategy forwarded to ``cross_val_predict``.
            Defaults to 10, the value that used to be hard-coded.
        n_jobs: Number of parallel jobs forwarded to ``cross_val_predict``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A ``pandas.DataFrame`` with one row per estimator that ran
        successfully and the columns ``estimator``, ``TP``, ``TN``, ``FP``,
        ``FN``, ``roc_auc``, ``matthew``, ``bacc`` and ``f1``. Counts and
        metrics keep their numeric dtypes. If no estimator succeeds the
        frame is empty but still carries these columns.

    Estimators that raise (e.g. those needing constructor arguments or
    rejecting the data) are reported on stdout and skipped, so one failing
    model does not abort the whole sweep.
    """
    y = target_label
    X = input_pd

    columns = ['estimator', 'TP', 'TN', 'FP', 'FN',
               'roc_auc', 'matthew', 'bacc', 'f1']
    rows = []

    for name, algorithm in all_estimators(type_filter="classifier"):
        try:
            pipe = Pipeline([
                ("model", algorithm())
            ])
            y_pred = cross_val_predict(pipe, X, y, cv=cv, n_jobs=n_jobs)

            # scikit-learn metrics take (y_true, y_pred). Swapping them
            # changes F1, balanced accuracy and ROC AUC, and transposes the
            # confusion matrix (FP <-> FN).
            _mcc = round(matthews_corrcoef(y, y_pred), 3)
            _bacc = round(balanced_accuracy_score(y, y_pred), 3)
            _f1 = round(f1_score(y, y_pred), 3)
            # NOTE: AUC is computed from hard class predictions, not from
            # probabilities or decision scores.
            _roc_auc = round(roc_auc_score(y, y_pred), 3)
            _tn, _fp, _fn, _tp = confusion_matrix(y, y_pred).ravel()

            # Collect plain rows and build the frame once at the end;
            # DataFrame.append no longer exists in pandas >= 2.0.
            rows.append([name, _tp, _tn, _fp, _fn,
                         _roc_auc, _mcc, _bacc, _f1])
        except Exception as e:
            # Deliberate best-effort: report the failing estimator and
            # continue with the remaining ones.
            print("Got an error while running {}".format(name))
            print(e)

    return pd.DataFrame(rows, columns=columns)
|
Loading…
Add table
Add a link
Reference in a new issue