import argparse
import os
import re
import sys
import time
from math import sqrt

import joblib
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

# Algorithm
from xgboost.sklearn import XGBClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.utils import all_estimators

# Pre-processing
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict

# Metric
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report


def run_all_ML(input_pd, target_label, cv=10, n_jobs=10):
    """Cross-validate every scikit-learn classifier and tabulate its scores.

    Each classifier returned by ``all_estimators(type_filter="classifier")``
    is instantiated with its default arguments, wrapped in a one-step
    ``Pipeline`` and evaluated with ``cross_val_predict``. The out-of-fold
    predictions are then scored against the true labels.

    Args:
        input_pd: Feature matrix, passed to ``cross_val_predict`` as ``X``.
        target_label: The target values themselves (not a column name),
            passed to ``cross_val_predict`` as ``y``. The confusion matrix
            is unpacked into exactly four cells, so only two-class targets
            produce a row.
        cv: Cross-validation setting forwarded to ``cross_val_predict``.
        n_jobs: Number of parallel jobs forwarded to ``cross_val_predict``.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully evaluated
        estimator and the columns ``estimator``, ``TP``, ``TN``, ``FP``,
        ``FN``, ``roc_auc``, ``matthew``, ``bacc`` and ``f1``. Scores are
        numeric and rounded to three decimals. The frame is empty (but
        still has these columns) when no estimator succeeds.

    Any exception raised while building, cross-validating or scoring an
    estimator is printed and that estimator is skipped, so a single
    incompatible classifier does not abort the whole sweep.
    """
    X = input_pd
    y = target_label

    columns = ['estimator', 'TP', 'TN', 'FP', 'FN',
               'roc_auc', 'matthew', 'bacc', 'f1']
    rows = []

    for name, algorithm in all_estimators(type_filter="classifier"):
        try:
            pipe = Pipeline([("model", algorithm())])
            y_pred = cross_val_predict(pipe, X, y, cv=cv, n_jobs=n_jobs)

            # scikit-learn metrics take (y_true, y_pred); reversing the order
            # swaps FP/FN and changes every asymmetric score.
            _mcc = round(matthews_corrcoef(y, y_pred), 3)
            _bacc = round(balanced_accuracy_score(y, y_pred), 3)
            _f1 = round(f1_score(y, y_pred), 3)
            # NOTE: computed from the predicted labels, not from
            # probabilities or decision scores.
            _roc_auc = round(roc_auc_score(y, y_pred), 3)
            _tn, _fp, _fn, _tp = confusion_matrix(y, y_pred).ravel()

            # Dict rows keep the counts and scores numeric; stacking them
            # next to the estimator name in one array would stringify them.
            rows.append({
                'estimator': name,
                'TP': int(_tp),
                'TN': int(_tn),
                'FP': int(_fp),
                'FN': int(_fn),
                'roc_auc': _roc_auc,
                'matthew': _mcc,
                'bacc': _bacc,
                'f1': _f1,
            })
        except Exception as e:
            # Deliberately broad: many registered classifiers need
            # constructor arguments or reject the given data.
            print("Got an error while running {}".format(name))
            print(e)

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(rows, columns=columns)