diff --git a/UQ_pnca_ML.py b/UQ_pnca_ML.py index d8fad0f..8136892 100644 --- a/UQ_pnca_ML.py +++ b/UQ_pnca_ML.py @@ -14,105 +14,10 @@ print(np.__version__) print(pd.__version__) import pprint as pp from copy import deepcopy -from sklearn import linear_model -from sklearn import datasets from collections import Counter - -from sklearn.linear_model import LogisticRegression, LogisticRegressionCV -from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV, SGDClassifier, PassiveAggressiveClassifier - -from sklearn.naive_bayes import BernoulliNB -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, BaggingClassifier -from sklearn.naive_bayes import GaussianNB -from sklearn.gaussian_process import GaussianProcessClassifier, kernels -from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, RationalQuadratic, WhiteKernel - -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis -from sklearn.neural_network import MLPClassifier - -from sklearn.svm import SVC -from xgboost import XGBClassifier -from sklearn.naive_bayes import MultinomialNB -from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder - -from sklearn.compose import ColumnTransformer -from sklearn.compose import make_column_transformer - -from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score -from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report - -from sklearn.model_selection import train_test_split, cross_validate, cross_val_score -from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold - -from sklearn.pipeline import Pipeline, make_pipeline - -from sklearn.feature_selection import RFE, RFECV - -import itertools -import seaborn as sns -import matplotlib.pyplot as plt - -from statistics import mean, stdev, median, mode - -from imblearn.over_sampling import RandomOverSampler -from imblearn.under_sampling import RandomUnderSampler -from imblearn.over_sampling import SMOTE -from sklearn.datasets import make_classification -from imblearn.combine import SMOTEENN -from imblearn.combine import SMOTETomek - -from imblearn.over_sampling import SMOTENC -from imblearn.under_sampling import EditedNearestNeighbours -from imblearn.under_sampling import RepeatedEditedNearestNeighbours - -from sklearn.model_selection import GridSearchCV -from sklearn.base import BaseEstimator -import json from sklearn.impute import KNNImputer as KNN -# My functions and globals -scoring_fn = ({'accuracy' : make_scorer(accuracy_score) - , 'fscore' : make_scorer(f1_score) - , 'mcc' : make_scorer(matthews_corrcoef) - , 'precision' : make_scorer(precision_score) - , 'recall' : make_scorer(recall_score) - , 'roc_auc' : make_scorer(roc_auc_score) - , 'jcc' : make_scorer(jaccard_score) - }) - -rs = {'random_state': 42} -njobs = {'n_jobs': 10} -skf_cv = StratifiedKFold(n_splits = 10 - #, shuffle = False, random_state= None) - , shuffle = True,**rs) - -rskf_cv = RepeatedStratifiedKFold(n_splits = 10 - , n_repeats = 3 - , **rs) - -mcc_score_fn = {'mcc': make_scorer(matthews_corrcoef)} -jacc_score_fn = {'jcc': make_scorer(jaccard_score)} #%% -homedir = os.path.expanduser("~") -os.chdir(homedir + "/git/ML_AI_training/") - -# my function -#from MultClassPipe import MultClassPipeline -from MultClassPipe2 import MultClassPipeline2 -from loopity_loop import MultClassPipeSKFLoop -#from MultClassPipe3 import MultClassPipeSKFCV -#from UQ_MultClassPipe4 import MultClassPipeSKFCV -from UQ_MultModelsCl import MultModelsCl -#gene = 'pncA' -#drug = 'pyrazinamide' - -#gene = 'katG' -#drug = 'isoniazid' - - #============== # directories #==============