adding prints to pnca_config file
This commit is contained in:
parent
693a5324c1
commit
c91a994828
2 changed files with 132 additions and 20 deletions
|
@ -10,7 +10,7 @@ import os
|
|||
|
||||
gene = 'pncA'
|
||||
drug = 'pyrazinamide'
|
||||
total_mtblineage_u = 8
|
||||
#total_mtblineage_u = 8
|
||||
|
||||
|
||||
homedir = os.path.expanduser("~")
|
||||
|
@ -22,5 +22,29 @@ from UQ_ML_data import *
|
|||
|
||||
# from YC run_all_ML: run locally
|
||||
from UQ_MultModelsCl import MultModelsCl
|
||||
print('TESTING cmd:'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
|
||||
print('TESTING cmd:', Counter(y))
|
||||
print('Strucutral features (n):'
|
||||
, len(common_cols_stabiltyN) + len(foldX_cols) + len(X_strFN)
|
||||
, '\nThese are:'
|
||||
, '\nCommon stablity features:', common_cols_stabiltyN
|
||||
, '\nFoldX columns:', foldX_cols
|
||||
, '\nOther struc columns:', X_strFN
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
, '\nThese are:\n'
|
||||
, X_evolFN
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('Genomic features (n):'
|
||||
, len(X_genomicFN)
|
||||
, '\nThese are:\n'
|
||||
, X_genomic_mafor, '\n'
|
||||
, X_genomic_linegae
|
||||
, '\n================================================================\n')
|
||||
|
|
|
@ -26,6 +26,72 @@ from sklearn.pipeline import Pipeline, make_pipeline
|
|||
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict
|
||||
# Metrics
|
||||
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
|
||||
###############################################################################
|
||||
# TT imports
|
||||
from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
|
||||
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
|
||||
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
|
||||
from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
|
||||
from copy import deepcopy
|
||||
from sklearn import linear_model
|
||||
from sklearn import datasets
|
||||
from collections import Counter
|
||||
|
||||
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
|
||||
from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV, SGDClassifier, PassiveAggressiveClassifier
|
||||
|
||||
from sklearn.naive_bayes import BernoulliNB
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, BaggingClassifier
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.gaussian_process import GaussianProcessClassifier, kernels
|
||||
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, RationalQuadratic, WhiteKernel
|
||||
|
||||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
|
||||
from sklearn.svm import SVC
|
||||
from xgboost import XGBClassifier
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
|
||||
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.compose import make_column_transformer
|
||||
|
||||
from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
|
||||
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
|
||||
|
||||
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
|
||||
from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
|
||||
|
||||
from sklearn.pipeline import Pipeline, make_pipeline
|
||||
|
||||
from sklearn.feature_selection import RFE, RFECV
|
||||
|
||||
import itertools
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from statistics import mean, stdev, median, mode
|
||||
|
||||
from imblearn.over_sampling import RandomOverSampler
|
||||
from imblearn.under_sampling import RandomUnderSampler
|
||||
from imblearn.over_sampling import SMOTE
|
||||
from sklearn.datasets import make_classification
|
||||
from imblearn.combine import SMOTEENN
|
||||
from imblearn.combine import SMOTETomek
|
||||
|
||||
from imblearn.over_sampling import SMOTENC
|
||||
from imblearn.under_sampling import EditedNearestNeighbours
|
||||
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
|
||||
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.base import BaseEstimator
|
||||
from sklearn.impute import KNNImputer as KNN
|
||||
import json
|
||||
##############################################################################
|
||||
|
||||
# Other variables: fixed random_state (42) kept in `rs`
|
||||
rs = {'random_state': 42}
|
||||
|
@ -119,9 +185,24 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
|||
_roc_auc = round(roc_auc_score(y_pred, y), 3)
|
||||
_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel()
|
||||
|
||||
result_pd = result_pd.append(pd.DataFrame(np.column_stack([name, _tp, _tn, _fp, _fn, _roc_auc, _mcc, _bacc, _f1]),\
|
||||
columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||
'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||
# result_pd = result_pd.append(pd.DataFrame(np.column_stack([name
|
||||
# , _tp, _tn
|
||||
# , _fp , _fn
|
||||
# , _roc_auc
|
||||
# , _mcc
|
||||
# , _bacc, _f1]),\
|
||||
# columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||
# 'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||
# ignore_index=True)
|
||||
|
||||
result_pd = result_pd.append(pd.DataFrame(np.column_stack([name
|
||||
, _mcc
|
||||
, _roc_auc
|
||||
, _bacc, _f1
|
||||
, _tp, _tn
|
||||
, _fp , _fn]),\
|
||||
columns=['estimator', 'matthew', 'roc_auc', 'bacc', 'f1',\
|
||||
'TP', 'TN', 'FP', 'FN']),\
|
||||
ignore_index=True)
|
||||
#=========================
|
||||
# Blind test: BTS results
|
||||
|
@ -140,14 +221,23 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
|||
_roc_aucBTS = round(roc_auc_score(bts_predict, blind_test_target), 3)
|
||||
_tnBTS, _fpBTS, _fnBTS, _tpBTS = confusion_matrix(bts_predict, blind_test_target).ravel()
|
||||
|
||||
# result_bts_pd = result_bts_pd.append(pd.DataFrame(np.column_stack([name
|
||||
# , _tpBTS, _tnBTS
|
||||
# , _fpBTS, _fnBTS
|
||||
# , _roc_aucBTS
|
||||
# , _mccBTS
|
||||
# , _baccBTS, _f1BTS]),\
|
||||
# columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||
# 'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||
# ignore_index=True)
|
||||
result_bts_pd = result_bts_pd.append(pd.DataFrame(np.column_stack([name
|
||||
, _tpBTS, _tnBTS
|
||||
, _fpBTS, _fnBTS
|
||||
, _roc_aucBTS
|
||||
, _mccBTS
|
||||
, _baccBTS, _f1BTS]),\
|
||||
columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||
'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||
, _roc_aucBTS
|
||||
, _baccBTS, _f1BTS
|
||||
, _tpBTS, _tnBTS
|
||||
, _fpBTS, _fnBTS]),\
|
||||
columns=['estimator','matthew', 'roc_auc', 'bacc', 'f1',\
|
||||
'TP', 'TN', 'FP', 'FN']),\
|
||||
ignore_index=True)
|
||||
|
||||
|
||||
|
@ -165,15 +255,13 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
|||
|
||||
#%% CALL function
|
||||
#run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||
# Baseline_data
|
||||
|
||||
YC_resD2 = run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||
|
||||
YC_resD_ros = run_all_ML(input_pd=X_ros, target_label=y_ros, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||
|
||||
CVResultsDF = YC_resD2['CrossValResultsDF']
|
||||
CVResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
BTSResultsDF = YC_resD2['BlindTestResultsDF']
|
||||
BTSResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
CVResultsDF_baseline = YC_resD2['CrossValResultsDF']
|
||||
CVResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
BTSResultsDF_baseline = YC_resD2['BlindTestResultsDF']
|
||||
BTSResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
|
||||
# from sklearn.utils import all_estimators
|
||||
# for name, algorithm in all_estimators(type_filter="classifier"):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue