adding prints to pnca_config file
This commit is contained in:
parent
693a5324c1
commit
c91a994828
2 changed files with 132 additions and 20 deletions
|
@ -10,7 +10,7 @@ import os
|
||||||
|
|
||||||
gene = 'pncA'
|
gene = 'pncA'
|
||||||
drug = 'pyrazinamide'
|
drug = 'pyrazinamide'
|
||||||
total_mtblineage_u = 8
|
#total_mtblineage_u = 8
|
||||||
|
|
||||||
|
|
||||||
homedir = os.path.expanduser("~")
|
homedir = os.path.expanduser("~")
|
||||||
|
@ -22,5 +22,29 @@ from UQ_ML_data import *
|
||||||
|
|
||||||
# from YC run_all_ML: run locally
|
# from YC run_all_ML: run locally
|
||||||
from UQ_MultModelsCl import MultModelsCl
|
from UQ_MultModelsCl import MultModelsCl
|
||||||
|
print('TESTING cmd:'
|
||||||
|
, '\nGene name:', gene
|
||||||
|
, '\nDrug name:', drug
|
||||||
|
, '\nTotal input features:', X.shape
|
||||||
|
, '\n', Counter(y))
|
||||||
|
|
||||||
print('TESTING cmd:', Counter(y))
|
print('Strucutral features (n):'
|
||||||
|
, len(common_cols_stabiltyN) + len(foldX_cols) + len(X_strFN)
|
||||||
|
, '\nThese are:'
|
||||||
|
, '\nCommon stablity features:', common_cols_stabiltyN
|
||||||
|
, '\nFoldX columns:', foldX_cols
|
||||||
|
, '\nOther struc columns:', X_strFN
|
||||||
|
, '\n================================================================\n')
|
||||||
|
|
||||||
|
print('Evolutionary features (n):'
|
||||||
|
, len(X_evolFN)
|
||||||
|
, '\nThese are:\n'
|
||||||
|
, X_evolFN
|
||||||
|
, '\n================================================================\n')
|
||||||
|
|
||||||
|
print('Genomic features (n):'
|
||||||
|
, len(X_genomicFN)
|
||||||
|
, '\nThese are:\n'
|
||||||
|
, X_genomic_mafor, '\n'
|
||||||
|
, X_genomic_linegae
|
||||||
|
, '\n================================================================\n')
|
||||||
|
|
|
@ -26,6 +26,72 @@ from sklearn.pipeline import Pipeline, make_pipeline
|
||||||
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict
|
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, LeaveOneOut, KFold, RepeatedKFold, cross_val_predict
|
||||||
# Metric
|
# Metric
|
||||||
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
|
from sklearn.metrics import mean_squared_error, make_scorer, roc_auc_score, f1_score, matthews_corrcoef, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
|
||||||
|
###############################################################################
|
||||||
|
# TT imports
|
||||||
|
from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
|
||||||
|
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
|
||||||
|
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
|
||||||
|
from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
|
||||||
|
from copy import deepcopy
|
||||||
|
from sklearn import linear_model
|
||||||
|
from sklearn import datasets
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
|
||||||
|
from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV, SGDClassifier, PassiveAggressiveClassifier
|
||||||
|
|
||||||
|
from sklearn.naive_bayes import BernoulliNB
|
||||||
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
|
||||||
|
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, BaggingClassifier
|
||||||
|
from sklearn.naive_bayes import GaussianNB
|
||||||
|
from sklearn.gaussian_process import GaussianProcessClassifier, kernels
|
||||||
|
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, RationalQuadratic, WhiteKernel
|
||||||
|
|
||||||
|
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
|
||||||
|
from sklearn.neural_network import MLPClassifier
|
||||||
|
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from xgboost import XGBClassifier
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
|
||||||
|
|
||||||
|
from sklearn.compose import ColumnTransformer
|
||||||
|
from sklearn.compose import make_column_transformer
|
||||||
|
|
||||||
|
from sklearn.metrics import make_scorer, confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, average_precision_score, recall_score
|
||||||
|
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, matthews_corrcoef, jaccard_score, classification_report
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
|
||||||
|
from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold, RepeatedKFold
|
||||||
|
|
||||||
|
from sklearn.pipeline import Pipeline, make_pipeline
|
||||||
|
|
||||||
|
from sklearn.feature_selection import RFE, RFECV
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from statistics import mean, stdev, median, mode
|
||||||
|
|
||||||
|
from imblearn.over_sampling import RandomOverSampler
|
||||||
|
from imblearn.under_sampling import RandomUnderSampler
|
||||||
|
from imblearn.over_sampling import SMOTE
|
||||||
|
from sklearn.datasets import make_classification
|
||||||
|
from imblearn.combine import SMOTEENN
|
||||||
|
from imblearn.combine import SMOTETomek
|
||||||
|
|
||||||
|
from imblearn.over_sampling import SMOTENC
|
||||||
|
from imblearn.under_sampling import EditedNearestNeighbours
|
||||||
|
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
|
||||||
|
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
from sklearn.base import BaseEstimator
|
||||||
|
from sklearn.impute import KNNImputer as KNN
|
||||||
|
import json
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
# other vars
|
# other vars
|
||||||
rs = {'random_state': 42}
|
rs = {'random_state': 42}
|
||||||
|
@ -119,9 +185,24 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
||||||
_roc_auc = round(roc_auc_score(y_pred, y), 3)
|
_roc_auc = round(roc_auc_score(y_pred, y), 3)
|
||||||
_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel()
|
_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel()
|
||||||
|
|
||||||
result_pd = result_pd.append(pd.DataFrame(np.column_stack([name, _tp, _tn, _fp, _fn, _roc_auc, _mcc, _bacc, _f1]),\
|
# result_pd = result_pd.append(pd.DataFrame(np.column_stack([name
|
||||||
columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
# , _tp, _tn
|
||||||
'roc_auc', 'matthew', 'bacc', 'f1']),\
|
# , _fp , _fn
|
||||||
|
# , _roc_auc
|
||||||
|
# , _mcc
|
||||||
|
# , _bacc, _f1]),\
|
||||||
|
# columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||||
|
# 'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||||
|
# ignore_index=True)
|
||||||
|
|
||||||
|
result_pd = result_pd.append(pd.DataFrame(np.column_stack([name
|
||||||
|
, _mcc
|
||||||
|
, _roc_auc
|
||||||
|
, _bacc, _f1
|
||||||
|
, _tp, _tn
|
||||||
|
, _fp , _fn]),\
|
||||||
|
columns=['estimator', 'matthew', 'roc_auc', 'bacc', 'f1',\
|
||||||
|
'TP', 'TN', 'FP', 'FN']),\
|
||||||
ignore_index=True)
|
ignore_index=True)
|
||||||
#=========================
|
#=========================
|
||||||
# Blind test: BTS results
|
# Blind test: BTS results
|
||||||
|
@ -140,14 +221,23 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
||||||
_roc_aucBTS = round(roc_auc_score(bts_predict, blind_test_target), 3)
|
_roc_aucBTS = round(roc_auc_score(bts_predict, blind_test_target), 3)
|
||||||
_tnBTS, _fpBTS, _fnBTS, _tpBTS = confusion_matrix(bts_predict, blind_test_target).ravel()
|
_tnBTS, _fpBTS, _fnBTS, _tpBTS = confusion_matrix(bts_predict, blind_test_target).ravel()
|
||||||
|
|
||||||
|
# result_bts_pd = result_bts_pd.append(pd.DataFrame(np.column_stack([name
|
||||||
|
# , _tpBTS, _tnBTS
|
||||||
|
# , _fpBTS, _fnBTS
|
||||||
|
# , _roc_aucBTS
|
||||||
|
# , _mccBTS
|
||||||
|
# , _baccBTS, _f1BTS]),\
|
||||||
|
# columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
||||||
|
# 'roc_auc', 'matthew', 'bacc', 'f1']),\
|
||||||
|
# ignore_index=True)
|
||||||
result_bts_pd = result_bts_pd.append(pd.DataFrame(np.column_stack([name
|
result_bts_pd = result_bts_pd.append(pd.DataFrame(np.column_stack([name
|
||||||
, _tpBTS, _tnBTS
|
|
||||||
, _fpBTS, _fnBTS
|
|
||||||
, _roc_aucBTS
|
|
||||||
, _mccBTS
|
, _mccBTS
|
||||||
, _baccBTS, _f1BTS]),\
|
, _roc_aucBTS
|
||||||
columns=['estimator', 'TP', 'TN', 'FP', 'FN',
|
, _baccBTS, _f1BTS
|
||||||
'roc_auc', 'matthew', 'bacc', 'f1']),\
|
, _tpBTS, _tnBTS
|
||||||
|
, _fpBTS, _fnBTS]),\
|
||||||
|
columns=['estimator','matthew', 'roc_auc', 'bacc', 'f1',\
|
||||||
|
'TP', 'TN', 'FP', 'FN']),\
|
||||||
ignore_index=True)
|
ignore_index=True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,15 +255,13 @@ def run_all_ML(input_pd, target_label, blind_test_input_df, blind_test_target, p
|
||||||
|
|
||||||
#%% CALL function
|
#%% CALL function
|
||||||
#run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
#run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||||
|
# Baseline_data
|
||||||
|
|
||||||
YC_resD2 = run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
YC_resD2 = run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||||
|
CVResultsDF_baseline = YC_resD2['CrossValResultsDF']
|
||||||
YC_resD_ros = run_all_ML(input_pd=X_ros, target_label=y_ros, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
CVResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||||
|
BTSResultsDF_baseline = YC_resD2['BlindTestResultsDF']
|
||||||
CVResultsDF = YC_resD2['CrossValResultsDF']
|
BTSResultsDF_baseline.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||||
CVResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
|
||||||
BTSResultsDF = YC_resD2['BlindTestResultsDF']
|
|
||||||
BTSResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
|
||||||
|
|
||||||
# from sklearn.utils import all_estimators
|
# from sklearn.utils import all_estimators
|
||||||
# for name, algorithm in all_estimators(type_filter="classifier"):
|
# for name, algorithm in all_estimators(type_filter="classifier"):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue