Trying under- and oversampling
This commit is contained in:
parent
8f8306d948
commit
5779331981
5 changed files with 129 additions and 16 deletions
|
@ -36,7 +36,8 @@ from sklearn.gaussian_process.kernels import WhiteKernel
|
|||
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
|
||||
from sklearn.linear_model import RidgeClassifier
|
||||
from sklearn.linear_model import RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
|
||||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
||||
from sklearn.svm import SVC
|
||||
from xgboost import XGBClassifier
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
|
@ -72,6 +73,7 @@ print(pd.__version__)
|
|||
from statistics import mean, stdev, median, mode
|
||||
|
||||
from imblearn.over_sampling import RandomOverSampler
|
||||
from imblearn.under_sampling import RandomUnderSampler
|
||||
from imblearn.over_sampling import SMOTE
|
||||
from imblearn.pipeline import Pipeline
|
||||
from sklearn.datasets import make_classification
|
||||
|
@ -81,6 +83,7 @@ from sklearn.ensemble import AdaBoostClassifier
|
|||
from imblearn.combine import SMOTEENN
|
||||
from imblearn.under_sampling import EditedNearestNeighbours
|
||||
|
||||
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.base import BaseEstimator
|
||||
import json
|
||||
|
@ -119,6 +122,10 @@ from MultClassPipe3 import MultClassPipeSKFCV
|
|||
gene = 'pncA'
|
||||
drug = 'pyrazinamide'
|
||||
|
||||
#gene = 'katG'
|
||||
#drug = 'isoniazid'
|
||||
|
||||
|
||||
#==============
|
||||
# directories
|
||||
#==============
|
||||
|
@ -234,13 +241,13 @@ numerical_FN = common_cols_stabiltyN + foldX_cols + X_strFN + X_evolFN + X_genom
|
|||
|
||||
#categorical feature names
|
||||
categorical_FN = ['ss_class'
|
||||
, 'wt_prop_water'
|
||||
# , 'wt_prop_water'
|
||||
# , 'lineage_labels' # misleading if using merged_df3
|
||||
, 'mut_prop_water'
|
||||
, 'wt_prop_polarity'
|
||||
, 'mut_prop_polarity'
|
||||
, 'wt_calcprop'
|
||||
, 'mut_calcprop'
|
||||
# , 'mut_prop_water'
|
||||
# , 'wt_prop_polarity'
|
||||
# , 'mut_prop_polarity'
|
||||
# , 'wt_calcprop'
|
||||
# , 'mut_calcprop'
|
||||
#, 'active_aa_pos'
|
||||
]
|
||||
|
||||
|
@ -278,9 +285,9 @@ all_df_wtgt.shape
|
|||
#------
|
||||
# X
|
||||
#------
|
||||
X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
|
||||
#X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
|
||||
X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL
|
||||
#X = all_df_wtgt[numerical_FN] # training numerical only
|
||||
X = all_df_wtgt[numerical_FN] # training numerical only
|
||||
#X_bts = blind_test_df[numerical_FN] # blind test data numerical
|
||||
|
||||
#------
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue