Reran to generate merged_df3 with the correct dst for dst muts; modified combining_dfs_plotting.R.
This commit is contained in:
parent
289c8913d0
commit
8079dd7b6c
6 changed files with 148 additions and 211 deletions
|
@ -168,7 +168,7 @@ def MultModelsCl(input_df, target
|
|||
@param skv_cv: stratifiedK fold int or object to allow shuffle and random state to pass
|
||||
@type: int or StratifiedKfold()
|
||||
|
||||
@var_type: numerical, categorical and mixed to determine what col_transform to apply (MinMaxScalar and/or one-ho t encoder)
|
||||
@var_type: numerical, categorical and mixed to determine what col_transform to apply (MinMaxScalar and/or one-hot encoder)
|
||||
@type: list
|
||||
|
||||
returns
|
||||
|
|
|
@ -168,7 +168,7 @@ def MultModelsCl(input_df, target
|
|||
@param skv_cv: stratifiedK fold int or object to allow shuffle and random state to pass
|
||||
@type: int or StratifiedKfold()
|
||||
|
||||
@var_type: numerical, categorical and mixed to determine what col_transform to apply (MinMaxScalar and/or one-ho t encoder)
|
||||
@var_type: numerical, categorical and mixed to determine what col_transform to apply (MinMaxScalar and/or one-hot encoder)
|
||||
@type: list
|
||||
|
||||
returns
|
||||
|
@ -239,8 +239,8 @@ def MultModelsCl(input_df, target
|
|||
# , ('Gaussian NB' , GaussianNB() )
|
||||
# , ('Gaussian Process' , GaussianProcessClassifier(**rs) )
|
||||
# , ('K-Nearest Neighbors' , KNeighborsClassifier() )
|
||||
# , ('LDA' , LinearDiscriminantAnalysis() )
|
||||
# , ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
, ('LDA' , LinearDiscriminantAnalysis() )
|
||||
, ('Logistic Regression' , LogisticRegression(**rs) )
|
||||
# , ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
|
||||
# , ('MLP' , MLPClassifier(max_iter = 500, **rs) )
|
||||
#, ('Multinomial' , MultinomialNB() )
|
||||
|
@ -259,7 +259,7 @@ def MultModelsCl(input_df, target
|
|||
# , ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
|
||||
# , ('SVC' , SVC(**rs) )
|
||||
# , ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
|
||||
# , ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
||||
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False, **njobs) )
|
||||
#
|
||||
]
|
||||
|
||||
|
|
|
@ -14,23 +14,8 @@ sys.path
|
|||
# import
|
||||
from GetMLData import *
|
||||
from SplitTTS import *
|
||||
#from MultClfs_fi import *
|
||||
from MultClfs import *
|
||||
|
||||
#%%
|
||||
# X,y = load_boston(return_X_y=True)
|
||||
# features = load_boston()['feature_names']
|
||||
|
||||
# X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)
|
||||
|
||||
# rf = RandomForestRegressor(random_state=0)
|
||||
# rf.fit(X_train,y_train)
|
||||
|
||||
|
||||
# f_i = list(zip(features,rf.feature_importances_))
|
||||
# f_i.sort(key = lambda x : x[1])
|
||||
# plt.barh([x[0] for x in f_i],[x[1] for x in f_i])
|
||||
# plt.show()
|
||||
#from MultClfs import *
|
||||
from MultClfs_SIMPLE import *
|
||||
|
||||
#%%
|
||||
|
||||
|
@ -54,9 +39,8 @@ df = getmldata('katG', 'isoniazid' , **gene_model_paramD)
|
|||
df = getmldata('rpoB', 'rifampicin' , **gene_model_paramD)
|
||||
df = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
|
||||
#df = getmldata('alr' , 'cycloserine' , **combined_model_paramD)
|
||||
|
||||
all(df.columns.isin(['gene_name'])) # should be False
|
||||
|
||||
|
||||
spl_type = '70_30'
|
||||
#spl_type = '80_20'
|
||||
#spl_type = 'sl'
|
||||
|
@ -73,6 +57,16 @@ df2 = split_tts(df
|
|||
|
||||
all(df2['X'].columns.isin(['gene_name'])) # should be False
|
||||
|
||||
df['dst'].value_counts()
|
||||
df['dst'].isna().sum()
|
||||
df['dst_mode'].value_counts()
|
||||
|
||||
len(df)
|
||||
|
||||
Counter(df2['y'])
|
||||
Counter(df2['y_bts'])
|
||||
|
||||
|
||||
fooD = MultModelsCl(input_df = df2['X']
|
||||
, target = df2['y']
|
||||
, sel_cv = skf_cv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue