Reran to generate merged_df3 with the correct dst for dst mutations. Modified combining_dfs_plotting.R.

This commit is contained in:
Tanushree Tunstall 2022-07-08 21:33:57 +01:00
parent 289c8913d0
commit 8079dd7b6c
6 changed files with 148 additions and 211 deletions

View file

@ -14,23 +14,8 @@ sys.path
# import
from GetMLData import *
from SplitTTS import *
#from MultClfs_fi import *
from MultClfs import *
#%%
# X,y = load_boston(return_X_y=True)
# features = load_boston()['feature_names']
# X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)
# rf = RandomForestRegressor(random_state=0)
# rf.fit(X_train,y_train)
# f_i = list(zip(features,rf.feature_importances_))
# f_i.sort(key = lambda x : x[1])
# plt.barh([x[0] for x in f_i],[x[1] for x in f_i])
# plt.show()
#from MultClfs import *
from MultClfs_SIMPLE import *
#%%
@ -54,9 +39,8 @@ df = getmldata('katG', 'isoniazid' , **gene_model_paramD)
df = getmldata('rpoB', 'rifampicin' , **gene_model_paramD)
df = getmldata('gid' , 'streptomycin' , **gene_model_paramD)
#df = getmldata('alr' , 'cycloserine' , **combined_model_paramD)
all(df.columns.isin(['gene_name'])) # should be False
spl_type = '70_30'
#spl_type = '80_20'
#spl_type = 'sl'
@ -73,6 +57,16 @@ df2 = split_tts(df
# Interactive sanity checks: none of these results is assigned, so they are
# only visible when the lines are run in a console or notebook-style cell.
# NOTE(review): all(...) is False as soon as one column of df2['X'] is not
# 'gene_name', so a False result does not show that 'gene_name' is absent;
# any(...) would test for presence -- confirm intent.
all(df2['X'].columns.isin(['gene_name'])) # should be False
# Value distribution and missing-value count of the 'dst' column, followed by
# the value distribution of 'dst_mode'.
df['dst'].value_counts()
df['dst'].isna().sum()
df['dst_mode'].value_counts()
# Length of df (the row count, assuming df is a pandas DataFrame).
len(df)
# Occurrences of each distinct value in the 'y' and 'y_bts' entries of df2.
Counter(df2['y'])
Counter(df2['y_bts'])
fooD = MultModelsCl(input_df = df2['X']
, target = df2['y']
, sel_cv = skf_cv