various edits
This commit is contained in:
parent
90b9477520
commit
8d8a61675f
3 changed files with 601 additions and 1 deletions
|
@ -15,7 +15,8 @@ sys.path
|
|||
from GetMLData import *
|
||||
from SplitTTS import *
|
||||
from MultClfs import *
|
||||
#from MultClfs_SIMPLE import *
|
||||
from MultClfs_noBTS import *
|
||||
|
||||
|
||||
#%%
|
||||
rs = {'random_state': 42}
|
||||
|
@ -69,6 +70,8 @@ len(df)
|
|||
Counter(df2['y'])
|
||||
Counter(df2['y_bts'])
|
||||
|
||||
#%% Run Multiple models
|
||||
|
||||
fooD = MultModelsCl(input_df = df2['X']
|
||||
, target = df2['y']
|
||||
, sel_cv = skf_cv
|
||||
|
@ -140,3 +143,106 @@ from sklearn.utils import all_estimators
|
|||
all_clfs = all_estimators(type_filter="classifier")
|
||||
df = pd.DataFrame (all_clfs, columns = ['classifier_name', 'classifier_fn'])
|
||||
df.to_csv("Model_names_ALL.csv")
|
||||
#%% TEST different CV Thresholds for split_type = NONE

# Class counts of the df2 that is currently loaded (shown as cell output when
# the cell is run interactively); evaluated before df2 is rebuilt below.
Counter(df2['y'])
Counter(df2['y_bts'])

# spl_type is reused by later calls in this section; data_type is only
# consumed by split_tts() here.
data_type = "complete"
spl_type = 'none'

# Rebuild df2 from df with oversampling switched on.
df2 = split_tts(
    df,
    data_type=data_type,
    split_type=spl_type,
    oversampling=True,
    dst_colname='dst',
    target_colname='dst_mode',
    include_gene_name=True,
    random_state=42,  # default
)
|
||||
|
||||
# Run MultModelsCl_noBTS on the X/y parts of df2 with a CV threshold of 10,
# asking for the raw (unformatted) output.
fooD = MultModelsCl_noBTS(
    input_df=df2['X'],
    target=df2['y'],
    skf_cv_threshold=10,  # IMP to change

    tts_split_type=spl_type,
    resampling_type='XXXX',  # default

    add_cm=True,  # adds confusion matrix based on cross_val_predict
    add_yn=True,  # adds target var class numbers

    var_type=['mixed'],
    scale_numeric=['min_max'],
    random_state=42,
    n_jobs=os.cpu_count(),
    return_formatted_output=False,
)
|
||||
|
||||
# Print the 'test_mcc' entry stored for every model in fooD.
# The value unpacked by .items() is used directly instead of looking the key
# up in fooD a second time.
# NOTE(review): the printed label says TRAIN while the key read is 'test_mcc'
# -- confirm the label is what is intended.
for k, v in fooD.items():
    print('\nModel:', k
          , '\nTRAIN MCC:', v['test_mcc']
          )
|
||||
|
||||
# formatted df
# Same run as above but with a CV threshold of 5 and the formatted output
# requested.
foo_df3 = MultModelsCl_noBTS(
    input_df=df2['X'],
    target=df2['y'],
    skf_cv_threshold=5,  # IMP to change

    tts_split_type=spl_type,
    resampling_type='XXXX',  # default

    add_cm=True,  # adds confusion matrix based on cross_val_predict
    add_yn=True,  # adds target var class numbers

    var_type=['mixed'],
    scale_numeric=['min_max'],
    random_state=42,
    n_jobs=os.cpu_count(),
    return_formatted_output=True,
)
|
||||
|
||||
# Row-bind the formatted score dfs, but only when every df carries the same
# set of columns; afterwards verify the shape of the combined df.
dfs_combine_wf = [foo_df, foo_df2, foo_df3]

# Columns present in every df to combine.
common_cols_wf = list(set.intersection(*(set(frame.columns) for frame in dfs_combine_wf)))

# NOTE(review): scoresDF_CV and scoresDF_BT are not defined in the visible
# part of this script -- confirm they exist in this scope, or report the
# shapes of the dfs in dfs_combine_wf instead.
print('\nCombinig', len(dfs_combine_wf), 'using pd.concat by row ~ rowbind'
      , '\nChecking Dims of df to combine:'
      , '\nDim of CV:', scoresDF_CV.shape
      , '\nDim of BT:', scoresDF_BT.shape)
#print(scoresDF_CV)
#print(scoresDF_BT)

# Largest row and column counts among the dfs. The iteration variable is
# deliberately not named `df`, so the module-level `df` used earlier in the
# script is no longer rebound to the last element of dfs_combine_wf.
dfs_nrows_wf = max(len(frame) for frame in dfs_combine_wf)
dfs_ncols_wf = max(len(frame.columns) for frame in dfs_combine_wf)
print(dfs_ncols_wf)

# Shape expected after the row-bind: one block of dfs_nrows_wf rows per df,
# and no columns beyond the widest df.
expected_nrows_wf = len(dfs_combine_wf) * dfs_nrows_wf
expected_ncols_wf = dfs_ncols_wf

# Only concatenate when the shared columns cover the widest df, i.e. all dfs
# have identical column sets.
if len(common_cols_wf) == dfs_ncols_wf:
    combined_baseline_wf = pd.concat([frame[common_cols_wf] for frame in dfs_combine_wf], ignore_index=False)
    print('\nConcatenating dfs with different resampling methods [WF]:'
          , '\nSplit type:', spl_type
          , '\nNo. of dfs combining:', len(dfs_combine_wf))

    if len(combined_baseline_wf) == expected_nrows_wf and len(combined_baseline_wf.columns) == expected_ncols_wf:
        print('\nPASS:', len(dfs_combine_wf), 'dfs successfully combined'
              , '\nnrows in combined_df_wf:', len(combined_baseline_wf)
              , '\nncols in combined_df_wf:', len(combined_baseline_wf.columns))
    else:
        print('\nFAIL: concatenating failed'
              , '\nExpected nrows:', expected_nrows_wf
              , '\nGot:', len(combined_baseline_wf)
              , '\nExpected ncols:', expected_ncols_wf
              , '\nGot:', len(combined_baseline_wf.columns))
        # NOTE(review): indentation was lost in the source view; the exit is
        # placed in this FAIL branch -- confirm against the original file.
        sys.exit('\nFIRST IF FAILS')
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue