added reverse training as split type in SplitTTS.py
This commit is contained in:
parent
1965517681
commit
6950c4b057
2 changed files with 22 additions and 11 deletions
|
@ -47,7 +47,7 @@ njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number
|
|||
# NOTE: split_type 'none_with_bts' and 'none_only': WORK on complete data ONLY irrespective of data_type
|
||||
def split_tts(ml_input_data
|
||||
, data_type = ['actual', 'complete']
|
||||
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only']
|
||||
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only', 'reverse']
|
||||
, oversampling = True
|
||||
, dst_colname = 'dst'# determine how to subset the actual vs reverse data
|
||||
, target_colname = 'dst_mode'
|
||||
|
@ -128,10 +128,21 @@ def split_tts(ml_input_data
|
|||
|
||||
n_test_data_size = len(X) + len(X_bts)
|
||||
test_data_shape = X_bts.shape
|
||||
|
||||
if split_type == 'rt': # always on complete data
|
||||
temp_df_train = ml_input_data[ml_input_data[dst_colname].isna()]
|
||||
X = temp_df_train.drop(cols_to_dropL, axis = 1)
|
||||
y = temp_df_train[target_colname]
|
||||
|
||||
temp_df_bts = ml_input_data[ml_input_data[dst_colname].notna()]
|
||||
X_bts = temp_df_bts.drop(cols_to_dropL, axis = 1)
|
||||
y_bts = temp_df_bts[target_colname]
|
||||
|
||||
n_test_data_size = len(X) + len(X_bts)
|
||||
test_data_shape = X_bts.shape
|
||||
|
||||
if split_type == 'none_only':
|
||||
temp_df_train = ml_input_data.copy() # always complete
|
||||
|
||||
X = temp_df_train.drop(cols_to_dropL, axis = 1)
|
||||
y = temp_df_train[target_colname]
|
||||
|
||||
|
@ -194,16 +205,16 @@ def split_tts(ml_input_data
|
|||
, '\n Baseline'
|
||||
, '\n==========================='
|
||||
|
||||
, '\n\nTotal data size:', n_test_data_size
|
||||
, '\ninput data size:' , len(ml_input_data)
|
||||
|
||||
, '\n\nTrain data size:', X.shape
|
||||
, '\ny_train numbers:' , yc1
|
||||
, '\n\nTrain data size:' , X.shape
|
||||
, '\ny_train numbers:' , yc1
|
||||
|
||||
, '\n\nTest data size:', test_data_shape
|
||||
, '\ny_test_numbers:' , yc2
|
||||
, '\n\nTest data size:' , test_data_shape
|
||||
, '\ny_test_numbers:' , yc2
|
||||
|
||||
, '\n\ny_train ratio:' , yc1_ratio
|
||||
, '\ny_test ratio:' , yc2_ratio
|
||||
, '\n\ny_train ratio:' , yc1_ratio
|
||||
, '\ny_test ratio:' , yc2_ratio
|
||||
, '\n-------------------------------------------------------------')
|
||||
|
||||
if oversampling:
|
||||
|
|
|
@ -111,9 +111,9 @@ baz2 = pd.concat([baz, baz_df1], axis = 1)
|
|||
a = pd.concat([bar2, baz2], axis = 1)
|
||||
|
||||
#%% test added split_types i.e none_with_bts and none_only
|
||||
|
||||
spl_type = 'none_with_bts'
|
||||
spl_type = 'none_only'
|
||||
spl_type = 'none_with_bts'
|
||||
spl_type = 'rt'
|
||||
|
||||
#data_type = "actual"
|
||||
data_type = "complete"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue