Added reverse training as a split type in SplitTTS.py
This commit is contained in:
parent
1965517681
commit
6950c4b057
2 changed files with 22 additions and 11 deletions
|
@ -47,7 +47,7 @@ njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number
|
||||||
# NOTE: split_type 'none_with_bts' and 'none_only': WORK on complete data ONLY irrespective of data_type
|
# NOTE: split_type 'none_with_bts' and 'none_only': WORK on complete data ONLY irrespective of data_type
|
||||||
def split_tts(ml_input_data
|
def split_tts(ml_input_data
|
||||||
, data_type = ['actual', 'complete']
|
, data_type = ['actual', 'complete']
|
||||||
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only']
|
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only', 'reverse']
|
||||||
, oversampling = True
|
, oversampling = True
|
||||||
, dst_colname = 'dst'# determine how to subset the actual vs reverse data
|
, dst_colname = 'dst'# determine how to subset the actual vs reverse data
|
||||||
, target_colname = 'dst_mode'
|
, target_colname = 'dst_mode'
|
||||||
|
@ -128,10 +128,21 @@ def split_tts(ml_input_data
|
||||||
|
|
||||||
n_test_data_size = len(X) + len(X_bts)
|
n_test_data_size = len(X) + len(X_bts)
|
||||||
test_data_shape = X_bts.shape
|
test_data_shape = X_bts.shape
|
||||||
|
|
||||||
|
if split_type == 'rt': # always on complete data
|
||||||
|
temp_df_train = ml_input_data[ml_input_data[dst_colname].isna()]
|
||||||
|
X = temp_df_train.drop(cols_to_dropL, axis = 1)
|
||||||
|
y = temp_df_train[target_colname]
|
||||||
|
|
||||||
|
temp_df_bts = ml_input_data[ml_input_data[dst_colname].notna()]
|
||||||
|
X_bts = temp_df_bts.drop(cols_to_dropL, axis = 1)
|
||||||
|
y_bts = temp_df_bts[target_colname]
|
||||||
|
|
||||||
|
n_test_data_size = len(X) + len(X_bts)
|
||||||
|
test_data_shape = X_bts.shape
|
||||||
|
|
||||||
if split_type == 'none_only':
|
if split_type == 'none_only':
|
||||||
temp_df_train = ml_input_data.copy() # always complete
|
temp_df_train = ml_input_data.copy() # always complete
|
||||||
|
|
||||||
X = temp_df_train.drop(cols_to_dropL, axis = 1)
|
X = temp_df_train.drop(cols_to_dropL, axis = 1)
|
||||||
y = temp_df_train[target_colname]
|
y = temp_df_train[target_colname]
|
||||||
|
|
||||||
|
@ -194,16 +205,16 @@ def split_tts(ml_input_data
|
||||||
, '\n Baseline'
|
, '\n Baseline'
|
||||||
, '\n==========================='
|
, '\n==========================='
|
||||||
|
|
||||||
, '\n\nTotal data size:', n_test_data_size
|
, '\ninput data size:' , len(ml_input_data)
|
||||||
|
|
||||||
, '\n\nTrain data size:', X.shape
|
, '\n\nTrain data size:' , X.shape
|
||||||
, '\ny_train numbers:' , yc1
|
, '\ny_train numbers:' , yc1
|
||||||
|
|
||||||
, '\n\nTest data size:', test_data_shape
|
, '\n\nTest data size:' , test_data_shape
|
||||||
, '\ny_test_numbers:' , yc2
|
, '\ny_test_numbers:' , yc2
|
||||||
|
|
||||||
, '\n\ny_train ratio:' , yc1_ratio
|
, '\n\ny_train ratio:' , yc1_ratio
|
||||||
, '\ny_test ratio:' , yc2_ratio
|
, '\ny_test ratio:' , yc2_ratio
|
||||||
, '\n-------------------------------------------------------------')
|
, '\n-------------------------------------------------------------')
|
||||||
|
|
||||||
if oversampling:
|
if oversampling:
|
||||||
|
|
|
@ -111,9 +111,9 @@ baz2 = pd.concat([baz, baz_df1], axis = 1)
|
||||||
a = pd.concat([bar2, baz2], axis = 1)
|
a = pd.concat([bar2, baz2], axis = 1)
|
||||||
|
|
||||||
#%% test added split_types i.e none_with_bts and none_only
|
#%% test added split_types i.e none_with_bts and none_only
|
||||||
|
|
||||||
spl_type = 'none_with_bts'
|
|
||||||
spl_type = 'none_only'
|
spl_type = 'none_only'
|
||||||
|
spl_type = 'none_with_bts'
|
||||||
|
spl_type = 'rt'
|
||||||
|
|
||||||
#data_type = "actual"
|
#data_type = "actual"
|
||||||
data_type = "complete"
|
data_type = "complete"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue