Added reverse training as a split type in SplitTTS.py

This commit is contained in:
Tanushree Tunstall 2022-07-11 20:03:06 +01:00
parent 1965517681
commit 6950c4b057
2 changed files with 22 additions and 11 deletions

View file

@ -47,7 +47,7 @@ njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number
# NOTE: split_type 'none_with_bts' and 'none_only': WORK on complete data ONLY irrespective of data_type
def split_tts(ml_input_data
, data_type = ['actual', 'complete']
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only']
, split_type = ['70_30', '80_20', 'sl', 'none_with_bts', 'none_only', 'reverse']
, oversampling = True
, dst_colname = 'dst'# determine how to subset the actual vs reverse data
, target_colname = 'dst_mode'
@ -129,9 +129,20 @@ def split_tts(ml_input_data
n_test_data_size = len(X) + len(X_bts)
test_data_shape = X_bts.shape
if split_type == 'rt': # always on complete data
temp_df_train = ml_input_data[ml_input_data[dst_colname].isna()]
X = temp_df_train.drop(cols_to_dropL, axis = 1)
y = temp_df_train[target_colname]
temp_df_bts = ml_input_data[ml_input_data[dst_colname].notna()]
X_bts = temp_df_bts.drop(cols_to_dropL, axis = 1)
y_bts = temp_df_bts[target_colname]
n_test_data_size = len(X) + len(X_bts)
test_data_shape = X_bts.shape
if split_type == 'none_only':
temp_df_train = ml_input_data.copy() # always complete
X = temp_df_train.drop(cols_to_dropL, axis = 1)
y = temp_df_train[target_colname]
@ -194,12 +205,12 @@ def split_tts(ml_input_data
, '\n Baseline'
, '\n==========================='
, '\n\nTotal data size:', n_test_data_size
, '\ninput data size:' , len(ml_input_data)
, '\n\nTrain data size:', X.shape
, '\n\nTrain data size:' , X.shape
, '\ny_train numbers:' , yc1
, '\n\nTest data size:', test_data_shape
, '\n\nTest data size:' , test_data_shape
, '\ny_test_numbers:' , yc2
, '\n\ny_train ratio:' , yc1_ratio

View file

@ -111,9 +111,9 @@ baz2 = pd.concat([baz, baz_df1], axis = 1)
a = pd.concat([bar2, baz2], axis = 1)
#%% test added split_types i.e none_with_bts and none_only
spl_type = 'none_with_bts'
spl_type = 'none_only'
spl_type = 'none_with_bts'
spl_type = 'rt'
#data_type = "actual"
data_type = "complete"