109 lines
No EOL
4.3 KiB
Python
109 lines
No EOL
4.3 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 23 20:39:20 2022

@author: tanu
"""
|
|
|
|
def ProcessMultModelCl(inputD=None):
    """Combine cross-validation (CV) and blind-test (BT) scores into one table.

    ``inputD`` is converted to a DataFrame and transposed, so every top-level
    key of ``inputD`` becomes one row. Columns whose name matches ``test_``
    are taken as CV scores and columns matching ``bts_`` as BT scores. Both
    sets are renamed through the module-level mappings ``scoreCV_mapD`` and
    ``scoreBT_mapD`` (defined outside this function; presumably they map both
    prefixes onto the same metric names -- verify against their definition),
    tagged with a ``Data_source`` column ('CV' / 'BT'), stacked row-wise and
    finally joined on the index with the metadata columns (sizes, timings,
    confusion-matrix counts, resampling label).

    Parameters
    ----------
    inputD : dict-like, optional
        Anything accepted by ``pd.DataFrame``. ``None`` (the default) is
        treated as an empty mapping.

    Returns
    -------
    pandas.DataFrame
        The stacked CV + BT scores merged with the metadata columns.

    Raises
    ------
    SystemExit
        If the CV and BT frames do not share exactly the same columns after
        renaming, or if the stacked frame does not have the expected shape.
    """
    # None replaces the former mutable `{}` default; both give an empty frame.
    scoresDF = pd.DataFrame({} if inputD is None else inputD)
    scoresDFT = scoresDF.T

    #------------------------
    # WF: only CV and BTS
    #------------------------
    # .copy() so the renaming/tagging below acts on independent frames and
    # never on a view of scoresDFT.
    scoresDF_CV = scoresDFT.filter(regex='test_', axis=1).copy()
    # map colnames for consistency to allow concatenating
    scoresDF_CV.columns = scoresDF_CV.columns.map(scoreCV_mapD)
    scoresDF_CV['Data_source'] = 'CV'

    scoresDF_BT = scoresDFT.filter(regex='bts_', axis=1).copy()
    # map colnames for consistency to allow concatenating
    scoresDF_BT.columns = scoresDF_BT.columns.map(scoreBT_mapD)
    scoresDF_BT['Data_source'] = 'BT'

    # Everything that is neither a CV nor a BT score but describes the run.
    metaDF = scoresDFT.filter(
        regex='training_size|testSize|_time|TN|FP|FN|TP|.*_neg|.*_pos|resampling',
        axis=1,
    )

    #-----------------
    # Combine WF
    #-----------------
    dfs_combine_wf = [scoresDF_CV, scoresDF_BT]
    print('\n---------->\n', len(dfs_combine_wf))
    print(scoresDF_CV)
    print(scoresDF_BT)
    print('\nCV dim:', scoresDF_CV.shape
          , '\nBT dim:', scoresDF_BT.shape)

    max_nrows = max(len(df) for df in dfs_combine_wf)
    max_ncols = max(len(df.columns) for df in dfs_combine_wf)
    print(max_ncols)

    expected_nrows_wf = len(dfs_combine_wf) * max_nrows
    expected_ncols_wf = max_ncols

    shared_cols = set.intersection(*(set(df.columns) for df in dfs_combine_wf))
    # Keep the CV frame's column order: iterating the set directly would make
    # the output column order vary between interpreter runs.
    common_cols_wf = [col for col in dict.fromkeys(scoresDF_CV.columns)
                      if col in shared_cols]
    print('\nCOMMON COLS:', common_cols_wf
          , max_ncols)

    if len(shared_cols) != max_ncols:
        # Previously this branch only printed and then crashed with a
        # NameError at the merge below; stop explicitly instead.
        print('\nConcatenting dfs not possible [WF],check numbers ')
        sys.exit('\nFAIL: CV and BT columns differ after mapping')

    combined_baseline_wf = pd.concat(
        [df[common_cols_wf] for df in dfs_combine_wf], ignore_index=False)

    # `tts_split` is a module-level value used only for this log line; look it
    # up defensively so a missing global cannot abort the whole function.
    split_label = globals().get('tts_split', 'NA')
    print('\nConcatenating dfs with different resampling methods [WF]:', split_label
          , '\nNo. of dfs combining:', len(dfs_combine_wf))
    print('\n================================================^^^^^^^^^^^^')

    got_nrows = len(combined_baseline_wf)
    got_ncols = len(combined_baseline_wf.columns)
    if got_nrows == expected_nrows_wf and got_ncols == expected_ncols_wf:
        print('\n================================================^^^^^^^^^^^^')
        print('\nPASS:', len(dfs_combine_wf), 'dfs successfully combined'
              , '\nnrows in combined_df_wf:', got_nrows
              , '\nncols in combined_df_wf:', got_ncols)
    else:
        print('\nFAIL: concatenating failed'
              , '\nExpected nrows:', expected_nrows_wf
              , '\nGot:', got_nrows
              , '\nExpected ncols:', expected_ncols_wf
              , '\nGot:', got_ncols)
        sys.exit('\nFIRST IF FAILS')

    # TODO: add a check on the merged frame here
    # Index-on-index join: each score row picks up the metadata row that
    # carries the same index label.
    combDF = pd.merge(combined_baseline_wf, metaDF, left_index=True, right_index=True)

    return combDF
|
|
|
|
|
|
# test

#ProcessMultModelCl(smnc_scores_mmD)

# Call MultModelsCl on the 'smnc' inputs, passing the blind-test data as well
# and asking for formatted output; the result is kept in bazDF.
bazDF = MultModelsCl(
    input_df=X_smnc,
    target=y_smnc,
    var_type='mixed',
    tts_split_type=tts_split_7030,
    resampling_type='smnc',
    skf_cv=skf_cv,
    blind_test_df=X_bts,
    blind_test_target=y_bts,
    add_cm=True,
    add_yn=True,
    return_formatted_output=True,
)