saving work

This commit is contained in:
Tanushree Tunstall 2022-06-24 13:21:21 +01:00
parent 3514e1b4ba
commit ad99efedd7
5 changed files with 46 additions and 507 deletions


@@ -5,9 +5,27 @@ Created on Thu Jun 23 20:39:20 2022
@author: tanu
"""
def ProcessMultModelCl(inputD = {}):
import os, sys
import pandas as pd
import numpy as np
import re
##############################################################################
#%% FUNCTION: Process output dict from MultModelsCl
def ProcessMultModelsCl(inputD = {}):
scoresDF = pd.DataFrame(inputD)
#------------------------
# Extracting split_name
#-----------------------
tts_split_nameL = []
for k,v in inputD.items():
tts_split_nameL = tts_split_nameL + [v['tts_split']]
if len(set(tts_split_nameL)) == 1:
tts_split_name = str(list(set(tts_split_nameL))[0])
print('\nExtracting tts_split_name:', tts_split_name)
#------------------------
# WF: only CV and BTS
#-----------------------
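A minimal standalone sketch of the split-name extraction above; the inputD layout and the '70_30' values below are hypothetical stand-ins for the real MultModelsCl output:
# Sketch only: toy inputD, keys and 'tts_split' values are made up for illustration
inputD = {'Logistic Regression': {'tts_split': '70_30'}
          , 'Random Forest'     : {'tts_split': '70_30'}}
tts_split_nameL = [v['tts_split'] for v in inputD.values()]
if len(set(tts_split_nameL)) == 1:                  # all models report the same split
    tts_split_name = tts_split_nameL[0]             # '70_30'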
@@ -28,7 +46,7 @@ def ProcessMultModelCl(inputD = {}):
#baseline_all = baseline_all_scores.filter(regex = 'bts_.*|test_.*|.*_time|TN|FP|FN|TP|.*_neg|.*_pos', axis = 0)
metaDF = scoresDFT.filter(regex='training_size|testSize|_time|TN|FP|FN|TP|.*_neg|.*_pos|resampling', axis = 1); scoresDF_BT.columns
metaDF = scoresDFT.filter(regex='training_size|blind_test_size|_time|TN|FP|FN|TP|.*_neg|.*_pos|resampling', axis = 1); scoresDF_BT.columns
#-----------------
# Combine WF
@@ -38,8 +56,10 @@ def ProcessMultModelCl(inputD = {}):
print(scoresDF_CV)
print(scoresDF_BT)
print('\nCV dim:', scoresDF_CV.shape
, '\nBT dim:',scoresDF_BT.shape)
print('\nCombining', len(dfs_combine_wf), 'using pd.concat by row ~ rowbind'
, '\nChecking Dims of df to combine:'
, '\nDim of CV:', scoresDF_CV.shape
, '\nDim of BT:', scoresDF_BT.shape)
dfs_nrows_wf = []
@@ -57,14 +77,15 @@ def ProcessMultModelCl(inputD = {}):
expected_ncols_wf = dfs_ncols_wf
common_cols_wf = list(set.intersection(*(set(df.columns) for df in dfs_combine_wf)))
print('\nCOMMON COLS:', common_cols_wf
print('\nFinding Common cols to ensure row bind is correct:', len(common_cols_wf)
, '\nCOMMON cols are:', common_cols_wf
, dfs_ncols_wf)
if len(common_cols_wf) == dfs_ncols_wf :
combined_baseline_wf = pd.concat([df[common_cols_wf] for df in dfs_combine_wf], ignore_index=False)
#resampling_methods_wf = combined_baseline_wf[['resampling']]
#resampling_methods_wf = resampling_methods_wf.drop_duplicates()
print('\nConcatenating dfs with different resampling methods [WF]:', tts_split
print('\nConcatenating dfs with different resampling methods [WF]:', tts_split_name
, '\nNo. of dfs combining:', len(dfs_combine_wf))
print('\n================================================^^^^^^^^^^^^')
if len(combined_baseline_wf) == expected_nrows_wf and len(combined_baseline_wf.columns) == expected_ncols_wf:
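The common-column row-bind guard above can be exercised in isolation; a minimal sketch with two toy frames standing in for the CV and BT score tables (the column names here are invented):
import pandas as pd

# Sketch only: toy frames; the real scoresDF_CV/scoresDF_BT carry per-model scores
df_cv = pd.DataFrame({'MCC': [0.5], 'F1': [0.7], 'source_data': ['CV']})
df_bt = pd.DataFrame({'MCC': [0.4], 'F1': [0.6], 'source_data': ['BT']})
dfs_combine_wf = [df_cv, df_bt]

common_cols_wf = list(set.intersection(*(set(df.columns) for df in dfs_combine_wf)))
if len(common_cols_wf) == len(df_cv.columns):       # every column is shared, safe to row bind
    combined_baseline_wf = pd.concat([df[common_cols_wf] for df in dfs_combine_wf]
                                     , ignore_index = False)
    assert len(combined_baseline_wf) == sum(len(df) for df in dfs_combine_wf)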
@@ -92,18 +113,4 @@ def ProcessMultModelCl(inputD = {}):
return combDF
# test
#ProcessMultModelCl(smnc_scores_mmD)
bazDF = MultModelsCl(input_df = X_smnc
, target = y_smnc
, var_type = 'mixed'
, tts_split_type = tts_split_7030
, resampling_type = 'smnc'
, skf_cv = skf_cv
, blind_test_df = X_bts
, blind_test_target = y_bts
, add_cm = True
, add_yn = True
, return_formatted_output = True)
###############################################################################
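A hedged sketch of how the renamed ProcessMultModelsCl could be driven from MultModelsCl's raw dict output. The keyword arguments mirror the removed call above and smnc_scores_mmD comes from the test comment, while return_formatted_output = False and processedDF are assumptions for illustration:
# Sketch only: return the raw score dict (return_formatted_output = False is an assumption)
# and format it explicitly with ProcessMultModelsCl
smnc_scores_mmD = MultModelsCl(input_df = X_smnc
                               , target = y_smnc
                               , var_type = 'mixed'
                               , tts_split_type = tts_split_7030
                               , resampling_type = 'smnc'
                               , skf_cv = skf_cv
                               , blind_test_df = X_bts
                               , blind_test_target = y_bts
                               , add_cm = True
                               , add_yn = True
                               , return_formatted_output = False)
processedDF = ProcessMultModelsCl(inputD = smnc_scores_mmD)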