From e55906d2c78168294070c2db0819dbad2165f050 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Fri, 29 Jul 2022 00:12:43 +0100
Subject: [PATCH] Comment out CMLogoSkf runs and outFile; reformat SMOTENC call

---
 scripts/ml/combined_model/cm_logo_skf.py    | 13 +++++++------
 scripts/ml/ml_functions/SplitTTS.py         |  4 +++-
 scripts/ml/ml_functions/ml_data_combined.py |  2 +-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/scripts/ml/combined_model/cm_logo_skf.py b/scripts/ml/combined_model/cm_logo_skf.py
index f57ae43..fb1f5d7 100755
--- a/scripts/ml/combined_model/cm_logo_skf.py
+++ b/scripts/ml/combined_model/cm_logo_skf.py
@@ -139,9 +139,9 @@ def CMLogoSkf(cm_input_df
         # else:
         #     file_suffix = file_suffix
 
-        outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix +  ".csv"
+        #outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix +  ".csv"
                
-        print(outFile)
+        #print(outFile)
     
         #-------
         # training
@@ -175,6 +175,7 @@ def CMLogoSkf(cm_input_df
               , '\nTEST Target dim:' , cm_bts_y.shape)
         
         print("Running Multiple models on LOGO with SKF")
+        
         #%%:Running Multiple models on LOGO with SKF
 #        cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X # two func were identical excpet for name
         cD3_v2 = MultModelsCl(input_df = cm_X        
@@ -203,11 +204,11 @@ def CMLogoSkf(cm_input_df
 #===============
 # Complete Data
 #===============
-CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
-CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
+#CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
+#CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
 
 #===============
 # Actual Data
 #===============
-CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
-CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
+#CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
+#CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
diff --git a/scripts/ml/ml_functions/SplitTTS.py b/scripts/ml/ml_functions/SplitTTS.py
index ad417e4..8e1ed56 100644
--- a/scripts/ml/ml_functions/SplitTTS.py
+++ b/scripts/ml/ml_functions/SplitTTS.py
@@ -269,7 +269,9 @@ def split_tts(ml_input_data
         
         #k_sm = 5 # default
         k_sm = k_smote
-        sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
+        sm_nc = SMOTENC(categorical_features=categorical_colind
+                        , k_neighbors = k_sm
+                        , **rs, **njobs)
         X_smnc, y_smnc = sm_nc.fit_resample(X, y)
         print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
         print(X_smnc.shape)
diff --git a/scripts/ml/ml_functions/ml_data_combined.py b/scripts/ml/ml_functions/ml_data_combined.py
index 7dca351..f57f557 100644
--- a/scripts/ml/ml_functions/ml_data_combined.py
+++ b/scripts/ml/ml_functions/ml_data_combined.py
@@ -54,6 +54,7 @@ expected_ncols
 if len(common_cols) == expected_ncols:
     print('\nProceeding to combine based on common cols (n):', len(common_cols))
     combined_df = pd.concat([df[common_cols] for df in dfs_combine], ignore_index = False)
+    
     print('\nSuccessfully combined dfs:'
           , '\nNo. of dfs combined:', len(dfs_combine)
           , '\nDim of combined df:', combined_df.shape)
@@ -76,7 +77,6 @@ cm_input_df5 =  combined_df[~combined_df['gene_name'].isin(omit_gene_alr)]
 combined_df['dst'].isna().sum()
 combined_df['dst'].value_counts().sum()
 
-
 combined_df_actual = combined_df[~combined_df['dst'].isna()]
 
 ##############################################################################