saving work

2022-07-29 00:12:43 +01:00 · 2022-07-29 00:12:43 +01:00 · e55906d2c7
commit e55906d2c7
parent 1695e90b42
3 changed files with 11 additions and 8 deletions
--- a/scripts/ml/combined_model/cm_logo_skf.py
+++ b/scripts/ml/combined_model/cm_logo_skf.py
@@ -139,9 +139,9 @@ def CMLogoSkf(cm_input_df
        # else:
        #     file_suffix = file_suffix

-        outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix +  ".csv"
+        #outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix +  ".csv"
               
-        print(outFile)
+        #print(outFile)
    
        #-------
        # training
@@ -175,6 +175,7 @@ def CMLogoSkf(cm_input_df
              , '\nTEST Target dim:' , cm_bts_y.shape)
        
        print("Running Multiple models on LOGO with SKF")
+        
        #%%:Running Multiple models on LOGO with SKF
 #        cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X # two func were identical excpet for name
        cD3_v2 = MultModelsCl(input_df = cm_X        
@@ -203,11 +204,11 @@ def CMLogoSkf(cm_input_df
 #===============
 # Complete Data
 #===============
-CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
-CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
+#CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
+#CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")

 #===============
 # Actual Data
 #===============
-CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
-CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
+#CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
+#CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
--- a/scripts/ml/ml_functions/SplitTTS.py
+++ b/scripts/ml/ml_functions/SplitTTS.py
@@ -269,7 +269,9 @@ def split_tts(ml_input_data
        
        #k_sm = 5 # default
        k_sm = k_smote
-        sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
+        sm_nc = SMOTENC(categorical_features=categorical_colind
+                        , k_neighbors = k_sm
+                        , **rs, **njobs)
        X_smnc, y_smnc = sm_nc.fit_resample(X, y)
        print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
        print(X_smnc.shape)
--- a/scripts/ml/ml_functions/ml_data_combined.py
+++ b/scripts/ml/ml_functions/ml_data_combined.py
@@ -54,6 +54,7 @@ expected_ncols
 if len(common_cols) == expected_ncols:
    print('\nProceeding to combine based on common cols (n):', len(common_cols))
    combined_df = pd.concat([df[common_cols] for df in dfs_combine], ignore_index = False)
+    
    print('\nSuccessfully combined dfs:'
          , '\nNo. of dfs combined:', len(dfs_combine)
          , '\nDim of combined df:', combined_df.shape)
@@ -76,7 +77,6 @@ cm_input_df5 =  combined_df[~combined_df['gene_name'].isin(omit_gene_alr)]
 combined_df['dst'].isna().sum()
 combined_df['dst'].value_counts().sum()

-
 combined_df_actual = combined_df[~combined_df['dst'].isna()]

 ##############################################################################