diff --git a/scripts/ml/alr_7030.py b/scripts/ml/alr_7030.py
index 90637e4..bf1de0f 100755
--- a/scripts/ml/alr_7030.py
+++ b/scripts/ml/alr_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
diff --git a/scripts/ml/embb_7030.py b/scripts/ml/embb_7030.py
index 37ae81b..9c169b2 100755
--- a/scripts/ml/embb_7030.py
+++ b/scripts/ml/embb_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
diff --git a/scripts/ml/gid_7030.py b/scripts/ml/gid_7030.py
index 0769f9c..9beaaee 100755
--- a/scripts/ml/gid_7030.py
+++ b/scripts/ml/gid_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
diff --git a/scripts/ml/katg_7030.py b/scripts/ml/katg_7030.py
index 0192f21..7bb9307 100755
--- a/scripts/ml/katg_7030.py
+++ b/scripts/ml/katg_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
diff --git a/scripts/ml/ml_data.py b/scripts/ml/ml_data.py
index e63da15..488c549 100644
--- a/scripts/ml/ml_data.py
+++ b/scripts/ml/ml_data.py
@@ -422,22 +422,6 @@ def setvars(gene,drug):
     # Data for ML
     #==========================
     my_df_ml = my_df.copy()
-
-    #===============================
-    # Training and BLIND test set
-    #===============================
-    # Separate blind test set
-    my_df_ml[drug].isna().sum()
-
-    blind_test_df = my_df_ml[my_df_ml[drug].isna()]
-    blind_test_df.shape
-
-    training_df = my_df_ml[my_df_ml[drug].notna()]
-    training_df.shape
-
-    # Target1: dst_mode
-    training_df[drug].value_counts()
-    training_df['dst_mode'].value_counts()

     #%% Build X: input for ML
     common_cols_stabiltyN = ['ligand_distance'
@@ -546,7 +530,6 @@ def setvars(gene,drug):
     # Masking columns:
     # (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10
     #=======================
-    #%% Masking columns
     # my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts()

     # my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
@@ -567,6 +550,23 @@ def setvars(gene,drug):
     mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
     mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv')
+    #===================================================
+    # Training and BLIND test set: actual vs imputed
+    # dst with actual values : training set
+    # dst with imputed values : blind test
+    #==================================================
+    my_df_ml[drug].isna().sum() #'na' ones are the blind_test set
+
+    blind_test_df = my_df_ml[my_df_ml[drug].isna()]
+    blind_test_df.shape
+
+    training_df = my_df_ml[my_df_ml[drug].notna()]
+    training_df.shape
+
+    # Target 1: dst_mode
+    training_df[drug].value_counts()
+    training_df['dst_mode'].value_counts()
+    ####################################################################
     #%% extracting dfs based on numerical, categorical column names
     #----------------------------------
     # WITHOUT the target var included
     #----------------------------------
@@ -625,11 +625,11 @@ def setvars(gene,drug):
     print('Original Data\n', Counter(y)
           , 'Data dim:', X.shape)

-    ###############################################################################
+    ###########################################################################
     #%%
-    ############################################################################
+    ###########################################################################
     # RESAMPLING
-    ###############################################################################
+    ###########################################################################
     #------------------------------
     # Simple Random oversampling
     # [Numerical + catgeorical]
diff --git a/scripts/ml/ml_data_7030.py b/scripts/ml/ml_data_7030.py
index 6e90454..4976439 100644
--- a/scripts/ml/ml_data_7030.py
+++ b/scripts/ml/ml_data_7030.py
@@ -528,7 +528,6 @@ def setvars(gene,drug):
     # Masking columns:
     # (mCSM-lig, mCSM-NA, mCSM-ppi2) values for lig_dist >10
     #=======================
-    #%% Masking columns
     # my_df_ml['mutationinformation'][my_df['ligand_distance']>10].value_counts()

     # my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
@@ -557,7 +556,6 @@ def setvars(gene,drug):
     # as these were imputed values and initial analysis shows that this
     # is not very representative
     #================================================================
-    # Separate blind test set
     my_df_ml[drug].isna().sum()
     # blind_test_df = my_df_ml[my_df_ml[drug].isna()]
     # blind_test_df.shape
@@ -565,77 +563,81 @@ def setvars(gene,drug):
     training_df = my_df_ml[my_df_ml[drug].notna()]
     training_df.shape

-    # Target1: dst_mode
+    # Target 1: dst_mode
     training_df[drug].value_counts()
     training_df['dst_mode'].value_counts()
     ####################################################################
-    # #%% extracting dfs based on numerical, categorical column names
-    # #----------------------------------
-    # # WITHOUT the target var included
-    # #----------------------------------
-    # num_df = training_df[numerical_FN]
-    # num_df.shape
-
-    # cat_df = training_df[categorical_FN]
-    # cat_df.shape
-
-    # all_df = training_df[numerical_FN + categorical_FN]
-    # all_df.shape
-
-    # #------------------------------
-    # # WITH the target var included:
-    # #'wtgt': with target
-    # #------------------------------
-    # # drug and dst_mode should be the same thing
-    # num_df_wtgt = training_df[numerical_FN + ['dst_mode']]
-    # num_df_wtgt.shape
-
-    # cat_df_wtgt = training_df[categorical_FN + ['dst_mode']]
-    # cat_df_wtgt.shape
-
-    # all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']]
-    # all_df_wtgt.shape
-    #%%########################################################################
-    # #============
-    # # ML data: OLD
-    # #============
-    # #------
-    # # X: Training and Blind test (BTS)
-    # #------
-    # X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
-    # X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL
-    # #X = all_df_wtgt[numerical_FN] # training numerical only
-    # #X_bts = blind_test_df[numerical_FN] # blind test data numerical
-
-    # #------
-    # # y
-    # #------
-    # y = all_df_wtgt['dst_mode'] # training data y
-    # y_bts = blind_test_df['dst_mode'] # blind data test y
-
-    # #X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']]
-
-    # # Quick check
-    # #(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
-    # for i in range(len(cols_to_mask)):
-    #     ind = i+1
-    #     print('\nindex:', i, '\nind:', ind)
-    #     print('\nMask count check:'
-    #           , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum()
-    #           )
-
-    # print('Original Data\n', Counter(y)
-    #       , 'Data dim:', X.shape)
-
+###############################################################################
+###############################################################################
+    # #%% extracting dfs based on numerical, categorical column names
+    # #----------------------------------
+    # # WITHOUT the target var included
+    # #----------------------------------
+    # num_df = training_df[numerical_FN]
+    # num_df.shape
+
+    # cat_df = training_df[categorical_FN]
+    # cat_df.shape
+
+    # all_df = training_df[numerical_FN + categorical_FN]
+    # all_df.shape
+
+    # #------------------------------
+    # # WITH the target var included:
+    # #'wtgt': with target
+    # #------------------------------
+    # # drug and dst_mode should be the same thing
+    # num_df_wtgt = training_df[numerical_FN + ['dst_mode']]
+    # num_df_wtgt.shape
+
+    # cat_df_wtgt = training_df[categorical_FN + ['dst_mode']]
+    # cat_df_wtgt.shape
+
+    # all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']]
+    # all_df_wtgt.shape
+
+    #%%########################################################################
+    # #============
+    # # ML data: OLD
+    # #============
+    # #------
+    # # X: Training and Blind test (BTS)
+    # #------
+    # X = all_df_wtgt[numerical_FN + categorical_FN] # training data ALL
+    # X_bts = blind_test_df[numerical_FN + categorical_FN] # blind test data ALL
+    # #X = all_df_wtgt[numerical_FN] # training numerical only
+    # #X_bts = blind_test_df[numerical_FN] # blind test data numerical
+
+    # #------
+    # # y
+    # #------
+    # y = all_df_wtgt['dst_mode'] # training data y
+    # y_bts = blind_test_df['dst_mode'] # blind data test y
+
+    # #X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']]
+
+    # # Quick check
+    # #(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
+    # for i in range(len(cols_to_mask)):
+    #     ind = i+1
+    #     print('\nindex:', i, '\nind:', ind)
+    #     print('\nMask count check:'
+    #           , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum()
+    #           )
+
+    # print('Original Data\n', Counter(y)
+    #       , 'Data dim:', X.shape)
+
+###############################################################################
+###############################################################################
+    #====================================
     # ML data: Train test split: 70/30
     # with stratification
     # 70% : training_data for CV
     # 30% : blind test
-    #============
+    #=====================================
     # features: all_df or
     x_features = training_df[numerical_FN + categorical_FN]
@@ -664,7 +666,9 @@ def setvars(gene,drug):
     yc2_ratio = yc2[0]/yc2[1]

     print('\n-------------------------------------------------------------'
-          , '\nSuccessfully split data 70/30 with stratification'
+          , '\nSuccessfully split data with stratification: 70/30'
+          , '\nTrain data size:', X.shape
+          , '\nTest data size:', X_bts.shape
           , '\ny_train numbers:', yc1
           , '\ny_train ratio:',yc1_ratio
           , '\n'
@@ -672,12 +676,23 @@ def setvars(gene,drug):
           , '\ny_test numbers:', yc2
           , '\ny_test ratio:', yc2_ratio
           , '\n-------------------------------------------------------------'
           )
+    ##########################################################################
+    # Quick check
+    #(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
+    for i in range(len(cols_to_mask)):
+        ind = i+1
+        print('\nindex:', i, '\nind:', ind)
+        print('\nMask count check:'
+              , (my_df_ml[cols_to_mask[i]]==0).sum() == (my_df_ml['ligand_distance']>10).sum()
+              )

-    ###############################################################################
+    print('Original Data\n', Counter(y)
+          , 'Data dim:', X.shape)
+    ###########################################################################
     #%%
-    ############################################################################
+    ###########################################################################
     # RESAMPLING
-    ###############################################################################
+    ###########################################################################
     #------------------------------
     # Simple Random oversampling
     # [Numerical + catgeorical]
diff --git a/scripts/ml/pnca_7030.py b/scripts/ml/pnca_7030.py
index a04c813..ddda78d 100755
--- a/scripts/ml/pnca_7030.py
+++ b/scripts/ml/pnca_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
diff --git a/scripts/ml/rpob_7030.py b/scripts/ml/rpob_7030.py
index a4e6bb8..2a98bf0 100755
--- a/scripts/ml/rpob_7030.py
+++ b/scripts/ml/rpob_7030.py
@@ -40,11 +40,19 @@ outdir_ml = outdir + 'ml/tts_7030/'
 print('\nOutput directory:', outdir_ml)

 #%%###########################################################################
-print('Sanity checks:'
-      , '\nML source data size:', x_features.shape
-      , '\nTotal input features:', X.shape
-      , '\nTarget feature numbers:', Counter(y)
-      , '\nTarget features ratio:', yc1_ratio
+print('\nSanity checks:'
+      #, '\nML source data size:', x_features.shape
+      , '\nTotal input features:', len(X.columns)
+      , '\n'
+      , '\nTraining data size:', X.shape
+      , '\nTest data size:', X_bts.shape
+      , '\n'
+      , '\nTarget feature numbers (training data):', Counter(y)
+      , '\nTarget features ratio (training data):', yc1_ratio
+      , '\n'
+      , '\nTarget feature numbers (test data):', Counter(y_bts)
+      , '\nTarget features ratio (test data):', yc2_ratio
+      , '\n\n#####################################################################\n')

 print('\n================================================================\n')

@@ -59,8 +67,8 @@ print('Strucutral features (n):'

 print('AAindex features (n):'
       , len(X_aaindexFN)
-      # , '\nThese are:\n'
-      # , X_aaindexFN
+      , '\nThese are:\n'
+      , X_aaindexFN
       , '\n================================================================\n')

 print('Evolutionary features (n):'
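
Note on the 70/30 split reported above: the ml_data_7030.py hunks print X, X_bts, y, y_bts and the yc1/yc2 class ratios, but the split call itself lies outside the changed lines. The sketch below shows how such a stratified 70/30 split is typically produced with scikit-learn's train_test_split; the toy frame, the 'dst_mode' stand-in target and random_state are illustrative assumptions, not taken from the diff.

    # Minimal sketch only: stratified 70/30 train/blind-test split (assumed call, not the repo's code)
    from collections import Counter

    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Placeholder stand-ins for training_df[numerical_FN + categorical_FN] and its 'dst_mode' target
    toy_df = pd.DataFrame({'feat1': range(20)
                           , 'feat2': [0.1] * 20
                           , 'dst_mode': [0, 1] * 10})
    x_features = toy_df[['feat1', 'feat2']]
    y_target = toy_df['dst_mode']

    X, X_bts, y, y_bts = train_test_split(x_features, y_target
                                          , test_size = 0.30     # 30% held out as blind test
                                          , random_state = 42    # illustrative seed
                                          , stratify = y_target) # preserve the class ratio

    yc1 = Counter(y)
    yc1_ratio = yc1[0]/yc1[1]
    yc2 = Counter(y_bts)
    yc2_ratio = yc2[0]/yc2[1]
    print('\ny_train numbers:', yc1, '\ny_train ratio:', yc1_ratio
          , '\ny_test numbers:', yc2, '\ny_test ratio:', yc2_ratio)

With stratify set, the class ratio reported by yc1_ratio and yc2_ratio should match between the training and blind-test splits, which is what the sanity-check prints added to the *_7030.py scripts are meant to confirm.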