diff --git a/scripts/ml/ml_data.py b/scripts/ml/ml_data.py
index d5fbe11..e63da15 100644
--- a/scripts/ml/ml_data.py
+++ b/scripts/ml/ml_data.py
@@ -423,9 +423,9 @@ def setvars(gene,drug):
     #==========================
     my_df_ml = my_df.copy()
         
-    #==========================
-    #     BLIND test set
-    #==========================
+    #===============================
+    #   Training and BLIND test set
+    #===============================
     # Separate blind test set
     my_df_ml[drug].isna().sum()
     
@@ -435,7 +435,7 @@ def setvars(gene,drug):
     training_df =  my_df_ml[my_df_ml[drug].notna()]
     training_df.shape
     
-    # Target1: dst
+    # Target1: dst_mode
     training_df[drug].value_counts()
     training_df['dst_mode'].value_counts()
     
@@ -514,15 +514,11 @@ def setvars(gene,drug):
     
     print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
     
-    #%% Construct numerical and categorical column names
     # numerical feature names
-    #    numerical_FN = common_cols_stabiltyN + foldX_cols + X_strFN + X_evolFN + X_genomicFN 
-    
-    #numerical_FN = X_ssFN  + X_evolFN + X_genomicFN
     numerical_FN = X_ssFN  + X_evolFN + X_genomicFN + X_aaindexFN
 
     
-    #categorical feature names
+    # categorical feature names
     categorical_FN = ['ss_class'
                 # , 'wt_prop_water'
                 # , 'mut_prop_water'
@@ -534,8 +530,8 @@ def setvars(gene,drug):
                 , 'electrostatics_change'
                 , 'polarity_change'
                 , 'water_change'
-                #, 'drtype_mode_labels' # beware then you can't use it to predict [USED it for uq_v1]
-                , 'active_site'
+                , 'drtype_mode_labels' # beware: if included here, you can't use it to predict [USED it for uq_v1, not v2]
+                , 'active_site' #[didn't use it for uq_v1]
                 #, 'gene_name' # will be required for the combined stuff
                  ]
     #----------------------------------------------
@@ -561,7 +557,7 @@ def setvars(gene,drug):
     my_df_ml.groupby('mutationinformation')['ligand_distance'].apply(lambda x: (x>10)).value_counts()
     my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask].value_counts()
     
-    # mask the column ligand distance > 10
+    # mask (set to 0) the mcsm affinity-related columns where ligand_distance > 10
     my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
     (my_df_ml['ligand_affinity_change'] == 0).sum()
     
diff --git a/scripts/ml/pnca_config.py b/scripts/ml/pnca_config.py
index fa2d158..ecc34f3 100755
--- a/scripts/ml/pnca_config.py
+++ b/scripts/ml/pnca_config.py
@@ -13,7 +13,7 @@ drug  = 'pyrazinamide'
 #total_mtblineage_uc = 8
 
 homedir = os.path.expanduser("~")
-os.chdir( homedir + '/git/ML_AI_training/')
+os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
 
 #---------------------------
 # Version 1: no AAindex
@@ -30,7 +30,7 @@ from ml_data import *
 #from UQ_yc_RunAllClfs import run_all_ML
 
 # TT run all ML clfs: baseline mode
-from UQ_MultModelsCl import MultModelsCl
+from MultModelsCl import MultModelsCl
 
 #%%###########################################################################