diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py
index e340000..b6f6411 100644
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@@ -1615,6 +1615,7 @@ else:
 ###########################
 # magic merge happens here
 ###########################
+# FIXME: add a check here that rows sharing a duplicated index are actual duplicates, since the columns needed here should be summary columns
 lf_lin_split.index.drop_duplicates(keep='first')
 lf_lin_split = lf_lin_split
 lf_lin_split_U = lf_lin_split[~lf_lin_split.index.duplicated(keep='first')]
@@ -1640,4 +1641,5 @@ foo = gene_LF4[['mutationinformation', 'lineage', 'lineage_ucount'
                 , 'lineage_mode'
                 , 'lineage_list']]
 #%%
+
 #Subset relevant columns for output and put the rest of the output here
\ No newline at end of file