diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py
index b8694b5..2f3531a 100755
--- a/scripts/data_extraction.py
+++ b/scripts/data_extraction.py
@@ -1094,6 +1094,24 @@ print('Created column: mutationinformation'
 	, '\n=====================================================================\n'
     , gene_LF1.mutationinformation.head(10))
 
+# Order gene_LF1 rows by numeric position (in place) and verify the result.
+gene_LF1.dtypes  # result is discarded when run as a script; inspection aid only
+
+# converting position to numeric, so the sort below is numeric rather than lexicographic
+gene_LF1['position'] = pd.to_numeric(gene_LF1['position'])
+
+# sort by position inplace; foo/bar hold the per-position row counts before/after the sort
+foo = gene_LF1['position'].value_counts()
+gene_LF1.sort_values(by = ['position'], inplace = True)
+bar = gene_LF1['position'].value_counts()
+# PASS = rows ascending by position AND counts unchanged; counts are compared label-aligned because value_counts() may order tied positions differently after the sort
+if gene_LF1['position'].dropna().is_monotonic_increasing and foo.sort_index().equals(bar.sort_index()):
+    print('PASS: df ordered by position')
+    print(gene_LF1['position'].head())
+else:
+    print('FAIL: df could not be orderd. Check source')
+    sys.exit()
+
 #%% Write file: mCSM muts
 snps_only = pd.DataFrame(gene_LF1['mutationinformation'].unique())
 snps_only.head()
@@ -1128,6 +1146,31 @@ print('Finished writing:', outfile_mcsmsnps
       , '\n=============================================================')
 del(out_filename_mcsmsnps)
 
+#%% Write file: position counts (how many gene_LF1 rows share each position)
+metadata_pos = pd.DataFrame(gene_LF1['position'])  # position column only, one row per gene_LF1 row
+z =  gene_LF1['position'].value_counts()  # position -> number of rows at that position
+z1 = z.to_dict()
+metadata_pos['meta_pos_count'] = metadata_pos['position'].map(z1)  # attach the count to every row
+metadata_pos['meta_pos_count'].value_counts()  # result is discarded; only useful when run interactively
+
+metadata_pos.sort_values(by = ['meta_pos_count'], ascending = False, inplace = True)  # most frequent positions first
+
+# Write file: <gene>_metadata_poscounts.csv
+# two columns (position, meta_pos_count); positions repeat, one row per gene_LF1 row
+out_filename_metadata_poscounts = gene.lower() + '_metadata_poscounts.csv'
+outfile_metadata_poscounts = outdir + '/' + out_filename_metadata_poscounts
+print('Writing file: Metadata poscounts'
+      , '\nFile:', outfile_metadata_poscounts
+      , '\n============================================================')
+
+metadata_pos.to_csv(outfile_metadata_poscounts, header = True, index = False)
+print('Finished writing:', outfile_metadata_poscounts
+      , '\nNo. of rows:', len(metadata_pos)
+      , '\nNo. of cols:', len(metadata_pos.columns)
+      , '\n=============================================================')
+del(out_filename_metadata_poscounts)
+
+
 #%% Write file: gene_metadata (i.e gene_LF1)
 # where each row has UNIQUE mutations NOT unique sample ids
 out_filename_metadata = gene.lower() + '_metadata.csv'
@@ -1213,9 +1256,20 @@ pos_only_sorted.to_csv(outfile_pos, header = True, index = False)
 print('Finished writing:', outfile_pos
       , '\nNo. of rows:', len(pos_only_sorted)
       , '\nNo. of cols:', len(pos_only_sorted.columns)
-      , '\n=============================================================')
+      , '\n============================================================='
+      , '\n\n\n')
 
 del(out_filename_pos)
+#%% quick summary output: counts computed from gene_LF1 for the current gene
+print('============================================'
+      , '\nQuick summary output for', gene.lower()
+      , '\n============================================'
+      , '\nTotal no.of unique missense muts:', gene_LF1['mutationinformation'].nunique()
+      , '\nTotal no.of unique positions associated with missense muts:',gene_LF1['position'].nunique()
+      , '\nTotal no. of samples with missense muts:', len(gene_LF1)  # row count of gene_LF1; NOTE(review): confirm one row == one sample
+      , '\n============================================================='
+      , '\n\n\n')
+
 #=======================================================================
 print(u'\u2698' * 50,
       '\nEnd of script: Data extraction and writing files'