renamed files to make more generic

2020-03-23 18:13:02 +00:00 · 2020-03-23 18:13:02 +00:00 · dd91692673
commit dd91692673
parent 22a0d38563
1 changed file with 17 additions and 14 deletions
--- a/meta_data_analysis/data_extraction.py
+++ b/meta_data_analysis/data_extraction.py
@@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv'
 outfile0 = homedir + '/' + outdir + '/' + out_filename0

 #FIXME: CHECK line len(common_ids)
-print('Writing file: common ids:\n',
+print('Writing file: common ids:',
      '\nFilename:', out_filename0,
      '\nPath:', homedir +'/'+ outdir,
      '\nExpected no. of rows:', len(common_ids) )
@@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count')
 print("revised dim of dr_muts_df:", dr_muts_df.shape) 

 c1 = dr_muts_df.dr_sample_freq.value_counts()
-print("counting no. of sample frequency\n:", c1)
+print('counting no. of sample frequency:\n', c1)
 print('======================================================================')

 # sanity check: length of pnca samples
@@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform
 print("revised dim of other_muts_df:", other_muts_df.shape) 

 c2 = other_muts_df.other_sample_freq.value_counts()
-print("counting no. of sample frequency\n:", c2)
+print('counting no. of sample frequency:\n', c2)
 print('======================================================================')
 # sanity check: length of pnca samples
 if len(other_pnca_WF1) == c2.sum():
@@ -696,7 +696,7 @@ else:
   print('Error: ambiguous muts detected, but extraction failed. Debug!',
         '\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
         '\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))       
-
+print('======================================================================')
 #%% clear variables
 del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)  

@@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m

 out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
 outfile1 = homedir + '/' + outdir + '/' + out_filename1
-print('Writing file: ambiguous muts...',
+print('Writing file: ambiguous muts',
      '\nFilename:', out_filename1,
      '\nPath:',  homedir +'/'+ outdir)

@@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv'
 outfile2 = homedir + '/' + outdir + '/' + out_filename2

 print('Writing file: mCSM style muts',
+      '\nFilename:', out_filename2,
+      '\nPath:',  homedir +'/'+ outdir,
      '\nmutation format (SNP): {Wt}<POS>{Mut}',
      '\nNo. of distinct muts:', len(snps_only),
-      '\nNo. of distinct positions:', len(pos_only),
-      '\nFilename:', out_filename2,
-      '\nPath:',  homedir +'/'+ outdir)
+      '\nNo. of distinct positions:', len(pos_only))

 snps_only.to_csv(outfile2, header = False, index = False)

 print('Finished writing:', out_filename2,
-      '\nNo. of rows:', len(snps_only) )
+      '\nNo. of rows:', len(snps_only),
+      '\nNo. of cols:', len(snps_only.columns))
 print('======================================================================')
 del(out_filename2)

@@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv'
 outfile4 = homedir + '/' + outdir + '/' + out_filename4

 print('Writing file: mCSM style muts for msa',
+      '\nFilename:', out_filename4,
+      '\nPath:',  homedir +'/'+ outdir,
      '\nmutation format (SNP): {Wt}<POS>{Mut}',
      '\nNo.of lines of msa:', len(all_muts_msa),  
-      '\nFilename:', out_filename4,
-      '\nPath:',  homedir +'/'+ outdir)
+)

 all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)

 print('Finished writing:', out_filename4,
-      '\nNo. of rows:', len(all_muts_msa) )
+      '\nNo. of rows:', len(all_muts_msa),
+      '\nNo. of cols:', len(all_muts_msa.columns) )
 print('======================================================================')
 del(out_filename4)

@@ -968,13 +971,13 @@ print('Writing file: mutational positions',
 pos_only_sorted.to_csv(outfile5, header = True, index = False)

 print('Finished writing:', out_filename5,
-      '\nNo. of rows:', len(pos_only_sorted) )
+      '\nNo. of rows:', len(pos_only_sorted),
+      '\nNo. of cols:', len(pos_only_sorted.columns) )
 print('======================================================================')
 del(out_filename5)


 #%% end of script
-print('======================================================================')
 print(u'\u2698' * 50,
      '\nEnd of script: Data extraction and writing files'
      '\n' + u'\u2698' * 50 )