renamed files to make more generic
This commit is contained in:
parent
22a0d38563
commit
dd91692673
1 changed file with 17 additions and 14 deletions
|
@@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv'
|
|||
outfile0 = homedir + '/' + outdir + '/' + out_filename0
|
||||
|
||||
#FIXME: CHECK line len(common_ids)
|
||||
print('Writing file: common ids:\n',
|
||||
print('Writing file: common ids:',
|
||||
'\nFilename:', out_filename0,
|
||||
'\nPath:', homedir +'/'+ outdir,
|
||||
'\nExpected no. of rows:', len(common_ids) )
|
||||
|
@@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count')
|
|||
print("revised dim of dr_muts_df:", dr_muts_df.shape)
|
||||
|
||||
c1 = dr_muts_df.dr_sample_freq.value_counts()
|
||||
print("counting no. of sample frequency\n:", c1)
|
||||
print('counting no. of sample frequency:\n', c1)
|
||||
print('======================================================================')
|
||||
|
||||
# sanity check: length of pnca samples
|
||||
|
@@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform
|
|||
print("revised dim of other_muts_df:", other_muts_df.shape)
|
||||
|
||||
c2 = other_muts_df.other_sample_freq.value_counts()
|
||||
print("counting no. of sample frequency\n:", c2)
|
||||
print('counting no. of sample frequency:\n', c2)
|
||||
print('======================================================================')
|
||||
# sanity check: length of pnca samples
|
||||
if len(other_pnca_WF1) == c2.sum():
|
||||
|
@@ -696,7 +696,7 @@ else:
|
|||
print('Error: ambiguous muts detected, but extraction failed. Debug!',
|
||||
'\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
|
||||
'\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))
|
||||
|
||||
print('======================================================================')
|
||||
#%% clear variables
|
||||
del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)
|
||||
|
||||
|
@@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m
|
|||
|
||||
out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
|
||||
outfile1 = homedir + '/' + outdir + '/' + out_filename1
|
||||
print('Writing file: ambiguous muts...',
|
||||
print('Writing file: ambiguous muts',
|
||||
'\nFilename:', out_filename1,
|
||||
'\nPath:', homedir +'/'+ outdir)
|
||||
|
||||
|
@@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv'
|
|||
outfile2 = homedir + '/' + outdir + '/' + out_filename2
|
||||
|
||||
print('Writing file: mCSM style muts',
|
||||
'\nFilename:', out_filename2,
|
||||
'\nPath:', homedir +'/'+ outdir,
|
||||
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
||||
'\nNo. of distinct muts:', len(snps_only),
|
||||
'\nNo. of distinct positions:', len(pos_only),
|
||||
'\nFilename:', out_filename2,
|
||||
'\nPath:', homedir +'/'+ outdir)
|
||||
'\nNo. of distinct positions:', len(pos_only))
|
||||
|
||||
snps_only.to_csv(outfile2, header = False, index = False)
|
||||
|
||||
print('Finished writing:', out_filename2,
|
||||
'\nNo. of rows:', len(snps_only) )
|
||||
'\nNo. of rows:', len(snps_only),
|
||||
'\nNo. of cols:', len(snps_only.columns))
|
||||
print('======================================================================')
|
||||
del(out_filename2)
|
||||
|
||||
|
@@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv'
|
|||
outfile4 = homedir + '/' + outdir + '/' + out_filename4
|
||||
|
||||
print('Writing file: mCSM style muts for msa',
|
||||
'\nFilename:', out_filename4,
|
||||
'\nPath:', homedir +'/'+ outdir,
|
||||
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
||||
'\nNo.of lines of msa:', len(all_muts_msa),
|
||||
'\nFilename:', out_filename4,
|
||||
'\nPath:', homedir +'/'+ outdir)
|
||||
)
|
||||
|
||||
all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)
|
||||
|
||||
print('Finished writing:', out_filename4,
|
||||
'\nNo. of rows:', len(all_muts_msa) )
|
||||
'\nNo. of rows:', len(all_muts_msa),
|
||||
'\nNo. of cols:', len(all_muts_msa.columns) )
|
||||
print('======================================================================')
|
||||
del(out_filename4)
|
||||
|
||||
|
@@ -968,13 +971,13 @@ print('Writing file: mutational positions',
|
|||
pos_only_sorted.to_csv(outfile5, header = True, index = False)
|
||||
|
||||
print('Finished writing:', out_filename5,
|
||||
'\nNo. of rows:', len(pos_only_sorted) )
|
||||
'\nNo. of rows:', len(pos_only_sorted),
|
||||
'\nNo. of cols:', len(pos_only_sorted.columns) )
|
||||
print('======================================================================')
|
||||
del(out_filename5)
|
||||
|
||||
|
||||
#%% end of script
|
||||
print('======================================================================')
|
||||
print(u'\u2698' * 50,
|
||||
'\nEnd of script: Data extraction and writing files'
|
||||
'\n' + u'\u2698' * 50 )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue