Renamed files to make them more generic

This commit is contained in:
Tanushree Tunstall 2020-03-23 18:13:02 +00:00
parent 22a0d38563
commit dd91692673

View file

@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv'
outfile0 = homedir + '/' + outdir + '/' + out_filename0
#FIXME: CHECK line len(common_ids)
print('Writing file: common ids:\n',
print('Writing file: common ids:',
'\nFilename:', out_filename0,
'\nPath:', homedir +'/'+ outdir,
'\nExpected no. of rows:', len(common_ids) )
@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count')
print("revised dim of dr_muts_df:", dr_muts_df.shape)
c1 = dr_muts_df.dr_sample_freq.value_counts()
print("counting no. of sample frequency\n:", c1)
print('counting no. of sample frequency:\n', c1)
print('======================================================================')
# sanity check: length of pnca samples
@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform
print("revised dim of other_muts_df:", other_muts_df.shape)
c2 = other_muts_df.other_sample_freq.value_counts()
print("counting no. of sample frequency\n:", c2)
print('counting no. of sample frequency:\n', c2)
print('======================================================================')
# sanity check: length of pnca samples
if len(other_pnca_WF1) == c2.sum():
@ -696,7 +696,7 @@ else:
print('Error: ambiguous muts detected, but extraction failed. Debug!',
'\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
'\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))
print('======================================================================')
#%% clear variables
del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)
@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m
out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
outfile1 = homedir + '/' + outdir + '/' + out_filename1
print('Writing file: ambiguous muts...',
print('Writing file: ambiguous muts',
'\nFilename:', out_filename1,
'\nPath:', homedir +'/'+ outdir)
@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv'
outfile2 = homedir + '/' + outdir + '/' + out_filename2
print('Writing file: mCSM style muts',
'\nFilename:', out_filename2,
'\nPath:', homedir +'/'+ outdir,
'\nmutation format (SNP): {Wt}<POS>{Mut}',
'\nNo. of distinct muts:', len(snps_only),
'\nNo. of distinct positions:', len(pos_only),
'\nFilename:', out_filename2,
'\nPath:', homedir +'/'+ outdir)
'\nNo. of distinct positions:', len(pos_only))
snps_only.to_csv(outfile2, header = False, index = False)
print('Finished writing:', out_filename2,
'\nNo. of rows:', len(snps_only) )
'\nNo. of rows:', len(snps_only),
'\nNo. of cols:', len(snps_only.columns))
print('======================================================================')
del(out_filename2)
@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv'
outfile4 = homedir + '/' + outdir + '/' + out_filename4
print('Writing file: mCSM style muts for msa',
'\nFilename:', out_filename4,
'\nPath:', homedir +'/'+ outdir,
'\nmutation format (SNP): {Wt}<POS>{Mut}',
'\nNo.of lines of msa:', len(all_muts_msa),
'\nFilename:', out_filename4,
'\nPath:', homedir +'/'+ outdir)
)
all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)
print('Finished writing:', out_filename4,
'\nNo. of rows:', len(all_muts_msa) )
'\nNo. of rows:', len(all_muts_msa),
'\nNo. of cols:', len(all_muts_msa.columns) )
print('======================================================================')
del(out_filename4)
@ -968,13 +971,13 @@ print('Writing file: mutational positions',
pos_only_sorted.to_csv(outfile5, header = True, index = False)
print('Finished writing:', out_filename5,
'\nNo. of rows:', len(pos_only_sorted) )
'\nNo. of rows:', len(pos_only_sorted),
'\nNo. of cols:', len(pos_only_sorted.columns) )
print('======================================================================')
del(out_filename5)
#%% end of script
print('======================================================================')
print(u'\u2698' * 50,
'\nEnd of script: Data extraction and writing files'
'\n' + u'\u2698' * 50 )