Renamed files to make them more generic

This commit is contained in:
Tanushree Tunstall 2020-03-23 18:13:02 +00:00
parent d29b81a686
commit 0001c727e0

View file

@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv'
outfile0 = homedir + '/' + outdir + '/' + out_filename0 outfile0 = homedir + '/' + outdir + '/' + out_filename0
#FIXME: CHECK line len(common_ids) #FIXME: CHECK line len(common_ids)
print('Writing file: common ids:\n', print('Writing file: common ids:',
'\nFilename:', out_filename0, '\nFilename:', out_filename0,
'\nPath:', homedir +'/'+ outdir, '\nPath:', homedir +'/'+ outdir,
'\nExpected no. of rows:', len(common_ids) ) '\nExpected no. of rows:', len(common_ids) )
@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count')
print("revised dim of dr_muts_df:", dr_muts_df.shape) print("revised dim of dr_muts_df:", dr_muts_df.shape)
c1 = dr_muts_df.dr_sample_freq.value_counts() c1 = dr_muts_df.dr_sample_freq.value_counts()
print("counting no. of sample frequency\n:", c1) print('counting no. of sample frequency:\n', c1)
print('======================================================================') print('======================================================================')
# sanity check: length of pnca samples # sanity check: length of pnca samples
@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform
print("revised dim of other_muts_df:", other_muts_df.shape) print("revised dim of other_muts_df:", other_muts_df.shape)
c2 = other_muts_df.other_sample_freq.value_counts() c2 = other_muts_df.other_sample_freq.value_counts()
print("counting no. of sample frequency\n:", c2) print('counting no. of sample frequency:\n', c2)
print('======================================================================') print('======================================================================')
# sanity check: length of pnca samples # sanity check: length of pnca samples
if len(other_pnca_WF1) == c2.sum(): if len(other_pnca_WF1) == c2.sum():
@ -696,7 +696,7 @@ else:
print('Error: ambiguous muts detected, but extraction failed. Debug!', print('Error: ambiguous muts detected, but extraction failed. Debug!',
'\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ), '\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
'\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist())) '\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))
print('======================================================================')
#%% clear variables #%% clear variables
del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na) del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)
@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m
out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv' out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
outfile1 = homedir + '/' + outdir + '/' + out_filename1 outfile1 = homedir + '/' + outdir + '/' + out_filename1
print('Writing file: ambiguous muts...', print('Writing file: ambiguous muts',
'\nFilename:', out_filename1, '\nFilename:', out_filename1,
'\nPath:', homedir +'/'+ outdir) '\nPath:', homedir +'/'+ outdir)
@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv'
outfile2 = homedir + '/' + outdir + '/' + out_filename2 outfile2 = homedir + '/' + outdir + '/' + out_filename2
print('Writing file: mCSM style muts', print('Writing file: mCSM style muts',
'\nFilename:', out_filename2,
'\nPath:', homedir +'/'+ outdir,
'\nmutation format (SNP): {Wt}<POS>{Mut}', '\nmutation format (SNP): {Wt}<POS>{Mut}',
'\nNo. of distinct muts:', len(snps_only), '\nNo. of distinct muts:', len(snps_only),
'\nNo. of distinct positions:', len(pos_only), '\nNo. of distinct positions:', len(pos_only))
'\nFilename:', out_filename2,
'\nPath:', homedir +'/'+ outdir)
snps_only.to_csv(outfile2, header = False, index = False) snps_only.to_csv(outfile2, header = False, index = False)
print('Finished writing:', out_filename2, print('Finished writing:', out_filename2,
'\nNo. of rows:', len(snps_only) ) '\nNo. of rows:', len(snps_only),
'\nNo. of cols:', len(snps_only.columns))
print('======================================================================') print('======================================================================')
del(out_filename2) del(out_filename2)
@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv'
outfile4 = homedir + '/' + outdir + '/' + out_filename4 outfile4 = homedir + '/' + outdir + '/' + out_filename4
print('Writing file: mCSM style muts for msa', print('Writing file: mCSM style muts for msa',
'\nFilename:', out_filename4,
'\nPath:', homedir +'/'+ outdir,
'\nmutation format (SNP): {Wt}<POS>{Mut}', '\nmutation format (SNP): {Wt}<POS>{Mut}',
'\nNo.of lines of msa:', len(all_muts_msa), '\nNo.of lines of msa:', len(all_muts_msa),
'\nFilename:', out_filename4, )
'\nPath:', homedir +'/'+ outdir)
all_muts_msa_sorted.to_csv(outfile4, header = False, index = False) all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)
print('Finished writing:', out_filename4, print('Finished writing:', out_filename4,
'\nNo. of rows:', len(all_muts_msa) ) '\nNo. of rows:', len(all_muts_msa),
'\nNo. of cols:', len(all_muts_msa.columns) )
print('======================================================================') print('======================================================================')
del(out_filename4) del(out_filename4)
@ -968,13 +971,13 @@ print('Writing file: mutational positions',
pos_only_sorted.to_csv(outfile5, header = True, index = False) pos_only_sorted.to_csv(outfile5, header = True, index = False)
print('Finished writing:', out_filename5, print('Finished writing:', out_filename5,
'\nNo. of rows:', len(pos_only_sorted) ) '\nNo. of rows:', len(pos_only_sorted),
'\nNo. of cols:', len(pos_only_sorted.columns) )
print('======================================================================') print('======================================================================')
del(out_filename5) del(out_filename5)
#%% end of script #%% end of script
print('======================================================================')
print(u'\u2698' * 50, print(u'\u2698' * 50,
'\nEnd of script: Data extraction and writing files' '\nEnd of script: Data extraction and writing files'
'\n' + u'\u2698' * 50 ) '\n' + u'\u2698' * 50 )