renamed files to make more generic
This commit is contained in:
parent
22a0d38563
commit
dd91692673
1 changed file with 17 additions and 14 deletions
|
@@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv'
|
||||||
outfile0 = homedir + '/' + outdir + '/' + out_filename0
|
outfile0 = homedir + '/' + outdir + '/' + out_filename0
|
||||||
|
|
||||||
#FIXME: CHECK line len(common_ids)
|
#FIXME: CHECK line len(common_ids)
|
||||||
print('Writing file: common ids:\n',
|
print('Writing file: common ids:',
|
||||||
'\nFilename:', out_filename0,
|
'\nFilename:', out_filename0,
|
||||||
'\nPath:', homedir +'/'+ outdir,
|
'\nPath:', homedir +'/'+ outdir,
|
||||||
'\nExpected no. of rows:', len(common_ids) )
|
'\nExpected no. of rows:', len(common_ids) )
|
||||||
|
@@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count')
|
||||||
print("revised dim of dr_muts_df:", dr_muts_df.shape)
|
print("revised dim of dr_muts_df:", dr_muts_df.shape)
|
||||||
|
|
||||||
c1 = dr_muts_df.dr_sample_freq.value_counts()
|
c1 = dr_muts_df.dr_sample_freq.value_counts()
|
||||||
print("counting no. of sample frequency\n:", c1)
|
print('counting no. of sample frequency:\n', c1)
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
|
|
||||||
# sanity check: length of pnca samples
|
# sanity check: length of pnca samples
|
||||||
|
@@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform
|
||||||
print("revised dim of other_muts_df:", other_muts_df.shape)
|
print("revised dim of other_muts_df:", other_muts_df.shape)
|
||||||
|
|
||||||
c2 = other_muts_df.other_sample_freq.value_counts()
|
c2 = other_muts_df.other_sample_freq.value_counts()
|
||||||
print("counting no. of sample frequency\n:", c2)
|
print('counting no. of sample frequency:\n', c2)
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
# sanity check: length of pnca samples
|
# sanity check: length of pnca samples
|
||||||
if len(other_pnca_WF1) == c2.sum():
|
if len(other_pnca_WF1) == c2.sum():
|
||||||
|
@@ -696,7 +696,7 @@ else:
|
||||||
print('Error: ambiguous muts detected, but extraction failed. Debug!',
|
print('Error: ambiguous muts detected, but extraction failed. Debug!',
|
||||||
'\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
|
'\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ),
|
||||||
'\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))
|
'\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist()))
|
||||||
|
print('======================================================================')
|
||||||
#%% clear variables
|
#%% clear variables
|
||||||
del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)
|
del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na)
|
||||||
|
|
||||||
|
@@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m
|
||||||
|
|
||||||
out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
|
out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv'
|
||||||
outfile1 = homedir + '/' + outdir + '/' + out_filename1
|
outfile1 = homedir + '/' + outdir + '/' + out_filename1
|
||||||
print('Writing file: ambiguous muts...',
|
print('Writing file: ambiguous muts',
|
||||||
'\nFilename:', out_filename1,
|
'\nFilename:', out_filename1,
|
||||||
'\nPath:', homedir +'/'+ outdir)
|
'\nPath:', homedir +'/'+ outdir)
|
||||||
|
|
||||||
|
@@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv'
|
||||||
outfile2 = homedir + '/' + outdir + '/' + out_filename2
|
outfile2 = homedir + '/' + outdir + '/' + out_filename2
|
||||||
|
|
||||||
print('Writing file: mCSM style muts',
|
print('Writing file: mCSM style muts',
|
||||||
|
'\nFilename:', out_filename2,
|
||||||
|
'\nPath:', homedir +'/'+ outdir,
|
||||||
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
||||||
'\nNo. of distinct muts:', len(snps_only),
|
'\nNo. of distinct muts:', len(snps_only),
|
||||||
'\nNo. of distinct positions:', len(pos_only),
|
'\nNo. of distinct positions:', len(pos_only))
|
||||||
'\nFilename:', out_filename2,
|
|
||||||
'\nPath:', homedir +'/'+ outdir)
|
|
||||||
|
|
||||||
snps_only.to_csv(outfile2, header = False, index = False)
|
snps_only.to_csv(outfile2, header = False, index = False)
|
||||||
|
|
||||||
print('Finished writing:', out_filename2,
|
print('Finished writing:', out_filename2,
|
||||||
'\nNo. of rows:', len(snps_only) )
|
'\nNo. of rows:', len(snps_only),
|
||||||
|
'\nNo. of cols:', len(snps_only.columns))
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
del(out_filename2)
|
del(out_filename2)
|
||||||
|
|
||||||
|
@@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv'
|
||||||
outfile4 = homedir + '/' + outdir + '/' + out_filename4
|
outfile4 = homedir + '/' + outdir + '/' + out_filename4
|
||||||
|
|
||||||
print('Writing file: mCSM style muts for msa',
|
print('Writing file: mCSM style muts for msa',
|
||||||
|
'\nFilename:', out_filename4,
|
||||||
|
'\nPath:', homedir +'/'+ outdir,
|
||||||
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
'\nmutation format (SNP): {Wt}<POS>{Mut}',
|
||||||
'\nNo.of lines of msa:', len(all_muts_msa),
|
'\nNo.of lines of msa:', len(all_muts_msa),
|
||||||
'\nFilename:', out_filename4,
|
)
|
||||||
'\nPath:', homedir +'/'+ outdir)
|
|
||||||
|
|
||||||
all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)
|
all_muts_msa_sorted.to_csv(outfile4, header = False, index = False)
|
||||||
|
|
||||||
print('Finished writing:', out_filename4,
|
print('Finished writing:', out_filename4,
|
||||||
'\nNo. of rows:', len(all_muts_msa) )
|
'\nNo. of rows:', len(all_muts_msa),
|
||||||
|
'\nNo. of cols:', len(all_muts_msa.columns) )
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
del(out_filename4)
|
del(out_filename4)
|
||||||
|
|
||||||
|
@@ -968,13 +971,13 @@ print('Writing file: mutational positions',
|
||||||
pos_only_sorted.to_csv(outfile5, header = True, index = False)
|
pos_only_sorted.to_csv(outfile5, header = True, index = False)
|
||||||
|
|
||||||
print('Finished writing:', out_filename5,
|
print('Finished writing:', out_filename5,
|
||||||
'\nNo. of rows:', len(pos_only_sorted) )
|
'\nNo. of rows:', len(pos_only_sorted),
|
||||||
|
'\nNo. of cols:', len(pos_only_sorted.columns) )
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
del(out_filename5)
|
del(out_filename5)
|
||||||
|
|
||||||
|
|
||||||
#%% end of script
|
#%% end of script
|
||||||
print('======================================================================')
|
|
||||||
print(u'\u2698' * 50,
|
print(u'\u2698' * 50,
|
||||||
'\nEnd of script: Data extraction and writing files'
|
'\nEnd of script: Data extraction and writing files'
|
||||||
'\n' + u'\u2698' * 50 )
|
'\n' + u'\u2698' * 50 )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue