From dd916926735370691d8942069f979ad59f5d79cb Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 23 Mar 2020 18:13:02 +0000 Subject: [PATCH] renamed files to make more generic --- meta_data_analysis/data_extraction.py | 31 +++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/meta_data_analysis/data_extraction.py b/meta_data_analysis/data_extraction.py index a47cec7..48c31e7 100755 --- a/meta_data_analysis/data_extraction.py +++ b/meta_data_analysis/data_extraction.py @@ -328,7 +328,7 @@ out_filename0 = gene.lower() + '_' + 'common_ids.csv' outfile0 = homedir + '/' + outdir + '/' + out_filename0 #FIXME: CHECK line len(common_ids) -print('Writing file: common ids:\n', +print('Writing file: common ids:', '\nFilename:', out_filename0, '\nPath:', homedir +'/'+ outdir, '\nExpected no. of rows:', len(common_ids) ) @@ -459,7 +459,7 @@ dr_muts_df['dr_sample_freq'] = dr_muts_df.groupby('id')['id'].transform('count') print("revised dim of dr_muts_df:", dr_muts_df.shape) c1 = dr_muts_df.dr_sample_freq.value_counts() -print("counting no. of sample frequency\n:", c1) +print('counting no. of sample frequency:\n', c1) print('======================================================================') # sanity check: length of pnca samples @@ -521,7 +521,7 @@ other_muts_df['other_sample_freq'] = other_muts_df.groupby('id')['id'].transform print("revised dim of other_muts_df:", other_muts_df.shape) c2 = other_muts_df.other_sample_freq.value_counts() -print("counting no. of sample frequency\n:", c2) +print('counting no. of sample frequency:\n', c2) print('======================================================================') # sanity check: length of pnca samples if len(other_pnca_WF1) == c2.sum(): @@ -696,7 +696,7 @@ else: print('Error: ambiguous muts detected, but extraction failed. Debug!', '\nNo. of ambiguous muts in dr:', len(dr_muts[dr_muts.isin(other_muts)].value_counts().keys().tolist() ), '\nNo. of ambiguous muts in other:', len(other_muts[other_muts.isin(dr_muts)].value_counts().keys().tolist())) - +print('======================================================================') #%% clear variables del(id_dr, id_other, meta_data, meta_pnca_dr, meta_pnca_other, mut_grouped, muts_split, other_WF1, other_df, other_muts_df, other_pnca_count, pnca_LF0, pnca_na) @@ -712,7 +712,7 @@ del(c1, c2, col_to_split1, col_to_split2, comp_pnca_samples, dr_WF0, dr_df, dr_m out_filename1 = gene.lower() + '_' + 'ambiguous_muts.csv' outfile1 = homedir + '/' + outdir + '/' + out_filename1 -print('Writing file: ambiguous muts...', +print('Writing file: ambiguous muts', '\nFilename:', out_filename1, '\nPath:', homedir +'/'+ outdir) @@ -869,16 +869,17 @@ out_filename2 = gene.lower() + '_' + 'mcsm_snps.csv' outfile2 = homedir + '/' + outdir + '/' + out_filename2 print('Writing file: mCSM style muts', + '\nFilename:', out_filename2, + '\nPath:', homedir +'/'+ outdir, '\nmutation format (SNP): {Wt}{Mut}', '\nNo. of distinct muts:', len(snps_only), - '\nNo. of distinct positions:', len(pos_only), - '\nFilename:', out_filename2, - '\nPath:', homedir +'/'+ outdir) + '\nNo. of distinct positions:', len(pos_only)) snps_only.to_csv(outfile2, header = False, index = False) print('Finished writing:', out_filename2, - '\nNo. of rows:', len(snps_only) ) + '\nNo. of rows:', len(snps_only), + '\nNo. of cols:', len(snps_only.columns)) print('======================================================================') del(out_filename2) @@ -931,15 +932,17 @@ out_filename4 = gene.lower() + '_' + 'all_muts_msa.csv' outfile4 = homedir + '/' + outdir + '/' + out_filename4 print('Writing file: mCSM style muts for msa', + '\nFilename:', out_filename4, + '\nPath:', homedir +'/'+ outdir, '\nmutation format (SNP): {Wt}{Mut}', '\nNo.of lines of msa:', len(all_muts_msa), - '\nFilename:', out_filename4, - '\nPath:', homedir +'/'+ outdir) +) all_muts_msa_sorted.to_csv(outfile4, header = False, index = False) print('Finished writing:', out_filename4, - '\nNo. of rows:', len(all_muts_msa) ) + '\nNo. of rows:', len(all_muts_msa), + '\nNo. of cols:', len(all_muts_msa.columns) ) print('======================================================================') del(out_filename4) @@ -968,13 +971,13 @@ print('Writing file: mutational positions', pos_only_sorted.to_csv(outfile5, header = True, index = False) print('Finished writing:', out_filename5, - '\nNo. of rows:', len(pos_only_sorted) ) + '\nNo. of rows:', len(pos_only_sorted), + '\nNo. of cols:', len(pos_only_sorted.columns) ) print('======================================================================') del(out_filename5) #%% end of script -print('======================================================================') print(u'\u2698' * 50, '\nEnd of script: Data extraction and writing files' '\n' + u'\u2698' * 50 )