added sections and slotted relevant bits from data_extraction to v2

This commit is contained in:
Tanushree Tunstall 2022-04-14 12:21:16 +01:00
parent e6faf80c20
commit f05cb96346
2 changed files with 446 additions and 52 deletions

View file

@ -1210,7 +1210,7 @@ print('Finished writing:', outfile_mcsmsnps
, '\n=============================================================')
del(out_filename_mcsmsnps)
#%%# write frequency of position counts
#%% write frequency of position counts
# Single-column DataFrame built from the 'position' column of gene_LF1
metadata_pos = pd.DataFrame(gene_LF1['position'])
# Number of rows in gene_LF1 for each distinct 'position' value
z = gene_LF1['position'].value_counts()
# Same counts as a plain dict: position value -> count
# NOTE(review): presumably mapped onto metadata_pos as 'meta_pos_count' in the lines that follow -- confirm
z1 = z.to_dict()
@ -1219,8 +1219,6 @@ metadata_pos['meta_pos_count'].value_counts()
metadata_pos.sort_values(by = ['meta_pos_count'], ascending = False, inplace = True)
# Write file: gene_metadata (i.e gene_LF1)
# where each row has UNIQUE mutations NOT unique sample ids
out_filename_metadata_poscounts = gene.lower() + '_metadata_poscounts.csv'
outfile_metadata_poscounts = outdir + '/' + out_filename_metadata_poscounts
print('\n----------------------------------'
@ -1236,7 +1234,6 @@ print('Finished writing:', outfile_metadata_poscounts
, '\n=============================================================')
del(out_filename_metadata_poscounts)
#%% Write file: gene_metadata (i.e. gene_LF1)
# where each row has UNIQUE mutations, NOT unique sample ids
#out_filename_metadata = gene.lower() + '_metadata_raw.csv' #! rationale?