Finally, the revised data processing is complete
This commit is contained in:
parent
ac0d14e116
commit
3c436f0c27
1 changed files with 9 additions and 11 deletions
|
@ -923,14 +923,14 @@ changes_val = []
|
|||
changes_dict = {}
|
||||
|
||||
##BROKENNNN!!!!
|
||||
|
||||
common_muts
|
||||
gene_LF1['mutation'].head()
|
||||
common_muts_lower = list((map(lambda x: x.lower(), common_muts)))
|
||||
common_muts_lower
|
||||
##BROKENNNN!!!!
|
||||
#for i in common_muts:
|
||||
for i in common_muts_lower:
|
||||
|
||||
print(i)
|
||||
for i in common_muts:
|
||||
#for i in common_muts_lower:
|
||||
#print(i)
|
||||
temp_df = gene_LF1[gene_LF1['mutation'] == i][['mutation', 'mutation_info_orig']]
|
||||
temp_df
|
||||
# DANGER: assumes there are only two states, and that value_counts() returns counts sorted in descending order
|
||||
|
@ -992,7 +992,6 @@ print('\n----------------------------------'
|
|||
|
||||
ambiguous_muts_value_counts.to_csv(outfile_ambig_mut_counts, index = True)
|
||||
#%% FIXME: Add sanity check to make sure you can add value_count checks
|
||||
print('\nREACHED here...................>>>>>>>')
|
||||
#%% Resolving ambiguous muts
|
||||
# Merging ambiguous muts
|
||||
#=================
|
||||
|
@ -1013,8 +1012,7 @@ else:
|
|||
gene_LF1.loc[ambig_muts_rev_df.index, 'mutation_info'] = ambig_muts_rev_df['mutation_info_REV']
|
||||
|
||||
gene_LF1['mutation_info_orig'].value_counts()
|
||||
#gene_LF1['mutation_info_v1'].value_counts()
|
||||
foo = gene_LF1.iloc[ambig_muts_rev_df.index]
|
||||
gene_LF1['mutation_info_v1'].value_counts()
|
||||
|
||||
# Sanity check1: if there are still any ambiguous muts
|
||||
#muts_split_rev = list(gene_LF1.groupby('mutation_info_v1'))
|
||||
|
@ -1527,8 +1525,8 @@ if (gene_LF3['dst_mode'].value_counts().sum() == len(gene_LF3)) and (gene_LF3['d
|
|||
else:
|
||||
print('\nFAIL: revised dst mode numbers MISmatch')
|
||||
|
||||
foo = gene_LF3[['Mut', 'position', 'dst', 'dst_multimode', 'dst_noNA', 'dst_mode']]
|
||||
foo2 = foo.sort_values(['position', 'Mut'])
|
||||
#foo = gene_LF3[['Mut', 'position', 'dst', 'dst_multimode', 'dst_noNA', 'dst_mode']]
|
||||
#foo2 = foo.sort_values(['position', 'Mut'])
|
||||
|
||||
print('\n------------------------------------------------------'
|
||||
, '\nRevised counting: mutation_info i.e dm om column\n'
|
||||
|
@ -1727,7 +1725,7 @@ len(gene_LF3)
|
|||
# Dropping duplicates from lineage df
|
||||
lf_lin_split_dups = lf_lin_split_ColSel[lf_lin_split_ColSel.index.duplicated()]
|
||||
lf_lin_split_U = lf_lin_split_ColSel[~lf_lin_split_ColSel.index.duplicated()]
|
||||
if len(lf_lin_split_U) == len(snps_only):
|
||||
if len(lf_lin_split_U) == len(SnpFDict):
|
||||
print('\nPASS: Duplicate entries removed from lf_lin'
|
||||
, '\nReady to start the final merge to generate gene_LF4')
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue