Finally, revised data processing is complete
This commit is contained in:
parent
ac0d14e116
commit
3c436f0c27
1 changed file with 9 additions and 11 deletions
|
@ -923,14 +923,14 @@ changes_val = []
|
||||||
changes_dict = {}
|
changes_dict = {}
|
||||||
|
|
||||||
##BROKENNNN!!!!
|
##BROKENNNN!!!!
|
||||||
|
common_muts
|
||||||
|
gene_LF1['mutation'].head()
|
||||||
common_muts_lower = list((map(lambda x: x.lower(), common_muts)))
|
common_muts_lower = list((map(lambda x: x.lower(), common_muts)))
|
||||||
common_muts_lower
|
common_muts_lower
|
||||||
##BROKENNNN!!!!
|
##BROKENNNN!!!!
|
||||||
#for i in common_muts:
|
for i in common_muts:
|
||||||
for i in common_muts_lower:
|
#for i in common_muts_lower:
|
||||||
|
#print(i)
|
||||||
print(i)
|
|
||||||
temp_df = gene_LF1[gene_LF1['mutation'] == i][['mutation', 'mutation_info_orig']]
|
temp_df = gene_LF1[gene_LF1['mutation'] == i][['mutation', 'mutation_info_orig']]
|
||||||
temp_df
|
temp_df
|
||||||
# DANGER: ASSUMES TWO STATES ONLY and that value_counts sorts by descending
|
# DANGER: ASSUMES TWO STATES ONLY and that value_counts sorts by descending
|
||||||
|
@ -992,7 +992,6 @@ print('\n----------------------------------'
|
||||||
|
|
||||||
ambiguous_muts_value_counts.to_csv(outfile_ambig_mut_counts, index = True)
|
ambiguous_muts_value_counts.to_csv(outfile_ambig_mut_counts, index = True)
|
||||||
#%% FIXME: Add sanity check to make sure you can add value_count checks
|
#%% FIXME: Add sanity check to make sure you can add value_count checks
|
||||||
print('\nREACHED here...................>>>>>>>')
|
|
||||||
#%% Resolving ambiguous muts
|
#%% Resolving ambiguous muts
|
||||||
# Merging ambiguous muts
|
# Merging ambiguous muts
|
||||||
#=================
|
#=================
|
||||||
|
@ -1013,8 +1012,7 @@ else:
|
||||||
gene_LF1.loc[ambig_muts_rev_df.index, 'mutation_info'] = ambig_muts_rev_df['mutation_info_REV']
|
gene_LF1.loc[ambig_muts_rev_df.index, 'mutation_info'] = ambig_muts_rev_df['mutation_info_REV']
|
||||||
|
|
||||||
gene_LF1['mutation_info_orig'].value_counts()
|
gene_LF1['mutation_info_orig'].value_counts()
|
||||||
#gene_LF1['mutation_info_v1'].value_counts()
|
gene_LF1['mutation_info_v1'].value_counts()
|
||||||
foo = gene_LF1.iloc[ambig_muts_rev_df.index]
|
|
||||||
|
|
||||||
# Sanity check1: if there are still any ambiguous muts
|
# Sanity check1: if there are still any ambiguous muts
|
||||||
#muts_split_rev = list(gene_LF1.groupby('mutation_info_v1'))
|
#muts_split_rev = list(gene_LF1.groupby('mutation_info_v1'))
|
||||||
|
@ -1527,8 +1525,8 @@ if (gene_LF3['dst_mode'].value_counts().sum() == len(gene_LF3)) and (gene_LF3['d
|
||||||
else:
|
else:
|
||||||
print('\nFAIL: revised dst mode numbers MISmatch')
|
print('\nFAIL: revised dst mode numbers MISmatch')
|
||||||
|
|
||||||
foo = gene_LF3[['Mut', 'position', 'dst', 'dst_multimode', 'dst_noNA', 'dst_mode']]
|
#foo = gene_LF3[['Mut', 'position', 'dst', 'dst_multimode', 'dst_noNA', 'dst_mode']]
|
||||||
foo2 = foo.sort_values(['position', 'Mut'])
|
#foo2 = foo.sort_values(['position', 'Mut'])
|
||||||
|
|
||||||
print('\n------------------------------------------------------'
|
print('\n------------------------------------------------------'
|
||||||
, '\nRevised counting: mutation_info i.e dm om column\n'
|
, '\nRevised counting: mutation_info i.e dm om column\n'
|
||||||
|
@ -1727,7 +1725,7 @@ len(gene_LF3)
|
||||||
# Dropping duplicates from lineage df
|
# Dropping duplicates from lineage df
|
||||||
lf_lin_split_dups = lf_lin_split_ColSel[lf_lin_split_ColSel.index.duplicated()]
|
lf_lin_split_dups = lf_lin_split_ColSel[lf_lin_split_ColSel.index.duplicated()]
|
||||||
lf_lin_split_U = lf_lin_split_ColSel[~lf_lin_split_ColSel.index.duplicated()]
|
lf_lin_split_U = lf_lin_split_ColSel[~lf_lin_split_ColSel.index.duplicated()]
|
||||||
if len(lf_lin_split_U) == len(snps_only):
|
if len(lf_lin_split_U) == len(SnpFDict):
|
||||||
print('\nPASS: Duplicate entries removed from lf_lin'
|
print('\nPASS: Duplicate entries removed from lf_lin'
|
||||||
, '\nReady to start the final merge to generate gene_LF4')
|
, '\nReady to start the final merge to generate gene_LF4')
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue