Handled rpoB 5UHC position offset in mcsm_ppi2
This commit is contained in:
parent
46e2c93885
commit
00b84ccb1c
30 changed files with 395 additions and 63 deletions
|
@ -75,15 +75,14 @@ args = arg_parser.parse_args()
|
|||
drug = args.drug
|
||||
gene = args.gene
|
||||
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
|
||||
gene_match = gene + '_p.'
|
||||
print('mut pattern for gene', gene, ':', gene_match)
|
||||
|
||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
||||
|
||||
nssnp_match2 = re.compile(nssnp_match)
|
||||
|
||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||
print('wt regex:', wt_regex)
|
||||
|
||||
|
@ -219,20 +218,21 @@ meta_gene_epi = meta_gene_multi.loc[(meta_gene_multi['dr_mult_snp_count']>1) | (
|
|||
|
||||
#%% TEST
|
||||
# formatting: keep only the mutations that match nssnp_match; drop every other entry
|
||||
foo1 = 'pncA_p.Thr47Pro;pncA_p.Thr61Pro;rpsA_c.XX'
|
||||
foo2 = 'pncA_Chromosome:g.2288693_2289280del; WT; pncA_p.Thr61Ala'
|
||||
#foo1 = 'pncA_p.Thr47Pro;pncA_p.Thr61Pro;rpsA_c.XX'
|
||||
#foo2 = 'pncA_Chromosome:g.2288693_2289280del; WT; pncA_p.Thr61Ala'
|
||||
|
||||
|
||||
foo1_s = foo1.split(';')
|
||||
foo1_s
|
||||
nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
arse=list(filter(nssnp_match2.match, foo1_s))
|
||||
arse
|
||||
#foo1_s = foo1.split(';')
|
||||
#foo1_s
|
||||
#nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
#arse=list(filter(nssnp_match2.match, foo1_s))
|
||||
#arse
|
||||
|
||||
#foo1_s2 = ';'.join(arse)
|
||||
#foo1_s2
|
||||
|
||||
foo1_s2 = ';'.join(arse)
|
||||
foo1_s2
|
||||
#%%
|
||||
nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
#nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
|
||||
# dr_muts_col
|
||||
dr_clean_col = dr_muts_col + '_clean'
|
||||
|
@ -248,6 +248,7 @@ for i, v in enumerate(meta_gene_epi[dr_muts_col]):
|
|||
dr2_s = v.split(';')
|
||||
print(dr2_s)
|
||||
dr2_sf = list(filter(nssnp_match2.match, dr2_s))
|
||||
#dr2_sf = list(filter(nssnp_match.match, dr2_s))
|
||||
print(dr2_sf)
|
||||
dr2_sf2 = ';'.join(dr2_sf)
|
||||
meta_gene_epi[dr_clean_col].iloc[i] = dr2_sf2
|
||||
|
@ -262,13 +263,13 @@ meta_gene_epi[other_clean_col] = ''
|
|||
|
||||
for i, v in enumerate(meta_gene_epi[other_muts_col]):
|
||||
#print(i, v)
|
||||
print('======================================================')
|
||||
print(i)
|
||||
print(v)
|
||||
#print('======================================================')
|
||||
#print(i)
|
||||
#print(v)
|
||||
other2_s = v.split(';')
|
||||
print(other2_s)
|
||||
#print(other2_s)
|
||||
other2_sf = list(filter(nssnp_match2.match, other2_s))
|
||||
print(other2_sf)
|
||||
#print(other2_sf)
|
||||
other2_sf2 = ';'.join(other2_sf)
|
||||
meta_gene_epi[other_clean_col].iloc[i] = other2_sf2
|
||||
|
||||
|
@ -281,7 +282,8 @@ meta_gene_epi_f = meta_gene_epi[['id', 'sample'
|
|||
, 'dr_mult_snp_count'
|
||||
, other_muts_col, other_clean_col
|
||||
, 'other_mult_snp_count']]
|
||||
meta_gene_epi_f.columns
|
||||
#print(meta_gene_epi_f.columns)
|
||||
print(meta_gene_epi_f)
|
||||
|
||||
cols_to_output = ['id', 'sample'
|
||||
, dr_clean_col
|
||||
|
@ -293,7 +295,6 @@ cols_to_output = ['id', 'sample'
|
|||
meta_gene_epi_f2 = meta_gene_epi_f[cols_to_output]
|
||||
|
||||
|
||||
|
||||
#%%
|
||||
# formatting: remove anything that does not match nssnp_match
|
||||
#nssnp_neg_match = '(?!pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue