LSHTM_analysis/scripts/align.py

51 lines
1.3 KiB
Python
Executable file

#!/usr/bin/env python3
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import re
import os
#%%
def myalign(ref_seq, pdb_seq):
    """Globally align ``ref_seq`` against ``pdb_seq`` and report the result.

    Runs ``pairwise2.align.globalxx`` on the two sequences and uses only the
    first alignment it returns. The two aligned sequences are printed with a
    row of ``|`` markers between them (one per identical column), followed by
    the start/end offsets described under *Returns*.

    Parameters
    ----------
    ref_seq
        Reference sequence; passed as the first argument to ``globalxx``.
    pdb_seq
        Sequence to align against the reference; passed as the second
        argument to ``globalxx``.

    Returns
    -------
    dict
        ``'aligned_fasta'``: the aligned form of ``pdb_seq`` (second
        sequence of the first alignment), including any gap characters.
        ``'start_match'``: index of the first word character (letter, digit
        or underscore) in ``'aligned_fasta'``.
        ``'end_match'``: index just past that single character.

    Raises
    ------
    ValueError
        If no alignment is returned, or if the aligned ``pdb_seq`` contains
        no word character at all.
    """
    alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
    if not alignments:
        raise ValueError('no alignment returned for the given sequences')

    # Only the first alignment is reported.
    aligned_ref = alignments[0][0]
    aligned_pdb = alignments[0][1]

    # '|' under every column where both aligned sequences agree.
    match_row = ''.join(
        '|' if ref_char == pdb_char else ' '
        for ref_char, pdb_char in zip(aligned_ref, aligned_pdb)
    )

    print(aligned_ref)
    print(match_row)
    print(aligned_pdb)
    print('===============================================================\n')

    # Locate the first word character in the aligned pdb sequence; leading
    # non-word characters (e.g. '-' gap padding) are skipped by the search.
    first_residue = re.search(r'\w', aligned_pdb)
    if first_residue is None:
        raise ValueError('aligned sequence contains no word characters')

    offset = first_residue.start()
    # NOTE(review): the pattern matches exactly one character, so this is
    # always offset + 1 -- confirm whether the end of the whole aligned
    # region was intended before relying on 'end_match'.
    offset_end = first_residue.end()
    print('start of match:', offset
          , '\nend of match:', offset_end)
    print('===============================================================\n')

    return {
        'aligned_fasta': aligned_pdb,
        'start_match': offset,
        'end_match': offset_end,
    }