# LSHTM_analysis/scripts/align.py

#!/usr/bin/env python3
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import re
import os
#%%
def myalign(ref_seq, pdb_seq):
    """Globally align a reference sequence with a structure (PDB) sequence.

    The first alignment returned by ``pairwise2.align.globalxx`` is printed
    as three lines (aligned reference, a ``|`` marker row flagging identical
    positions, aligned PDB sequence) and summarised in a dictionary.

    Parameters
    ----------
    ref_seq
        Reference sequence; passed as the first argument to ``globalxx``.
    pdb_seq
        Sequence to align against the reference; passed as the second
        argument to ``globalxx``.

    Returns
    -------
    dict
        ``'aligned_fasta'``: the aligned form of ``pdb_seq`` as returned by
        the aligner (second element of the first alignment).
        ``'start_match'``: index of the first word (residue) character in
        that aligned string.
        ``'end_match'``: index one past the last word (residue) character
        in that aligned string (exclusive end, like ``re.Match.end()``).

    Raises
    ------
    ValueError
        If the aligner returns no alignment, or if the aligned PDB sequence
        contains no residue characters at all.
    """
    alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
    #alignments = pairwise2.align.localxx(ref, struct)

    if not alignments:
        raise ValueError('no alignment could be produced for the given sequences')

    # Only the first reported alignment is used; index access keeps this
    # working whether the aligner returns plain tuples or named tuples.
    best = alignments[0]
    aligned_ref, result_align = best[0], best[1]

    # Marker row: '|' where both aligned sequences carry the same character.
    match_row = "".join(
        '|' if a == b else ' ' for a, b in zip(aligned_ref, result_align)
    )

    print(aligned_ref)
    print(match_row)
    print(result_align)
    print('===============================================================\n')

    # find start and end of match
    # A raw string avoids the invalid-escape warning of '\w'. The greedy
    # '.*' stretches the match from the first to the last residue character,
    # so m.end() is the true end of the aligned region rather than start + 1.
    aa_regex = r'\w(?:.*\w)?'
    m = re.search(aa_regex, result_align, flags=re.DOTALL)
    if m is None:
        raise ValueError('aligned sequence contains no residue characters')

    offset = m.start()
    offset_end = m.end()

    print('start of match:', offset
    , '\nend of match:', offset_end)
    print('===============================================================\n')

    return {
        'aligned_fasta': result_align,
        'start_match': offset,
        'end_match': offset_end,
    }