#!/usr/bin/env python3 # useful links #https://towardsdatascience.com/pairwise-sequence-alignment-using-biopython-d1a9d0ba861f #https://www.biostars.org/p/265338/ from Bio import SeqIO from Bio import pairwise2 from Bio.pairwise2 import format_alignment import re import os #%% os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples') #%% def myalign(ref_seq, pdb_seq): myalign_dict = {} alignments = pairwise2.align.globalxx(ref_seq, pdb_seq) #alignments = pairwise2.align.localxx(ref, struct) match = [] for a, b in zip(alignments[0][0], alignments[0][1]): if a == b: match.append('|') else: match.append(' ') #print(match) print(alignments[0][0]) print("".join(match)) print(alignments[0][1]) result_align = alignments[0][1] #print(result_align) print('===============================================================\n') # update dict myalign_dict.update({'aligned_fasta': result_align}) aa_regex = '\w' m = re.search(aa_regex, result_align) #m = my_match.span() offset = m.start() offset_end = m.end() print('start of match:', offset , '\nend of match:', offset_end) print('===============================================================\n') # update dict myalign_dict.update({'start_match' : offset}) myalign_dict.update({'end_match' : offset_end}) return myalign_dict def main(): """ read file containing reference and pdb_sequence to align """ my_dict = {} align_fastas_to_align = open('align_fastas.txt', 'r') for record in SeqIO.parse(align_fastas_to_align,"fasta"): myid = record.id seq = str(record.seq) my_dict.update({myid : seq}) my_keys = list(my_dict.keys()) my_ref_seq = my_dict[my_keys[0]] # ref_seq my_pdb_seq = my_dict[my_keys[1]] # pdb_seq fasta_alignment = myalign(my_ref_seq, my_pdb_seq) print('this is my result:', fasta_alignment) if __name__ == '__main__': main() #%% debug: individually my_dict = {} align_fastas_to_align = open('align_fastas.txt', 'r') for record in SeqIO.parse(align_fastas_to_align,"fasta"): myid =record.id seq=str(record.seq) #print(myid, seq) my_dict.update({myid: seq}) print(my_dict) print(my_dict.keys()) my_keys = list(my_dict.keys()) alignments = pairwise2.align.globalxx(my_dict[my_keys[0]], my_dict[my_keys[1]]) match = [] for a, b in zip(alignments[0][0], alignments[0][1]): if a == b: match.append('|') else: match.append(' ') #print(match) print(alignments[0][0]) print("".join(match)) print(alignments[0][1]) result_align = alignments[0][1] #print(result_align) print('===============================================================\n') #offset = '' aa_regex = '\w' m = re.search(aa_regex, result_align) #m = my_match.span() offset = m.start() offset_end = m.end() print('start of match:', offset, '\nend of match:', offset_end) print('===============================================================\n')