55 lines
1.3 KiB
Python
Executable file
55 lines
1.3 KiB
Python
Executable file
#!/usr/bin/python
|
|
# Surface calculation: Kyte & Doolittle hydrophobicity profile of a protein sequence
|
|
#
|
|
from pylab import *
|
|
from Bio.SeqUtils import ProtParamData
|
|
from Bio.SeqUtils.ProtParam import ProteinAnalysis
|
|
from Bio import SeqIO
|
|
from Bio.Alphabet.IUPAC import IUPACProtein
|
|
import pprint as pp
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
|
|
# FASTA file holding the protein sequence to analyse.
infile = '/home/tanu/git/Data/pyrazinamide/input/original/3pl1.fasta.txt'

# Width, in residues, of the sliding window passed to protein_scale().
window = 9
# Distance from the first residue of a window to its centre residue.
# Integer arithmetic avoids round()'s round-half-to-even behaviour, which
# gave inconsistent offsets for even window sizes.
offset = (window - 1) // 2


def read_last_fasta_record(path):
    """Return the last record of the FASTA file at *path*.

    Every record in the file is parsed and only the final one is kept.
    The file is closed even if parsing fails.

    Raises:
        ValueError: if the file contains no FASTA records.
    """
    record = None
    with open(path) as handle:
        for record in SeqIO.parse(handle, "fasta"):
            pass
    if record is None:
        raise ValueError('No FASTA records found in ' + str(path))
    return record


def align_to_residues(sequence, values, offset):
    """Build a per-residue table with each window value on its centre residue.

    values[i] is computed from the window that starts at residue i, so it
    is placed on row i + offset. Rows that receive no value (the residues
    closer than *offset* to the start, and the uncovered tail) hold NaN.

    Args:
        sequence: the residue letters, one character per residue.
        values: one number per window position, in sequence order.
        offset: number of rows to shift the values down by.

    Returns:
        A DataFrame with columns 'seq' and 'values', indexed by 0-based
        residue position.
    """
    residues = pd.Series(list(sequence), name='seq')
    centred = pd.Series(
        values,
        index=range(offset, offset + len(values)),
        name='values',
        dtype=float,
    )
    # Concatenating column-wise aligns the two series on the row index, which
    # is what shifts the values; the series names become the column labels.
    return pd.concat([residues, centred], axis=1)


# The statements below stay at module scope (not inside a function) so that
# running the script still leaves sequence, values and df defined as globals.
if __name__ == "__main__":
    record = read_last_fasta_record(infile)
    seq_id = record.id
    sequence = str(record.seq)
    num_residues = len(sequence)

    # Sliding-window profile on the ProtParamData.kd scale; one value per
    # window position.
    values = ProteinAnalysis(sequence).protein_scale(ProtParamData.kd, window)

    print('Sequence Length:', num_residues)
    print('Post-Analysis Length:', len(values))
    print('Window Length:', window)
    print('Window Offset:', offset)

    df = align_to_residues(sequence, values, offset)

    print(df)

    # Plotting sketch (disabled):
    #plot(df.index + 1, df['values'], linewidth=1.0)
    #axis(xmin = 1, xmax = num_residues)
    #xlabel("Residue Number")
    #ylabel("Hydrophobicity")
    #title("K&D Hydrophobicity for " + seq_id)
    #show()
|
|
|