This commit is contained in:
Tanushree Tunstall 2020-02-20 10:39:45 +00:00
parent ec25e9fd2d
commit 26e4652d63

View file

@ -0,0 +1,55 @@
#!/usr/bin/python
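# Surface calculation: sliding-window Kyte & Doolittle (ProtParamData.kd)
# hydrophobicity profile of the protein sequence read from a FASTA file.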
from pylab import *
from Bio.SeqUtils import ProtParamData
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
from Bio.Alphabet.IUPAC import IUPACProtein
import pprint as pp
import pandas as pd
import numpy as np
# FASTA file holding the protein sequence to analyse.
infile = '/home/tanu/git/Data/pyrazinamide/input/original/3pl1.fasta.txt'

# Width, in residues, of the sliding window used for the hydrophobicity scale.
window = 9

# Distance from the first residue of a window to its centre residue.
# Integer arithmetic replaces round((window/2)-0.5): round() rounds halves to
# the nearest even number, so even windows gave inconsistent offsets
# (10 -> 4, 12 -> 6, 14 -> 6). For odd windows the result is unchanged.
offset = (window - 1) // 2
# Parse the FASTA file. The context manager closes the handle even when
# parsing raises, which the bare open()/close() pair did not guarantee.
with open(infile) as fh:
    records = list(SeqIO.parse(fh, "fasta"))

# Fail with a clear message instead of a NameError on 'seq' further down.
if not records:
    raise ValueError('No FASTA records found in ' + infile)

# If the file holds several records, the last one is used (same outcome as
# looping over every record and keeping the final assignment).
record = records[-1]

# NOTE: 'id' shadows the builtin of the same name; the name is kept because
# the commented-out plotting code at the end of the script refers to it.
id = record.id
seq = record.seq
num_residues = len(seq)

# Plain-string form of the sequence, as required by ProteinAnalysis.
sequence = str(seq)
# Score the sequence with the Kyte & Doolittle scale (ProtParamData.kd),
# averaged over a sliding window of 'window' residues.
X = ProteinAnalysis(sequence)
values = X.protein_scale(ProtParamData.kd, window)

# Report the sizes involved so the gap between the number of residues and
# the number of window scores is visible.
for _label, _number in (
    ('Sequence Length:', num_residues),
    ('Post-Analysis Length:', len(values)),
    ('Window Length:', window),
    ('Window Offset:', offset),
):
    print(_label, _number)
# One row per residue, indexed by its 0-based position in the sequence.
dfSeq = pd.DataFrame({'seq': list(sequence)})

# values[i] is the score of the window that starts at residue i, so it
# belongs to the residue at the centre of that window, i.e. position
# i + offset. Index the scores by that centre position so that concat()
# lines each score up with the residue letter it describes.
first_scored = int(offset)
dfVals = pd.DataFrame(
    {'values': values},
    index=range(first_scored, first_scored + len(values)),
)

# concat(axis=1) aligns rows on the index (outer join), so residues at either
# end that never sit at the centre of a full window get NaN. ignore_index is
# left at its default: along axis=1 it would replace the 'seq' and 'values'
# column names with 0 and 1.
df = pd.concat([dfSeq, dfVals], axis=1)
print(df)
#df=pd.DataFrame(list(sequence), values)
#plot(185, values, linewidth=1.0)
#axis(xmin = 1, xmax = num_residues)
#xlabel("Residue Number")
#ylabel("Hydrophobicity")
#title("K&D Hydrophobicity for " + id)
#show()