#!/usr/bin/python
"""Surface calculation: sliding-window K&D hydrophobicity of a FASTA sequence.

Reads a protein sequence from ``infile``, scores it with the
``ProtParamData.kd`` scale over a sliding window of ``window`` residues
(``ProteinAnalysis.protein_scale``) and prints a table that pairs each
residue letter with the value of the window centred on that residue.
Residues too close to either end to be the centre of a full window get NaN.
"""

from pylab import *
from Bio.SeqUtils import ProtParamData
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO

try:
    from Bio.Alphabet.IUPAC import IUPACProtein
except ImportError:
    # Bio.Alphabet was removed in Biopython 1.78. The name is not used in
    # this script, so a missing module must not stop the analysis.
    IUPACProtein = None
import pprint as pp
import pandas as pd
import numpy as np

infile = '/home/tanu/git/Data/pyrazinamide/input/original/3pl1.fasta.txt'
window = 9
# Distance from the start of a window to its centre residue. Integer
# arithmetic keeps this an int so it can be used directly as an index shift.
# For an even window there is no exact centre; this picks the residue just
# left of the midpoint.
offset = (window - 1) // 2

# The context manager closes the file even if parsing fails. When the file
# holds several records the last one is analysed.
record = None
with open(infile) as fh:
    for record in SeqIO.parse(fh, "fasta"):
        pass

if record is None:
    raise ValueError('No FASTA records found in ' + infile)

seq_id = record.id
seq = record.seq
num_residues = len(seq)

sequence = str(seq)
X = ProteinAnalysis(sequence)
# One value per window position, i.e. len(sequence) - window + 1 values.
values = X.protein_scale(ProtParamData.kd, window)

print('Sequence Length:', num_residues)
print('Post-Analysis Length:', len(values))
print('Window Length:', window)
print('Window Offset:', offset)

dfSeq = pd.DataFrame({'seq': list(sequence)})
# values[i] describes the window starting at residue i, whose centre is
# residue i + offset. Labelling the rows with the centre positions lets the
# column-wise concat below line each value up with its residue letter and
# leaves NaN for the residues at both ends that have no full window.
dfVals = pd.DataFrame(
    {'values': values},
    index=range(offset, offset + len(values)),
)

df = pd.concat([dfSeq, dfVals], ignore_index=True, axis=1)
print(df)

# Optional plot of the profile (disabled):
#df=pd.DataFrame(list(sequence), values)
#plot(185, values, linewidth=1.0)
#axis(xmin = 1, xmax = num_residues)
#xlabel("Residue Number")
#ylabel("Hydrophobicity")
#title("K&D Hydrophobicity for " + seq_id)
#show()