Adding residue index validator

This commit is contained in:
mvaradi 2018-10-13 20:07:54 +01:00
parent 69c0f3831d
commit bf3bb72bb8
3 changed files with 240 additions and 1 deletions

View file

@ -1 +1,2 @@
jsonschema
jsonschema
requests

104
tests/test_residue_index.py Normal file
View file

@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""
Copyright 2018 EMBL - European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific
language governing permissions and limitations under the
License.
"""
import json
from unittest import TestCase
from validator.residue_index import CheckResidueIndices
# Shared fixture: a complete, valid submission loaded once at import time.
# The path is relative to the current working directory of the test run.
with open("data/test_data.json", "r") as mock_data_file:
    mock_data = json.load(mock_data_file)

# Submission without a "pdb_id" key
mock_data_no_pdb_id = {"foo": "bar"}

# Submission with a single chain holding a single residue (label "2", ALA);
# used by the test that expects the residue check to fail
mock_data_bad_numbering = {
    "pdb_id": "2aqa",
    "chains": [
        {
            "chain_label": "A",
            "residues": [
                {"pdb_res_label": "2", "aa_type": "ALA"}
            ]
        }
    ]
}
def mock_get_residue_numbering_false(self):
    """
    Stub for CheckResidueIndices.get_residue_numbering that always fails

    It is assigned to an instance attribute in the tests, so it is called
    unbound and the single positional argument of the real call lands in `self`
    :param self: Ignored
    :return: False
    """
    return False
def mock_get_residue_numbering_true(self):
    """
    Stub for CheckResidueIndices.get_residue_numbering that always succeeds

    It is assigned to an instance attribute in the tests, so it is called
    unbound and the single positional argument of the real call lands in `self`
    :param self: Ignored
    :return: True
    """
    return True
def mock_compare_residue_number(self, foo, bar):
    """
    Stub for CheckResidueIndices.compare_residue_number that always fails

    It is assigned to an instance attribute in the tests, so it is called
    unbound: the three positional arguments of the real call land in
    `self`, `foo` and `bar`
    :param self: Ignored
    :param foo: Ignored
    :param bar: Ignored
    :return: False
    """
    return False
class TestCheckResidueIndices(TestCase):
    """
    Unit tests for the CheckResidueIndices validator
    """

    def setUp(self):
        """
        Creates a fresh validator from the shared mock data before each test
        """
        self.cri = CheckResidueIndices(mock_data)

    def test_loop_chains(self):
        """
        loop_chains() has to mirror the per-chain result and fail without a PDB id
        """
        # Per-chain check stubbed to fail
        self.cri.get_residue_numbering = mock_get_residue_numbering_false
        self.assertFalse(self.cri.loop_chains())

        # Per-chain check stubbed to succeed
        self.cri.get_residue_numbering = mock_get_residue_numbering_true
        self.assertTrue(self.cri.loop_chains())

        # No PDB id: fails before any chain is looked at
        self.cri.pdb_id = None
        self.assertFalse(self.cri.loop_chains())

    def test_set_pdb_id(self):
        """
        set_pdb_id() returns an id when the data has one, None otherwise
        """
        self.assertIsNotNone(self.cri.set_pdb_id())
        cri_without_id = CheckResidueIndices(mock_data_no_pdb_id)
        self.assertIsNone(cri_without_id.set_pdb_id())

    def test_check_numbering(self):
        """
        check_numbering() fails without residues and when the comparison fails
        """
        # Chain data without a "residues" key
        self.assertFalse(self.cri.check_numbering({}, {}))

        # Residue comparison stubbed to fail
        self.cri.compare_residue_number = mock_compare_residue_number
        chain_with_residue = {"residues": [{"pdb_res_label": 0, "aa_type": "ALA"}]}
        self.assertFalse(self.cri.check_numbering({}, chain_with_residue))

    def test_get_residue_numbering(self):
        """
        get_residue_numbering() succeeds for 1CBS and fails for 2H58

        NOTE: check_numbering is stubbed, but get_residue_numbering itself is
        not, so this test queries the live PDBe API
        """
        chain_stub = {"chain_label": "A"}
        self.cri.check_numbering = lambda x, y: True

        self.cri.pdb_id = "1CBS"
        self.assertTrue(self.cri.get_residue_numbering(chain_stub))

        # Expected to come back without residues - presumably an obsoleted
        # entry; confirm against the PDBe API
        self.cri.pdb_id = "2H58"
        self.assertFalse(self.cri.get_residue_numbering(chain_stub))

    def test_recursive_loop(self):
        """
        recursive_loop() fails for a label that is neither "chains" nor "residues"
        """
        outcome = self.cri.recursive_loop([{"foo": "bar"}], "foo", None, None)
        self.assertFalse(outcome)

    def test_with_bad_numbering(self):
        """
        The full check fails for the bad-numbering fixture

        NOTE: nothing is stubbed here, so this test queries the live PDBe API
        """
        cri_with_bad_numbering = CheckResidueIndices(mock_data_bad_numbering)
        self.assertFalse(cri_with_bad_numbering.loop_chains())

    def test_process_residues(self):
        """
        process_residues() needs both the residue label and the amino acid to match
        """
        # Same number, same amino acid
        self.assertTrue(self.cri.process_residues(
            [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}],
            "1", "ALA"))

        # Insertion code is part of the label
        self.assertTrue(self.cri.process_residues(
            [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": "C"}],
            "1C", "ALA"))

        # Different residue number
        self.assertFalse(self.cri.process_residues(
            [{"author_residue_number": 2, "residue_name": "ALA", "author_insertion_code": ""}],
            "1", "ALA"))

        # Same number, different amino acid
        self.assertFalse(self.cri.process_residues(
            [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}],
            "1", "HIS"))

134
validator/residue_index.py Normal file
View file

@ -0,0 +1,134 @@
import json
import requests
class CheckResidueIndices(object):
    """
    This class has all the methods required for validating the
    residue indices that are in the user submitted data.

    Each residue has an index number in the submitted JSON,
    and each has to match the indices in the official PDB entry.

    This class relies on the PDBe API to get the current residue
    indices. Human readable descriptions of every failed check are
    collected in `self.mismatches`.
    """

    # Seconds to wait for the PDBe API; without a timeout requests.get()
    # can block forever on an unresponsive server
    request_timeout = 30

    # Recorded when a residue listing has no residue with the submitted number
    numbering_mismatch = "residue numbering is completely mismatched between data and PDB entry"

    def __init__(self, data):
        """
        :param data: JSON data submitted by the user (parsed into a dict)
        """
        self.api_url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/residue_listing/"
        self.data = data
        self.pdb_id = self.set_pdb_id()
        self.mismatches = []
        self.labels = ["residues", "chains", "molecules"]

    def set_pdb_id(self):
        """
        Sets the PDB id based on the JSON data
        :return: String, lower case PDB id or None if the data has no "pdb_id"
        """
        if "pdb_id" in self.data:
            return self.data["pdb_id"].lower()
        return None

    def loop_chains(self):
        """
        Looping through all the chains that are present
        in the JSON data
        :return: True if the residue numbering is valid, False if not
        """
        if not self.pdb_id:
            return False
        # Same treatment as a chain without "residues" in check_numbering():
        # incomplete data is a failed check, not a KeyError
        if "chains" not in self.data:
            return False
        return all(self.get_residue_numbering(chain_data) for chain_data in self.data["chains"])

    def get_residue_numbering(self, chain_data):
        """
        Gets the residue numbering from the PDBe API and
        checks all residues
        :param chain_data: JSON sub-data
        :return: True if residue numbering is valid, False if not
        :raises requests.exceptions.RequestException: if the API cannot be
            reached or does not answer within `request_timeout` seconds
        """
        chain_id = chain_data["chain_label"]
        url = "%s%s/chain/%s" % (self.api_url, self.pdb_id, chain_id)
        response = requests.get(url, timeout=self.request_timeout)
        try:
            residue_numbering = json.loads(response.text)
        except ValueError:
            # e.g. an HTML error page instead of JSON
            self.mismatches.append("Could not parse the PDBe API response for this entry")
            return False
        if not residue_numbering:
            self.mismatches.append("No residues in PDB for this entry - probably obsoleted entry")
            return False
        return self.check_numbering(residue_numbering, chain_data)

    def check_numbering(self, residue_numbering, chain_data):
        """
        This method loops through all the residues in a chain
        and calls the residue index comparator method
        :param residue_numbering: JSON data from PDBe API
        :param chain_data: JSON data from user
        :return: True if residue numbering is valid, False if not
        """
        if "residues" not in chain_data:
            return False
        for residue in chain_data["residues"]:
            depositor_residue_number = residue["pdb_res_label"]
            depositor_aa_type = residue["aa_type"]
            if not self.compare_residue_number(depositor_residue_number, depositor_aa_type, residue_numbering):
                return False
        return True

    def compare_residue_number(self, depositor_residue_number, depositor_aa_type, residue_numbering):
        """
        This method starts looping through the substructure of the PDBe API data
        :param depositor_residue_number: Residue number provided by the user
        :param depositor_aa_type: Residue amino acid code provided by user
        :param residue_numbering: Residue numbering provided by PDBe API
        :return: True if residue numbering is valid, False if not
        """
        molecules = residue_numbering[self.pdb_id]["molecules"]
        return self.recursive_loop(molecules, "chains", depositor_residue_number, depositor_aa_type)

    def recursive_loop(self, data, label, depositor_residue_number, depositor_aa_type):
        """
        A recursive loop that goes down to residue level and processes all residues

        Every item of `data` is searched, so a residue is accepted when it
        matches in any molecule/chain of the listing, not only in the first one
        :param data: JSON data
        :param label: String, "chains" or "residues" depending on the level
        :param depositor_residue_number: Residue number provided by the user
        :param depositor_aa_type: Residue amino acid code provided by user
        :return: True if residue numbering is valid, False if not
        """
        # Remember where this search started recording mismatches
        checkpoint = len(self.mismatches)
        for item in data:
            sub_data = item[label]
            if label == "chains":
                matched = self.recursive_loop(sub_data, "residues", depositor_residue_number, depositor_aa_type)
            elif label == "residues":
                matched = self.process_residues(sub_data, depositor_residue_number, depositor_aa_type)
            else:
                continue
            if matched:
                # Failures recorded for the items tried before the match
                # are not mismatches after all
                del self.mismatches[checkpoint:]
                return True
        self._condense_mismatches(checkpoint)
        return False

    def _condense_mismatches(self, checkpoint):
        """
        Tidies up the messages recorded since `checkpoint` by a failed search:
        repeated messages are kept once, and the generic "completely mismatched"
        message is dropped when a specific residue mismatch was recorded
        :param checkpoint: Length of self.mismatches when the search started
        """
        recorded = self.mismatches[checkpoint:]
        specific = [message for message in recorded if message != self.numbering_mismatch]
        condensed = []
        for message in specific or recorded:
            if message not in condensed:
                condensed.append(message)
        self.mismatches[checkpoint:] = condensed

    def process_residues(self, residues, depositor_residue_number, depositor_aa_type):
        """
        This method grabs the residue information and calls the comparator if the
        residue number of PDBe is the same as the user input

        The PDBe label is the author residue number followed by the insertion
        code, e.g. "1" or "1C"
        :param residues: Residue data from PDBe API
        :param depositor_residue_number: Residue number provided by the user
        :param depositor_aa_type: Residue amino acid code provided by user
        :return: True if residue numbering is valid, False if not
        """
        # Compare as strings so an integer label in the user data can match
        wanted_label = str(depositor_residue_number)
        for residue in residues:
            # A null insertion code means "no insertion code"
            insertion_code = residue["author_insertion_code"] or ""
            if "%s%s" % (residue["author_residue_number"], insertion_code) == wanted_label:
                return self.make_comparison(residue["residue_name"], depositor_aa_type, depositor_residue_number)
        self.mismatches.append(self.numbering_mismatch)
        return False

    def make_comparison(self, residue_name, depositor_aa_type, depositor_residue_number):
        """
        This method does the comparison between two residues that have the same index number
        The comparison is between amino acid codes
        :param residue_name: Residue amino acid code provided by PDBe API
        :param depositor_aa_type: Residue amino acid code provided by user
        :param depositor_residue_number: Residue number provided by the user
        :return: True if the amino acid codes are the same, False if not
        """
        if residue_name == depositor_aa_type:
            return True
        mismatch = "residue %s (%s) in data does not match residue %s (%s) in PDB" % (
            depositor_residue_number, depositor_aa_type, depositor_residue_number, residue_name)
        self.mismatches.append(mismatch)
        return False