Merging improvements from https://github.com/rdk/funpdbe-validator rdk@github - "more specific information in mismatch messages"

This commit is contained in:
Mihaly Varadi 2019-02-20 15:08:30 +00:00
parent b37b155bfc
commit 89ac85e304
2 changed files with 32 additions and 24 deletions

View file

@ -42,7 +42,7 @@ def mock_get_residue_numbering_true(self):
return True return True
def mock_compare_residue_number(self, foo, bar): def mock_compare_residue_number(self, foo, bar, asd):
return False return False
@ -67,16 +67,16 @@ class TestCheckResidueIndices(TestCase):
self.assertIsNone(bad_cri._set_pdb_id()) self.assertIsNone(bad_cri._set_pdb_id())
def test_check_numbering(self): def test_check_numbering(self):
result = self.cri._check_numbering({}, {}) result = self.cri._check_numbering({}, {}, "A")
self.assertFalse(result) self.assertFalse(result)
self.cri._compare_residue_number = mock_compare_residue_number self.cri._compare_residue_number = mock_compare_residue_number
result = self.cri._check_numbering({}, {"residues": [{"pdb_res_label": 0, "aa_type": "ALA"}]}) result = self.cri._check_numbering({}, {"residues": [{"pdb_res_label": 0, "aa_type": "ALA"}]}, "A")
self.assertFalse(result) self.assertFalse(result)
def test_get_residue_numbering(self): def test_get_residue_numbering(self):
mock_data = {"chain_label": "A"} mock_data = {"chain_label": "A"}
self.cri.pdb_id = "1CBS" self.cri.pdb_id = "1CBS"
self.cri._check_numbering = lambda x, y : True self.cri._check_numbering = lambda x, y, z : True
result = self.cri._get_residue_numbering(mock_data) result = self.cri._get_residue_numbering(mock_data)
self.assertTrue(result) self.assertTrue(result)
self.cri.pdb_id = "2H58" self.cri.pdb_id = "2H58"
@ -84,7 +84,7 @@ class TestCheckResidueIndices(TestCase):
self.assertFalse(result) self.assertFalse(result)
def test_recursive_loop(self): def test_recursive_loop(self):
result = self.cri._recursive_loop([{"foo": "bar"}], "foo", None, None) result = self.cri._recursive_loop([{"foo": "bar"}], "foo", None, None, "A")
self.assertFalse(result) self.assertFalse(result)
def test_with_bad_numbering(self): def test_with_bad_numbering(self):
@ -94,16 +94,16 @@ class TestCheckResidueIndices(TestCase):
def test_process_residues(self): def test_process_residues(self):
result = self.cri._process_residues( result = self.cri._process_residues(
[{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}], "1", "ALA") [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}], "1", "ALA", "A")
self.assertTrue(result) self.assertTrue(result)
result = self.cri._process_residues( result = self.cri._process_residues(
[{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": "C"}], "1C", "ALA") [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": "C"}], "1C", "ALA", "A")
self.assertTrue(result) self.assertTrue(result)
result = self.cri._process_residues( result = self.cri._process_residues(
[{"author_residue_number": 2, "residue_name": "ALA", "author_insertion_code": ""}], "1", "ALA") [{"author_residue_number": 2, "residue_name": "ALA", "author_insertion_code": ""}], "1", "ALA", "A")
self.assertFalse(result) self.assertFalse(result)
result = self.cri._process_residues( result = self.cri._process_residues(
[{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}], "1", "HIS") [{"author_residue_number": 1, "residue_name": "ALA", "author_insertion_code": ""}], "1", "HIS", "A")
self.assertFalse(result) self.assertFalse(result)
def test_with_multichain(self): def test_with_multichain(self):

View file

@ -77,9 +77,9 @@ class ResidueIndexes(object):
if not residue_numbering.keys(): if not residue_numbering.keys():
self.mismatches.append("No residues in PDB for this entry - probably obsoleted entry") self.mismatches.append("No residues in PDB for this entry - probably obsoleted entry")
return False return False
return self._check_numbering(residue_numbering, chain_data) return self._check_numbering(residue_numbering, chain_data, chain_id)
def _check_numbering(self, residue_numbering, chain_data): def _check_numbering(self, residue_numbering, chain_data, chain_id):
""" """
This method loops through all the residues in a chain This method loops through all the residues in a chain
and calls the residue index comparator method and calls the residue index comparator method
@ -92,11 +92,13 @@ class ResidueIndexes(object):
for residue in chain_data["residues"]: for residue in chain_data["residues"]:
depositor_residue_number = residue["pdb_res_label"] depositor_residue_number = residue["pdb_res_label"]
depositor_aa_type = residue["aa_type"] depositor_aa_type = residue["aa_type"]
if not self._compare_residue_number(depositor_residue_number, depositor_aa_type, residue_numbering): if not self._compare_residue_number(depositor_residue_number, depositor_aa_type, residue_numbering,
chain_id):
return False return False
return True return True
def _compare_residue_number(self, depositor_residue_number, depositor_aa_type, residue_numbering): def _compare_residue_number(self, depositor_residue_number, depositor_aa_type, residue_numbering,
depositor_chain_id):
""" """
This method starts looping through the substructure of the PDBe API data This method starts looping through the substructure of the PDBe API data
:param depositor_residue_number: Residue number provided by the user :param depositor_residue_number: Residue number provided by the user
@ -105,9 +107,10 @@ class ResidueIndexes(object):
:return: True if residue numbering is valid, False if not :return: True if residue numbering is valid, False if not
""" """
molecules = residue_numbering[self.pdb_id]["molecules"] molecules = residue_numbering[self.pdb_id]["molecules"]
return self._recursive_loop(molecules, "chains", depositor_residue_number, depositor_aa_type) return self._recursive_loop(molecules, "chains", depositor_residue_number, depositor_aa_type,
depositor_chain_id)
def _recursive_loop(self, data, label, depositor_residue_number, depositor_aa_type): def _recursive_loop(self, data, label, depositor_residue_number, depositor_aa_type, depositor_chain_id):
""" """
A recursive loop that goes down to residue level and processes all residues A recursive loop that goes down to residue level and processes all residues
:param data: JSON data :param data: JSON data
@ -120,13 +123,14 @@ class ResidueIndexes(object):
for item in data: for item in data:
sub_data = item[label] sub_data = item[label]
if label == "chains": if label == "chains":
flag = self._recursive_loop(sub_data, "residues", depositor_residue_number, depositor_aa_type) flag = self._recursive_loop(sub_data, "residues", depositor_residue_number, depositor_aa_type,
depositor_chain_id)
elif label == "residues": elif label == "residues":
return self._process_residues(sub_data, depositor_residue_number, depositor_aa_type) return self._process_residues(sub_data, depositor_residue_number, depositor_aa_type, depositor_chain_id)
if label == "chains": if label == "chains":
return flag return flag
def _process_residues(self, residues, depositor_residue_number, depositor_aa_type): def _process_residues(self, residues, depositor_residue_number, depositor_aa_type, depositor_chain_id):
""" """
This method grabs the residue information and calls the comparator if the This method grabs the residue information and calls the comparator if the
residue number of PDBe is the same as the user input residue number of PDBe is the same as the user input
@ -136,12 +140,16 @@ class ResidueIndexes(object):
:return: True if residue numbering is valid, False if not :return: True if residue numbering is valid, False if not
""" """
for residue in residues: for residue in residues:
if "%i%s" % (residue["author_residue_number"], residue["author_insertion_code"]) == depositor_residue_number: if "%i%s" % (
return self._make_comparison(residue["residue_name"], depositor_aa_type, depositor_residue_number) residue["author_residue_number"], residue["author_insertion_code"]) == depositor_residue_number:
self.mismatches.append("residue numbering is completely mismatched between data and PDB entry") return self._make_comparison(residue["residue_name"], depositor_aa_type, depositor_residue_number,
depositor_chain_id)
self.mismatches.append(
"residue numbering is completely mismatched between data and PDB entry (invalid residue: %s_%s)" % (
depositor_chain_id, depositor_residue_number))
return False return False
def _make_comparison(self, residue_name, depositor_aa_type, depositor_residue_number): def _make_comparison(self, residue_name, depositor_aa_type, depositor_residue_number, depositor_chain_id):
""" """
This method does the comparison between two residues that have the same index number This method does the comparison between two residues that have the same index number
The comparison is between amino acid code The comparison is between amino acid code
@ -152,7 +160,7 @@ class ResidueIndexes(object):
""" """
if residue_name == depositor_aa_type: if residue_name == depositor_aa_type:
return True return True
mismatch = "residue %s (%s) in data does not match residue %s (%s) in PDB" % ( mismatch = "residue %s_%s (%s) in data does not match residue %s (%s) in PDB" % (
depositor_residue_number, depositor_aa_type, depositor_residue_number, residue_name) depositor_chain_id, depositor_residue_number, depositor_aa_type, depositor_residue_number, residue_name)
self.mismatches.append(mismatch) self.mismatches.append(mismatch)
return False return False