From 42a62f6c889bdc97a158acd2dea9b3e5cad32104 Mon Sep 17 00:00:00 2001 From: mvaradi Date: Fri, 12 Oct 2018 17:26:03 +0100 Subject: [PATCH] Initial commit of Validator() with tests --- tests/validator_tests.py | 64 ++++++++++++++++++++++++++++ validator/__init__.py | 0 validator/validator.py | 92 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 tests/validator_tests.py create mode 100644 validator/__init__.py create mode 100644 validator/validator.py diff --git a/tests/validator_tests.py b/tests/validator_tests.py new file mode 100644 index 0000000..00a3c1e --- /dev/null +++ b/tests/validator_tests.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +""" +Copyright 2018 EMBL - European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +either express or implied. See the License for the specific +language governing permissions and limitations under the +License. 
import os
import unittest

from validator.validator import Validator


class TestValidator(unittest.TestCase):
    """
    Unit tests for Validator: JSON parsing and the basic sanity checks
    (data resource name and PDB id).
    """

    # Fixtures are located relative to this file instead of the current
    # working directory, so the outcome no longer depends on where the test
    # runner is started.
    # NOTE(review): assumes data/ sits next to tests/, which is what the
    # former "../data/..." paths imply when run from tests/ - confirm.
    DATA_DIR = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), os.pardir, "data"
    )

    def test_no_file_at_path(self):
        """A missing file yields None and a 'File error' message."""
        validator = Validator("resource_name")
        self.assertIsNone(validator.parse_json("invalid_path"))
        self.assertIn("File error", validator.error_log)

    def test_json_parser(self):
        """Well-formed JSON is parsed; malformed JSON logs a 'JSON error'."""
        validator = Validator("resource_name")
        data = validator.parse_json(
            os.path.join(self.DATA_DIR, "test_data.json")
        )
        self.assertIsNotNone(data)
        malformed = validator.parse_json(
            os.path.join(self.DATA_DIR, "test_data_malformed.json")
        )
        self.assertIsNone(malformed)
        self.assertIn("JSON error", validator.error_log)

    def test_basic_checks(self):
        """basic_checks() passes only when both resource and PDB id are ok."""
        validator = Validator("test")
        validator.json_data = {"data_resource": "test", "pdb_id": "1abc"}
        self.assertTrue(validator.basic_checks())
        validator.json_data = {"data_resource": "test"}
        self.assertFalse(validator.basic_checks())
        validator.json_data = {"pdb_id": "1abc"}
        self.assertFalse(validator.basic_checks())

    def test_no_resource_name(self):
        """A record without 'data_resource' fails the resource check."""
        validator = Validator("test")
        validator.json_data = {"pdb_id": "1abc"}
        self.assertFalse(validator.test_resource())

    def test_resource_name_mismatch(self):
        """A 'data_resource' different from the expected one is rejected."""
        validator = Validator("test")
        validator.json_data = {"data_resource": "test2", "pdb_id": "1abc"}
        self.assertFalse(validator.test_resource())

    def test_no_pdb_id(self):
        """A record without 'pdb_id' fails the PDB id check."""
        validator = Validator("test")
        validator.json_data = {"data_resource": "test"}
        self.assertFalse(validator.test_pdb_id())

    def test_invalid_pdb_id(self):
        """A 'pdb_id' that does not look like a PDB id is rejected."""
        validator = Validator("test")
        validator.json_data = {"data_resource": "test", "pdb_id": "invalid"}
        self.assertFalse(validator.test_pdb_id())
import json
import re


class Validator(object):
    """
    Validates FunPDBe JSON files

    Every check returns a bool; on failure a human-readable message is
    stored in self.error_log (a single slot - only one message is kept).
    """

    # One digit 1-9 followed by exactly three ASCII alphanumerics. Used with
    # fullmatch() so that nothing (not even a trailing newline) may follow.
    _PDB_ID_PATTERN = re.compile(r"[1-9][a-zA-Z0-9]{3}")

    def __init__(self, resource):
        """
        :param resource: String, the data resource name the JSON is
            expected to declare in its "data_resource" field
        """
        self.resource = resource
        self.schema = None
        self.json_data = None
        self.error_log = None

    def load_json(self, path_to_file):
        """
        Parses the file at path_to_file into self.json_data
        (None if parsing failed, see parse_json)
        """
        self.json_data = self.parse_json(path_to_file)

    def load_schema(self, path_to_schema):
        """
        Parses the file at path_to_schema into self.schema
        (None if parsing failed, see parse_json)
        """
        self.schema = self.parse_json(path_to_schema)

    def parse_json(self, path):
        """
        Parses a FunPDBe JSON file and in case of file error
        or JSON error, the error message is saved to self.error_log

        :param path: String, path to the JSON file
        :return: the parsed JSON data, or None if reading/parsing failed
        """
        try:
            # JSON text is UTF-8; do not depend on the platform default.
            with open(path, "r", encoding="utf-8") as json_file:
                return json.load(json_file)
        except (json.JSONDecodeError, UnicodeDecodeError) as err:
            self.error_log = "JSON error: %s" % err
        except IOError as ioerr:
            self.error_log = "File error: %s" % ioerr
        return None

    def basic_checks(self):
        """
        Runs the resource name check and, only if that passes,
        the PDB id check

        :return: Bool, True if both checks pass, False otherwise
        """
        return self.test_resource() and self.test_pdb_id()

    def _json_object_loaded(self):
        """
        Guards the field checks against missing or non-object JSON data

        :return: Bool, True if self.json_data is a dict, False otherwise
        """
        if isinstance(self.json_data, dict):
            return True
        if self.json_data is None:
            # A failed load already recorded why there is no data;
            # keep that message instead of overwriting it.
            if self.error_log is None:
                self.error_log = "No JSON data loaded"
        else:
            self.error_log = "JSON data is not an object"
        return False

    def test_resource(self):
        """
        Checks that "data_resource" is present and equals self.resource

        :return: Bool, True if the resource name matches, False otherwise
        """
        if not self._json_object_loaded():
            return False
        if "data_resource" not in self.json_data:
            self.error_log = "No data resource name found"
            return False
        if self.json_data["data_resource"] != self.resource:
            self.error_log = "Data resource name mismatch"
            return False
        return True

    def test_pdb_id(self):
        """
        Checks that "pdb_id" is present and is a string made of one
        digit 1-9 followed by three alphanumeric characters

        :return: Bool, True if the PDB id is well-formed, False otherwise
        """
        if not self._json_object_loaded():
            return False
        if "pdb_id" not in self.json_data:
            self.error_log = "No PDB id found"
            return False
        pdb_id = self.json_data["pdb_id"]
        # Non-string values (e.g. a JSON number) are invalid, not a crash.
        if not isinstance(pdb_id, str) or \
                not self._PDB_ID_PATTERN.fullmatch(pdb_id):
            self.error_log = "Invalid PDB id found"
            return False
        return True

    def validate_against_schema(self):
        """
        Validates self.json_data against self.schema using jsonschema

        :return: Bool, True if the data complies with the schema, False if
            no schema is loaded, the schema is invalid, or the data does
            not comply
        """
        if self.schema is None:
            # Keep the message of a failed load_schema(), if there is one.
            if self.error_log is None:
                self.error_log = "No schema loaded"
            return False
        # Imported here so that the parsing and basic checks stay usable
        # when the third-party jsonschema package is not importable.
        import jsonschema
        try:
            jsonschema.validate(self.json_data, self.schema)
        except jsonschema.exceptions.ValidationError as err:
            self.error_log = "JSON does not comply with schema: %s" % err
            return False
        except jsonschema.exceptions.SchemaError as err:
            self.error_log = "Invalid schema: %s" % err
            return False
        return True


# TODO - Add the file path to the error log, if not empty