├── tests ├── __init__.py ├── unit │ └── __init__.py ├── integration │ └── __init__.py └── fixtures │ ├── README.rst │ └── full_example │ ├── abstract_extends.sch │ ├── check_weights.sch │ ├── diagnostics.sch │ ├── cargo.xml │ └── schema.sch ├── docs ├── c055982_ISO_IEC_19757-3_2016.pdf └── iso-schematron.xsd ├── pyschematron ├── direct_mode │ ├── __init__.py │ ├── xml_validation │ │ ├── __init__.py │ │ ├── queries │ │ │ ├── __init__.py │ │ │ ├── exceptions.py │ │ │ ├── factories.py │ │ │ ├── base.py │ │ │ └── xpath.py │ │ └── results │ │ │ ├── __init__.py │ │ │ ├── xml_nodes.py │ │ │ ├── validation_results.py │ │ │ └── svrl_builder.py │ ├── schematron │ │ ├── parsers │ │ │ ├── __init__.py │ │ │ └── xml │ │ │ │ ├── __init__.py │ │ │ │ ├── utils.py │ │ │ │ └── builders.py │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── ast_yaml.py │ │ └── ast_visitors.py │ ├── lib │ │ ├── __init__.py │ │ └── ast.py │ ├── svrl │ │ ├── __init__.py │ │ ├── svrl_visitors.py │ │ ├── ast.py │ │ └── xml_writer.py │ └── api.py ├── __version__.py ├── utils.py ├── cli.py ├── __init__.py └── api.py ├── .editorconfig ├── .travis.yml ├── .coveragerc ├── tox.ini ├── pyproject.toml ├── .gitignore ├── scripts ├── demo_ast.py ├── generic_test_script.py ├── demo_validation.py └── demo_custom_functions.py ├── CHANGELOG.rst ├── cliff.toml ├── Makefile └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
-------------------------------------------------------------------------------- /tests/fixtures/README.rst: -------------------------------------------------------------------------------- 1 | All data needed for unit tests go here. 2 | -------------------------------------------------------------------------------- /docs/c055982_ISO_IEC_19757-3_2016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robbert-harms/pyschematron/HEAD/docs/c055982_ISO_IEC_19757-3_2016.pdf -------------------------------------------------------------------------------- /pyschematron/direct_mode/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-03-02' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-02-18' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/schematron/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-02-18' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/schematron/parsers/xml/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-02-18' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | 
-------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/queries/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-03-19' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/lib/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-21' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/svrl/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-11' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/schematron/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-19' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/results/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-25' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | -------------------------------------------------------------------------------- /tests/fixtures/full_example/abstract_extends.sch: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | The item is in the wrong category ($pv_category) (external check). 4 | 5 | 6 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/queries/exceptions.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-03-25' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'GPL v3' 6 | 7 | 8 | class MissingRootNodeError(Exception): 9 | ... 10 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [LICENSE] 14 | insert_final_newline = false 15 | 16 | [Makefile] 17 | indent_style = tab 18 | -------------------------------------------------------------------------------- /tests/fixtures/full_example/check_weights.sch: -------------------------------------------------------------------------------- 1 | 2 | 3 | Weight not correct ( vs at ). 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/fixtures/full_example/diagnostics.sch: -------------------------------------------------------------------------------- 1 | 2 | 3 | The item "" is too heavy. 4 | 5 | 6 | Het item "" is te zwaar. 
7 | 8 | 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | 3 | language: python 4 | 5 | python: 6 | - "3.12" 7 | 8 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 9 | install: 10 | - python -c 'import tomllib; f = open("pyproject.toml", "rb"); c = tomllib.load(f); print("\n".join(c["project"]["dependencies"]));' | pip install -r /dev/stdin 11 | 12 | # command to run tests, e.g. python setup.py test 13 | script: make test 14 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/svrl/svrl_visitors.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-17' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | 7 | from abc import ABCMeta 8 | 9 | from pyschematron.direct_mode.lib.ast import GenericASTVisitor 10 | from pyschematron.direct_mode.svrl.ast import SVRLNode 11 | 12 | 13 | class SVRLASTVisitor(GenericASTVisitor[SVRLNode], metaclass=ABCMeta): 14 | """Visitor implementation for the SVRL nodes.""" 15 | ... 
16 | -------------------------------------------------------------------------------- /pyschematron/__version__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2020-02-04' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@laltoida.com' 5 | 6 | 7 | from importlib import metadata 8 | from importlib.metadata import PackageNotFoundError 9 | from pathlib import Path 10 | 11 | import tomllib 12 | 13 | try: 14 | __version__ = metadata.version('pyschematron') 15 | except PackageNotFoundError: 16 | with open(Path(__file__).parent.parent / 'pyproject.toml', 'rb') as f: 17 | pyproject = tomllib.load(f) 18 | __version__ = pyproject['project']['version'] 19 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = pyschematron 4 | 5 | [report] 6 | ignore_errors = True 7 | ; Regexes for lines to exclude from consideration 8 | exclude_lines = 9 | ; Have to re-enable the standard pragma 10 | pragma: no cover 11 | 12 | ; Don't complain about missing debug-only code: 13 | def __repr__ 14 | if self\.debug 15 | 16 | ; Don't complain if tests don't hit defensive assertion code: 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | ; Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if __name__ == .__main__.: 23 | -------------------------------------------------------------------------------- /tests/fixtures/full_example/cargo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/schematron/utils.py: -------------------------------------------------------------------------------- 1 | 
__author__ = 'Robbert Harms' 2 | __date__ = '2023-03-06' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | 6 | import re 7 | 8 | 9 | def macro_expand(string: str, macros: dict[str, str]) -> str: 10 | """Expand the provided macros on the provided string. 11 | 12 | This replaces all the macros in one go. This is a specialized version of multi-string replacement which 13 | assumes all the replacements are unique (as they are in macros). 14 | 15 | We use this function for instantiating abstract patterns to concrete patterns. 16 | 17 | This assumes the macros already have the prefix `$`. A macro is only replaced where it is directly followed by a regex word boundary. 18 | 19 | Args: 20 | string: the string on which to apply the macros 21 | macros: the macros to expand, mapping each macro name (including its `$` prefix) to its replacement text. NOTE(review): an empty mapping builds a pattern that matches the empty string, so any input containing a word character raises a KeyError on the '' lookup -- confirm callers never pass an empty dict. 22 | 23 | Returns: 24 | A version of the string with the macros expanded 25 | """ 26 | macros_pattern = '|'.join(re.escape(k) for k in macros) 27 | pattern = re.compile(f'({macros_pattern})\\b') 28 | return pattern.sub(lambda match: macros[match.group(0)], string) 29 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | ; with ideas copied from: https://github.com/zopefoundation/RestrictedPython/blob/master/tox.ini 2 | 3 | [tox] 4 | isolated_build = True 5 | envlist = setup,py312,report 6 | toxworkdir = {toxinidir}/build/.tox 7 | 8 | [testenv] 9 | usedevelop = True 10 | allowlist_externals = which 11 | commands = 12 | python --version 13 | which python 14 | which pip 15 | which pytest 16 | pytest --cov=pyschematron --cov-report=html:build/coverage/{envname} --cov-append --html=build/pytest/report-{envname}.html --self-contained-html {posargs} 17 | deps = 18 | pytest 19 | pytest-cov 20 | pytest-html 21 | joblib 22 | tqdm 23 | moto 24 | docker 25 | setenv = 26 | COVERAGE_FILE=build/.coverage.{envname} 27 | 28 | [testenv:report] 29 | deps = coverage 30 | skip_install = true 31 | depends = py312 32 | setenv = 33 | COVERAGE_FILE=build/.coverage 
34 | commands = 35 | coverage erase 36 | coverage combine 37 | coverage html -d build/coverage/all/ 38 | coverage report 39 | 40 | [testenv:setup] 41 | deps = coverage 42 | skip_install = true 43 | setenv = 44 | COVERAGE_FILE=build/.coverage 45 | commands = coverage erase 46 | 47 | 48 | [pytest] 49 | testpaths = tests 50 | cache_dir = build/.pytest_cache 51 | addopts = --capture=tee-sys 52 | -------------------------------------------------------------------------------- /pyschematron/utils.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-02-17' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | 6 | 7 | from io import BytesIO, IOBase 8 | from pathlib import Path 9 | from typing import BinaryIO, Union 10 | 11 | from lxml import etree 12 | from lxml.etree import _ElementTree 13 | 14 | 15 | def load_xml_document(xml_data: Union[bytes, str, Path, IOBase, BinaryIO], 16 | parser: etree.XMLParser | None = None) -> _ElementTree: 17 | """Load an XML document from a polymorphic source. 18 | 19 | Args: 20 | xml_data: the XML data to load. Can be loaded from a string, file, or byte-like object. A `str` is parsed as XML content (after UTF-8 encoding), not as a filename; pass a `Path` to read from disk. 21 | parser: the XMLParser to use. Can be specialized for your use-case. Defaults to `etree.XMLParser(ns_clean=True)`. 22 | 23 | Returns: 24 | The document node of the loaded XML. NOTE(review): input matching none of the `match` cases falls through and returns None -- confirm whether a TypeError is intended. 
25 | """ 26 | parser = parser or etree.XMLParser(ns_clean=True) 27 | 28 | match xml_data: 29 | case IOBase(): 30 | return etree.parse(xml_data, parser) 31 | case bytes(): 32 | return load_xml_document(BytesIO(xml_data), parser=parser) 33 | case str(): 34 | return load_xml_document(BytesIO(xml_data.encode('utf-8')), parser=parser) 35 | case Path(): 36 | with open(xml_data, 'rb') as f: 37 | return load_xml_document(f, parser=parser) 38 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyschematron" 3 | description = "Schematron validation in Python." 4 | readme = "README.rst" 5 | version = "1.1.13" 6 | requires-python = ">=3.12" 7 | keywords = ["Schematron", "XML validation"] 8 | classifiers = [ 9 | "Development Status :: 5 - Production/Stable", 10 | "Environment :: Console", 11 | "Intended Audience :: Developers", 12 | "Natural Language :: English", 13 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", 14 | "Programming Language :: Python :: 3.12", 15 | ] 16 | license = { file="LICENSE" } 17 | authors = [ 18 | { name = "Robbert Harms", email = "robbert@xkls.nl" } 19 | ] 20 | dependencies = [ 21 | "xmlschema~=4.1.0", 22 | "elementpath~=5.0.4", 23 | "typer>=0.19.2", 24 | "appdirs~=1.4.4", 25 | "lxml~=6.0.0", 26 | "Jinja2~=3.1.6", 27 | "ruyaml~=0.91.0", 28 | "frozendict~=2.4.6" 29 | ] 30 | 31 | [project.optional-dependencies] 32 | test = [ 33 | "pytest~=8.3.4", 34 | "pytest-check~=2.5.0", 35 | "pytest-cov~=6.0.0", 36 | "pytest-html~=4.1.1", 37 | "tox~=4.24.1" 38 | ] 39 | doc = [ 40 | "Sphinx~=8.2.0", 41 | "git-cliff~=2.8.0" 42 | ] 43 | 44 | [project.urls] 45 | Homepage = "https://github.com/robbert-harms/pyschematron/" 46 | 47 | [project.scripts] 48 | pyschematron = "pyschematron.cli:app" 49 | 50 | [build-system] 51 | requires = ["flit_core >=3.9,<4"] 52 | build-backend = 
"flit_core.buildapi" 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | /dist 10 | /build 11 | /eggs 12 | /parts 13 | /var 14 | /sdist 15 | /develop-eggs 16 | /lib 17 | /lib64 18 | .installed.cfg 19 | /.pybuild 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | htmlcov 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Complexity 39 | output/*.html 40 | output/*/index.html 41 | 42 | # Sphinx 43 | docs/_build 44 | 45 | #Idea 46 | .idea 47 | 48 | #Miscellaneous 49 | /misc 50 | 51 | # debian tmps 52 | /debian/source 53 | /debian/tmp 54 | /debian/changelog 55 | /debian/compat 56 | /debian/control 57 | /debian/rules 58 | /debian/watch 59 | 60 | # Elastic Beanstalk Files 61 | .elasticbeanstalk/* 62 | !.elasticbeanstalk/*.cfg.yml 63 | !.elasticbeanstalk/*.global.yml 64 | 65 | # MAC os files 66 | .DS_Store 67 | 68 | ######### 69 | ## Latex 70 | ######### 71 | ## Core latex/pdflatex auxiliary files: 72 | *.aux 73 | *.lof 74 | *.log 75 | *.lot 76 | *.fls 77 | *.out 78 | *.toc 79 | *.fmt 80 | *.fot 81 | *.cb 82 | *.cb2 83 | .*.lb 84 | 85 | ## Intermediate documents: 86 | *.dvi 87 | *.xdv 88 | *-converted-to.* 89 | 90 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 91 | *.bbl 92 | *.bcf 93 | *.blg 94 | *-blx.aux 95 | *-blx.bib 96 | *.run.xml 97 | 98 | ## Build tool auxiliary files: 99 | *.fdb_latexmk 100 | *.synctex 101 | *.synctex(busy) 102 | *.synctex.gz 103 | *.synctex.gz(busy) 104 | *.pdfsync 105 | 106 | ######### 107 | ## End Latex 108 | ######### 109 | 110 | .python-version 111 | -------------------------------------------------------------------------------- /scripts/demo_ast.py: 
-------------------------------------------------------------------------------- 1 | """This script demonstrates how to load a Schematron Schema in the PySchematron direct-mode. 2 | 3 | This example shows the use of the direct-mode Abstract Syntax Tree (AST) for PySchematron Schemas. By loading Schematron 4 | schema's in the AST, you can inspect the Schema using Python functionality. 5 | 6 | Please note that only Schematron specific XML nodes are loaded from the Schematron Schema. Custom nodes are not loaded. 7 | You can however augment the AST and the parser with your own nodes. This is however not demonstrated (here). 8 | """ 9 | 10 | __author__ = 'Robbert Harms' 11 | __date__ = '2023-02-21' 12 | __maintainer__ = 'Robbert Harms' 13 | __email__ = 'robbert@xkls.nl' 14 | 15 | from pathlib import Path 16 | 17 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, ResolveAbstractPatternsVisitor, \ 18 | PhaseSelectionVisitor 19 | from pyschematron.direct_mode.schematron.ast_yaml import RuyamlASTYamlConverter 20 | from pyschematron.direct_mode.schematron.parsers.xml.parser import ParsingContext, SchemaParser 21 | from pyschematron.utils import load_xml_document 22 | 23 | 24 | schematron_path = Path('../tests/fixtures/full_example/schema.sch') 25 | schematron_xml = load_xml_document(schematron_path) 26 | parsing_context = ParsingContext(base_path=schematron_path.parent) 27 | 28 | # Parse the Schema 29 | schematron_parser = SchemaParser() 30 | schema = schematron_parser.parse(schematron_xml.getroot(), parsing_context) 31 | 32 | # Shows the use of the visitor pattern to modify the Schema 33 | schema = ResolveExtendsVisitor(schema).apply(schema) 34 | schema = ResolveAbstractPatternsVisitor(schema).apply(schema) 35 | schema = PhaseSelectionVisitor(schema, '#ALL').apply(schema) 36 | 37 | # Experimental, YAML conversion. 
38 | yaml_converter = RuyamlASTYamlConverter() 39 | yaml_shema = yaml_converter.dump(schema) 40 | round_trip = yaml_converter.load(yaml_shema) 41 | 42 | print(yaml_shema) 43 | print(round_trip == schema) 44 | -------------------------------------------------------------------------------- /scripts/generic_test_script.py: -------------------------------------------------------------------------------- 1 | """A test script I use when developing PySchematron.""" 2 | 3 | __author__ = 'Robbert Harms' 4 | __date__ = '2023-02-21' 5 | __maintainer__ = 'Robbert Harms' 6 | __email__ = 'robbert@xkls.nl' 7 | 8 | from pathlib import Path 9 | 10 | from lxml import etree 11 | 12 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, \ 13 | ResolveAbstractPatternsVisitor, PhaseSelectionVisitor 14 | from pyschematron.direct_mode.schematron.parsers.xml.parser import ParsingContext, SchemaParser 15 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder 16 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator 17 | from pyschematron.utils import load_xml_document 18 | 19 | ''' 20 | cd programming/python/pyschematron/tests/fixtures/full_example/ 21 | java -jar ~/programming/java/schxslt-cli.jar -d cargo.xml -s schema.sch -o /tmp/report.xml 22 | ''' 23 | 24 | example_path = Path('../tests/fixtures/full_example/') 25 | schematron_path = example_path / 'schema.sch' 26 | phase = '#ALL' 27 | 28 | schematron_xml = load_xml_document(schematron_path) 29 | parsing_context = ParsingContext(base_path=schematron_path.parent) 30 | 31 | schematron_parser = SchemaParser() 32 | schema = schematron_parser.parse(schematron_xml.getroot(), parsing_context) 33 | schema = ResolveExtendsVisitor(schema).apply(schema) 34 | schema = ResolveAbstractPatternsVisitor(schema).apply(schema) 35 | schema = PhaseSelectionVisitor(schema, phase).apply(schema) 36 | 37 | validator = 
SimpleSchematronXMLValidator(schema, phase, parsing_context.base_path) 38 | 39 | xml_document = load_xml_document(example_path / 'cargo.xml') 40 | validation_results = validator.validate_xml(xml_document) 41 | 42 | svrl_report = DefaultSVRLReportBuilder().create_svrl_xml(validation_results) 43 | report_str = etree.tostring(svrl_report, pretty_print=True).decode('utf-8') 44 | 45 | with open('/tmp/report_pyschematron.xml', 'w') as f: 46 | f.write(report_str) 47 | 48 | print(report_str) 49 | 50 | print() 51 | -------------------------------------------------------------------------------- /pyschematron/cli.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-04-06' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | 7 | from pathlib import Path 8 | 9 | import typer 10 | 11 | from pyschematron import validate_documents, validate_document 12 | 13 | app = typer.Typer(no_args_is_help=True, pretty_exceptions_enable=False) 14 | 15 | 16 | @app.command(help='Validate one or more documents using PySchematron.') 17 | def validate(xml_documents: list[Path] = typer.Argument(help='One or more documents to validate.', 18 | exists=True, file_okay=True, dir_okay=False, resolve_path=True), 19 | schema: Path = typer.Argument(help='The Schematron Schema to use for the validation.', 20 | exists=True, file_okay=True, dir_okay=False, resolve_path=True), 21 | phase: str = typer.Option('#DEFAULT', '--phase', '-p', help='The Schematron phase to use.'), 22 | svrl_out: Path = typer.Option(None, '--svrl-out', 23 | help='The file to write the SVRL to. 
' 24 | 'For multiple documents we append the XML document name to this name.', 25 | file_okay=True, dir_okay=False, writable=True)): 26 | 27 | if svrl_out: 28 | svrl_out.parent.mkdir(parents=True, exist_ok=True) 29 | 30 | if len(xml_documents) == 1: 31 | result = validate_document(xml_documents[0], schema, phase=phase) 32 | 33 | print(xml_documents[0], 'VALID' if result.is_valid() else 'INVALID') 34 | 35 | if svrl_out: 36 | result.get_svrl().write(str(svrl_out), pretty_print=True, xml_declaration=True, encoding="utf-8") 37 | else: 38 | results = validate_documents(xml_documents, schema, phase=phase) 39 | 40 | for filename, result in zip(xml_documents, results): 41 | print(filename, 'VALID' if result.is_valid() else 'INVALID') 42 | 43 | if svrl_out: 44 | out_fname = svrl_out.with_stem(svrl_out.stem + '_' + Path(filename).stem) 45 | result.get_svrl().write(str(out_fname), pretty_print=True, xml_declaration=True, encoding="utf-8") 46 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/results/xml_nodes.py: -------------------------------------------------------------------------------- 1 | """This module contains class representations for XML nodes. 2 | 3 | For the purpose of storing the validation results, we would like to reference the XML node at 4 | which a check was performed. The ISO Schematron applies rules to: 5 | 6 | - Elements (*) 7 | - Attributes (@*) 8 | - Root node (/) 9 | - Comments (comment()) 10 | - Processing instructions (processing-instruction()) 11 | 12 | As such, when we list the results, we need a way to represent these different kind of nodes in a uniform manner. 13 | This module aids by having objects to represent each visited XML node. 
14 | """ 15 | from __future__ import annotations 16 | 17 | __author__ = 'Robbert Harms' 18 | __date__ = '2024-03-25' 19 | __maintainer__ = 'Robbert Harms' 20 | __email__ = 'robbert@xkls.nl' 21 | __licence__ = 'LGPL v3' 22 | 23 | from abc import ABCMeta 24 | from dataclasses import dataclass 25 | from lxml.etree import _Element, _ProcessingInstruction, _Comment 26 | 27 | 28 | @dataclass(frozen=True, slots=True) 29 | class XMLNode(metaclass=ABCMeta): 30 | """The base class for all XML nodes in this module. 31 | 32 | Args: 33 | xpath_location: The location of the provided element in XPath 3.1 notation, using the `BracedURILiteral` 34 | style for the qualified names. 35 | """ 36 | xpath_location: str 37 | 38 | 39 | @dataclass(frozen=True, slots=True) 40 | class AttributeNode(XMLNode): 41 | """Represents an attribute of an XML element. 42 | 43 | Args: 44 | name: the attribute name. 45 | value: a string value for the attribute 46 | parent: the parent element node 47 | """ 48 | name: str 49 | value: str 50 | parent: _Element 51 | 52 | 53 | @dataclass(frozen=True, slots=True) 54 | class CommentNode(XMLNode): 55 | """Represents an XML comment. 56 | 57 | Args: 58 | element: the wrapped lxml comment element. 59 | """ 60 | element: _Comment 61 | 62 | 63 | @dataclass(frozen=True, slots=True) 64 | class ProcessingInstructionNode(XMLNode): 65 | """Represents an XML processing instruction node. 66 | 67 | Args: 68 | element: the wrapped Processing Instruction Element. 69 | """ 70 | element: _ProcessingInstruction 71 | 72 | 73 | @dataclass(frozen=True, slots=True) 74 | class ElementNode(XMLNode): 75 | """Represents an XML element. 76 | 77 | Args: 78 | element: the wrapped XML element. 
79 | """ 80 | element: _Element 81 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/schematron/parsers/xml/utils.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-02-18' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | 6 | from pathlib import Path 7 | from typing import Callable, Any 8 | 9 | import lxml 10 | from lxml import etree 11 | from lxml.etree import _Element 12 | 13 | 14 | def node_to_str(node: _Element, remove_namespaces: bool = True) -> str: 15 | """Convert an lxml node to a string. 16 | 17 | This can be used to convert a node to string without namespaces. The tail text of the node is not included. 18 | 19 | Args: 20 | node: the node to convert to string 21 | remove_namespaces: if True, strip the namespaces by replacing each element tag with its local name. NOTE(review): `getiterator` is deprecated in lxml in favour of `iter` -- consider switching. 22 | 23 | Returns: 24 | A string representation of the provided node. 25 | """ 26 | tag_str = lxml.etree.tostring(node, with_tail=False, encoding='unicode') 27 | 28 | if remove_namespaces: 29 | new_root = etree.fromstring(tag_str) 30 | for elem in new_root.getiterator(): 31 | elem.tag = etree.QName(elem).localname 32 | etree.cleanup_namespaces(new_root) 33 | 34 | return lxml.etree.tostring(new_root, encoding='unicode') 35 | return tag_str 36 | 37 | 38 | def resolve_href(href: str, base_path: Path) -> Path: 39 | """Resolve a href attribute to a file on the filesystem. 40 | 41 | This can be used to resolve the `href` attributes of extend or include tags. An absolute `href` is returned unchanged; a relative `href` is joined onto `base_path` and resolved. 42 | 43 | Args: 44 | href: an absolute or relative path pointing to a file on the filesystem 45 | base_path: the base path to resolve relative paths. 46 | 47 | Returns: 48 | An absolute path to a file on the filesystem. 
49 | """ 50 | file_path = Path(href) 51 | if file_path.is_absolute(): 52 | return file_path 53 | return (base_path / file_path).resolve() 54 | 55 | 56 | def parse_attributes(attributes: dict[str, str], 57 | allowed_attributes: list[str], 58 | attribute_handlers: dict[str, Callable[[str, str], Any]] | None = None) -> dict[str, Any]: 59 | """Parse the attributes of the given element. 60 | 61 | By default, it returns all attributes as a string value. By using the attribute handlers it is possible 62 | to specify for each attribute how it is to be treated. 63 | 64 | Args: 65 | attributes: the attributes we wish to parse 66 | allowed_attributes: the list of allowed attribute names, we will only parse and return the items in this list 67 | attribute_handlers: for each attribute name, a callback taking in the name and attribute value and returning 68 | a mapping of parsed name/value items, which is merged into the result. 69 | 70 | Returns: 71 | A dictionary with the parsed items of every allowed attribute that is present in `attributes`. 72 | """ 73 | attribute_handlers = attribute_handlers or {} 74 | 75 | parsed_attributes = {} 76 | for item in allowed_attributes: 77 | if item in attributes: 78 | handler = attribute_handlers.get(item, lambda k, v: {k: v}) 79 | parsed_attributes.update(handler(item, attributes[item])) 80 | return parsed_attributes 81 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ********* 2 | Changelog 3 | ********* 4 | 5 | 6 | Version 1.1.13 (2025-11-05) 7 | =========================== 8 | 9 | Other 10 | ----- 11 | - Updated default parser in load_xml_document, this improves parallel processing speed according to the documentation of XMLParser. 12 | 13 | 14 | 15 | Version 1.1.12 (2025-11-04) 16 | =========================== 17 | 18 | Features 19 | -------- 20 | - The function load_xml_document now accepts a parser instance for flexibility. 
21 | - The XML Schematron writer now allows overriding the default nsmap namespace. 22 | 23 | 24 | 25 | Version 1.1.11 (2025-09-24) 26 | =========================== 27 | 28 | Other 29 | ----- 30 | - Updated typer dependency and removed the all limiter. 31 | 32 | 33 | 34 | Version 1.1.10 (2025-09-21) 35 | =========================== 36 | 37 | Other 38 | ----- 39 | - Relaxed the version constraint on lxml. 40 | 41 | 42 | 43 | Version 1.1.9 (2025-09-18) 44 | ========================== 45 | 46 | Other 47 | ----- 48 | - Updated xmlschema, elementpath and lxml dependencies. 49 | 50 | 51 | 52 | Version 1.1.8 (2025-03-26) 53 | ========================== 54 | 55 | Features 56 | -------- 57 | - Adds Schematron base path to the function API interface. 58 | 59 | Documentation 60 | ------------- 61 | - Updated editorconfig 62 | 63 | 64 | 65 | Version 1.1.7 (2025-02-19) 66 | ========================== 67 | 68 | Miscellaneous Tasks 69 | ------------------- 70 | - *(deps\)* Updated all the dependencies. 71 | 72 | 73 | Version 1.1.6 (2025-01-31) 74 | ========================== 75 | 76 | Other 77 | ----- 78 | - Replaced gitchangelog with git-cliff. 79 | 80 | 81 | Version 1.1.5 (2025-01-22) 82 | ========================== 83 | 84 | Added 85 | ----- 86 | - Adds PyPi Homepage url. 87 | 88 | Fixed 89 | ----- 90 | - Fixes bug #7. 91 | 92 | 93 | Version 1.1.4 (2024-12-23) 94 | ========================== 95 | 96 | Other 97 | ----- 98 | - Made the assert and report checks more robust for queries not returning a single boolean. This fixes the second part of issue #6. 99 | 100 | Version 1.1.3 (2024-12-21) 101 | ========================== 102 | 103 | Other 104 | ----- 105 | - Made rich text evaluation more robust for complex results. 106 | 107 | 108 | Version 1.1.2 (2024-12-20) 109 | ========================== 110 | 111 | Other 112 | ----- 113 | - Bumped required elementpath version to fix bug #6. 
114 | 115 | 116 | Version 1.1.1 (2024-11-27) 117 | ========================== 118 | 119 | Other 120 | ----- 121 | - Updated is_valid comment in the API. 122 | 123 | 124 | Version 1.1.0 (2024-11-27) 125 | ========================== 126 | 127 | Other 128 | ----- 129 | - Fixes github bug #5. The reporting of the is_valid method was reversed with regard to assert/report. 130 | - Fixed the documentation regarding the is_valid function. 131 | 132 | 133 | Version 1.0.3 (2024-10-29) 134 | ========================== 135 | 136 | Other 137 | ----- 138 | - Updated elementpath dependency version. 139 | 140 | 141 | Version 1.0.2 (2024-10-18) 142 | ========================== 143 | 144 | Other 145 | ----- 146 | - Updated readme to include supported Python version and other textual changes. 147 | - Updated lxml dependency from 5.1.0 to 5.2.1 148 | 149 | 150 | Version 1.0.1 (2024-09-24) 151 | ========================== 152 | 153 | Other 154 | ----- 155 | - Upgraded to elementpath==4.5.0 156 | - Fixed email address in info blocks. 157 | 158 | 159 | Version 1.0.0 (2024-08-23) 160 | ========================== 161 | 162 | Other 163 | ----- 164 | First complete version of PySchematron. See the readme for the functionality and limitations. 165 | 166 | 167 | Version 0.1.0 (2022-09-12) 168 | ========================== 169 | 170 | Other 171 | ----- 172 | - First version 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # Git Cliff configuration file. 
See: https://git-cliff.org/docs/configuration 2 | 3 | [changelog] 4 | header = """ 5 | ********* 6 | Changelog 7 | ********* 8 | 9 | """ 10 | 11 | # template for the changelog body 12 | # https://keats.github.io/tera/docs/#introduction 13 | body = """ 14 | {% if version %}\ 15 | Version {{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }}) 16 | {%- set header_length = version | trim_start_matches(pat="v") | length + timestamp | date(format="%Y-%m-%d") | length + 11 %} 17 | {% for i in range(end=header_length) %}={% endfor %} 18 | {% else %}\ 19 | [unreleased] 20 | {% for i in range(end=12) %}-{% endfor %} 21 | {% endif %}\ 22 | 23 | {% for group, commits in commits | group_by(attribute="group") %} 24 | {{ group | striptags | trim | upper_first }} 25 | {%- set subheader_length = group | striptags | trim | length%} 26 | {% for i in range(end=subheader_length) %}-{% endfor %} 27 | {%- for commit in commits %} 28 | - {% if commit.scope %}*({{ commit.scope }})* {% endif %}\ 29 | {% if commit.breaking %}[**breaking**] {% endif %}\ 30 | {{ commit.message | upper_first }}\ 31 | {% endfor %} 32 | {% endfor %}\n 33 | """ 34 | 35 | # template for the changelog footer 36 | footer = "" 37 | 38 | # remove the leading and trailing s 39 | trim = false 40 | 41 | # postprocessors 42 | postprocessors = [ 43 | # { pattern = '', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL 44 | ] 45 | # render body even when there are no releases to process 46 | # render_always = true 47 | # output file path 48 | # output = "test.md" 49 | 50 | [git] 51 | # parse the commits based on https://www.conventionalcommits.org 52 | conventional_commits = true 53 | 54 | # filter out the commits that are not conventional 55 | filter_unconventional = false 56 | 57 | # process each line of a commit as an individual commit 58 | split_commits = false 59 | 60 | # regex for preprocessing the commit messages 61 | commit_preprocessors = [ 62 | # Replace 
issue numbers 63 | #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](/issues/${2}))"}, 64 | # Check spelling of the commit with https://github.com/crate-ci/typos 65 | # If the spelling is incorrect, it will be automatically fixed. 66 | #{ pattern = '.*', replace_command = 'typos --write-changes -' }, 67 | ] 68 | 69 | # regex for parsing and grouping commits 70 | commit_parsers = [ 71 | { message = "^feat", group = "Features" }, 72 | { message = "^fix", group = "Bug Fixes" }, 73 | { message = "^doc", group = "Documentation" }, 74 | { message = "^perf", group = "Performance" }, 75 | { message = "^refactor", group = "Refactor" }, 76 | { message = "^style", group = "Styling" }, 77 | { message = "^test", group = "Testing" }, 78 | { message = "^chore\\(release\\): prepare for", skip = true }, 79 | { message = "^chore\\(deps.*\\)", skip = true }, 80 | { message = "^chore\\(pr\\)", skip = true }, 81 | { message = "^chore\\(pull\\)", skip = true }, 82 | { message = "^chore|^ci", group = "Miscellaneous Tasks" }, 83 | { body = ".*security", group = "Security" }, 84 | { message = "^revert", group = "Revert" }, 85 | { message = ".*", group = "Other" }, 86 | ] 87 | 88 | # filter out the commits that are not matched by commit parsers 89 | filter_commits = false 90 | 91 | # sort the tags topologically 92 | topo_order = false 93 | 94 | # sort the commits inside sections by oldest/newest order 95 | sort_commits = "oldest" 96 | 97 | # git tag pattern for finding the versions (taken from https://semver.org/) 98 | tag_pattern = "^v(?P<major>0|[1-9]\\d*)\\.(?P<minor>0|[1-9]\\d*)\\.(?P<patch>0|[1-9]\\d*)(?:-(?P<prerelease>(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" 99 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/lib/ast.py: -------------------------------------------------------------------------------- 1 | """This module defines abstract base types
for an Abstract Syntax Tree (AST) and an AST visitor.""" 2 | from __future__ import annotations 3 | 4 | __author__ = 'Robbert Harms' 5 | __date__ = '2024-03-21' 6 | __maintainer__ = 'Robbert Harms' 7 | __email__ = 'robbert@xkls.nl' 8 | __licence__ = 'LGPL v3' 9 | 10 | from abc import abstractmethod, ABCMeta 11 | from dataclasses import dataclass, fields 12 | from typing import Any, Mapping, Iterable, Self 13 | 14 | 15 | @dataclass(slots=True, frozen=True) 16 | class GenericASTNode: 17 | """Abstract base class for Abstract Syntax Tree (AST) nodes. 18 | 19 | Each node in an AST (also the root) is of this type. Since we, in general, aim for immutability, the AST nodes 20 | are defined as frozen dataclasses with slots. If you want true immutability, avoid dictionaries and lists in 21 | your AST implementations. 22 | 23 | This class already implements the visitor pattern using the :class:`GenericASTVisitor`. 24 | """ 25 | 26 | def accept_visitor(self, visitor: GenericASTVisitor) -> Any: 27 | """Accept a visitor on this node. 28 | 29 | Since Python allows polymorphic return values, we allow the visitor pattern to return values. 30 | 31 | Args: 32 | visitor: the visitor we accept and call 33 | 34 | Returns: 35 | The result of the visitor. 36 | """ 37 | return visitor.visit(self) 38 | 39 | def get_init_values(self) -> dict[str, Any]: 40 | """Get the initialisation values with which this class was instantiated. 41 | 42 | Returns: 43 | A dictionary with the arguments which instantiated this object. 44 | """ 45 | return {field.name: getattr(self, field.name) for field in fields(self)} 46 | 47 | def get_children(self) -> list[Self]: 48 | """Get a list of all the AST nodes in this node. 49 | 50 | This should return all the references to AST nodes in this node, all bundled in one list. 
51 | 52 | Returns: 53 | All the child nodes in this node 54 | """ 55 | def get_ast_nodes(init_values): 56 | children = [] 57 | 58 | if isinstance(init_values, GenericASTNode): 59 | children.append(init_values) 60 | elif isinstance(init_values, Mapping): 61 | for el in init_values.values(): 62 | children.extend(get_ast_nodes(el)) 63 | elif isinstance(init_values, Iterable) and not isinstance(init_values, str): 64 | for el in init_values: 65 | children.extend(get_ast_nodes(el)) 66 | 67 | return children 68 | 69 | return get_ast_nodes(self.get_init_values()) 70 | 71 | 72 | class GenericASTVisitor[T: GenericASTNode](metaclass=ABCMeta): 73 | """Generic base class for visitors, according to the visitor design pattern. 74 | 75 | Instead of a typed double dispatch we use dynamic double dispatching in which each node, when visited, calls 76 | the :meth:``visit` of this class instead of a visit method for each node type. This makes it easier to 77 | do edits on class names since the types can be looked up by an IDE. 78 | 79 | We use the generic type hint `T` to ensure solid type hinting for implementing visitors. 80 | """ 81 | 82 | @abstractmethod 83 | def visit(self, ast_node: T) -> Any: 84 | """Visit the AST node. 85 | 86 | This uses dynamic dispatch to accept all types of AST nodes. 87 | 88 | Since Python allows polymorphic return values, we allow the visitor pattern to return values. 89 | 90 | Args: 91 | ast_node: an AST node of any type 92 | 93 | Returns: 94 | The result of the visitor. 95 | """ 96 | 97 | def apply(self, ast_node: T) -> Any: 98 | """Convenience method to apply this visitor on the indicated node and get the result value. 
99 | 100 | Args: 101 | ast_node: the node on which to apply this visitor 102 | 103 | Returns: 104 | The result value from :meth:`get_result` 105 | """ 106 | return ast_node.accept_visitor(self) 107 | 108 | -------------------------------------------------------------------------------- /scripts/demo_validation.py: -------------------------------------------------------------------------------- 1 | """This script demonstrates using the direct-mode PySchematron validator to validate your XML documents. 2 | 3 | In Schematron validation, we apply a Schematron Schema to an XML resulting in either a pass or a fail. A fail indicates 4 | that the document could not be validated using the Schema, hence the XML may have problems. In addition to this boolean 5 | output, Schematron also defines the Schematron Validation Report Language (SVRL), loosely defining a format in which 6 | more information about the validation results can be represented. 7 | 8 | This script shows three different ways of interacting with the PySchematron direct-mode validator. The most simple is 9 | by using the functional interface defined in the main module. Second, you can use a generalized API which might be 10 | extended in the future with an XSLT methodology. Finally, you can use the full-blown direct-mode classes and methods. 11 | The latter is the most complicated but gives the most control. 
# the paths to the example data and Schema
example_base_path = Path('../tests/fixtures/full_example/')
schematron_schema_path = example_base_path / 'schema.sch'
example_xml_document_path = example_base_path / 'cargo.xml'

# the phase we would like to evaluate
phase = '#ALL'


def demo_functional_interface():
    """Validate the example document using the functional interface, the simplest entry point of PySchematron.

    Prints the SVRL report followed by the validity of the document.
    """
    outcome = validate_document(example_xml_document_path, schematron_schema_path)
    svrl_tree = outcome.get_svrl()

    print(etree.tostring(svrl_tree, pretty_print=True).decode('utf-8'))
    print(outcome.is_valid())


def demo_generic_api():
    """Validate the example document using the generic API, configuring a validator through its factory.

    Prints the SVRL report followed by the validity of the document.
    """
    factory = DirectModeSchematronValidatorFactory()
    factory.set_schema(schematron_schema_path)
    factory.set_phase(phase)

    outcome = factory.build().validate(example_xml_document_path)

    svrl_tree = outcome.get_svrl()
    serialized_report = etree.tostring(svrl_tree, pretty_print=True).decode('utf-8')

    print(serialized_report)
    print(outcome.is_valid())


def demo_full_api():
    """Validate the example document by wiring up the direct-mode classes by hand.

    The Schematron is parsed into a schema, the visitors for extends, abstract patterns and phase selection are
    applied in that order, after which the resulting schema is used to validate the example XML document.
    Prints the SVRL report followed by the validity of the document.
    """
    schema_tree = load_xml_document(schematron_schema_path)
    context = ParsingContext(base_path=schematron_schema_path.parent)

    schema = SchemaParser().parse(schema_tree.getroot(), context)
    schema = ResolveExtendsVisitor(schema).apply(schema)
    schema = ResolveAbstractPatternsVisitor(schema).apply(schema)
    schema = PhaseSelectionVisitor(schema, phase).apply(schema)

    xml_validator = SimpleSchematronXMLValidator(schema, phase, context.base_path)

    outcome = xml_validator.validate_xml(load_xml_document(example_xml_document_path))

    svrl_tree = DefaultSVRLReportBuilder().create_svrl_xml(outcome)

    print(etree.tostring(svrl_tree, pretty_print=True).decode('utf-8'))
    print(outcome.is_valid())


demo_functional_interface()
demo_generic_api()
demo_full_api()
def _build_validator(schematron_schema: Path | _ElementTree,
                     phase: str | None,
                     schematron_base_path: Path | None,
                     custom_functions: dict[str, str | list[CustomQueryFunction]] | None):
    """Build the direct-mode validator shared by the functional interface.

    Args:
        schematron_schema: the Schematron Schema to load
        phase: the Schematron phase to use, or None
        schematron_base_path: the explicit Schematron base path, or None
        custom_functions: the keyword arguments for the factory's `add_custom_functions`, or None

    Returns:
        The validator constructed by the :class:`DirectModeSchematronValidatorFactory`.
    """
    validator_factory = DirectModeSchematronValidatorFactory(schematron_xml=schematron_schema, phase=phase,
                                                             schematron_base_path=schematron_base_path)
    if custom_functions:
        validator_factory.add_custom_functions(**custom_functions)
    return validator_factory.build()


def validate_document(xml_document: Path | _ElementTree,
                      schematron_schema: Path | _ElementTree,
                      phase: str | None = None,
                      schematron_base_path: Path | None = None,
                      custom_functions: dict[str, str | list[CustomQueryFunction]] | None = None,
                      mode: Literal['direct-mode'] = 'direct-mode') -> ValidationResult:
    """Validate an XML document using a Schematron schema.

    Args:
        xml_document: the XML document we would like to validate
        schematron_schema: the Schematron Schema we would like to load and use for the validation
        phase: the Schematron phase we would like to use, optional.
        schematron_base_path: explicitly set the Schematron base path, this is used in Schematron file inclusions.
        custom_functions: a dictionary defining additional custom functions to add to the parser(s).
            This should at least contain the key 'query_binding' mapping to a query binding name, and the
            key 'custom_query_functions' specifying a list of custom query functions to add.
            For non-standard query binding language, you also need to provide the key 'base_query_binding'
            mapping to a standard query binding. Example usage:
            `{'query_binding': 'xpath31-custom', 'base_query_binding': 'xpath31', 'custom_query_functions': [...]}`.
        mode: which validation mode we would like to use, at the moment this only supports the 'direct-mode'.
            The value is currently not inspected, the direct mode is always used.

    Returns:
        The validation result in an API wrapper.
    """
    validator = _build_validator(schematron_schema, phase, schematron_base_path, custom_functions)
    return validator.validate(xml_document)


def validate_documents(xml_documents: list[Path | _ElementTree],
                       schematron_schema: Path | _ElementTree,
                       phase: str | None = None,
                       custom_functions: dict[str, str | list[CustomQueryFunction]] | None = None,
                       mode: Literal['direct-mode'] = 'direct-mode',
                       *,
                       schematron_base_path: Path | None = None) -> list[ValidationResult]:
    """Validate multiple XML documents using the same Schematron schema.

    This assumes we would like to use the same Schematron phase for each validation. As such, we can afford a speed-up
    since we don't need to compile the Schematron every run again.

    Args:
        xml_documents: the XML documents we would like to validate
        schematron_schema: the Schematron Schema we would like to load and use for the validation
        phase: the Schematron phase we would like to use, optional.
        custom_functions: a dictionary defining additional custom functions to add to the parser(s).
            This should at least contain the key 'query_binding' mapping to a query binding name, and the
            key 'custom_query_functions' specifying a list of custom query functions to add.
            For non-standard query binding language, you also need to provide the key 'base_query_binding'
            mapping to a standard query binding. Example usage:
            `{'query_binding': 'xpath31-custom', 'base_query_binding': 'xpath31', 'custom_query_functions': [...]}`.
        mode: which validation mode we would like to use, at the moment this only supports the 'direct-mode'.
            The value is currently not inspected, the direct mode is always used.
        schematron_base_path: explicitly set the Schematron base path, this is used in Schematron file inclusions.
            Keyword-only, mirroring the option of :func:`validate_document`.

    Returns:
        The validation results in an API wrapper, one for each document and in the order of the input.
    """
    validator = _build_validator(schematron_schema, phase, schematron_base_path, custom_functions)
    return [validator.validate(document) for document in xml_documents]
To ensure that a single validator can 28 | process multiple XML documents once set on a phase, we make the validators immutable and built them using this 29 | factory. 30 | """ 31 | 32 | @abstractmethod 33 | def set_schema(self, schematron_xml: Path | _ElementTree): 34 | """Set the Schematron schema. 35 | 36 | Args: 37 | schematron_xml: the Schematron Schema we would like to use in the validation. 38 | """ 39 | 40 | @abstractmethod 41 | def set_base_path(self, schematron_base_path: Path): 42 | """Set the Schematron base path. 43 | 44 | Some Schematron schemas include files from other locations. If you would like explicit control over the 45 | Schematron base path, or if the path could not be inferred from the provided schema, set it here explicitly. 46 | 47 | Args: 48 | schematron_base_path: the base path for the Schematron definition 49 | """ 50 | 51 | @abstractmethod 52 | def set_phase(self, phase: str | None): 53 | """Set the phase we would like the Schematron validator to validate. 54 | 55 | By setting this before we load the Schematron validator, we can prepare the phase for evaluation, giving 56 | us some speed benefit. 57 | 58 | Args: 59 | phase: the phase we would like to validate. If set to None we use the '#DEFAULT' phase. 60 | """ 61 | 62 | @abstractmethod 63 | def build(self) -> SchematronValidator: 64 | """Construct the configured Schematron validator. 65 | 66 | Returns: 67 | An implementation of the Schematron validator. 68 | """ 69 | 70 | @abstractmethod 71 | def add_custom_functions(self, 72 | query_binding: str, 73 | custom_query_functions: list[CustomQueryFunction], 74 | base_query_binding: str | None = None): 75 | """Add custom functions to the parser we would like to use. 76 | 77 | The custom query functions are added to the parser for a specific query binding language. Repeated calls 78 | accumulate the additional functions. 
class SchematronValidator(metaclass=ABCMeta):
    """The Schematron validator, validating an XML document using the init injected Schematron definition.

    This is the abstract interface; instances are meant to be obtained from a :class:`SchematronValidatorFactory`.
    """

    @abstractmethod
    def validate(self, xml_data: Path | _ElementTree) -> ValidationResult:
        """Validate an XML document and return the validation result.

        Args:
            xml_data: the XML data we would like to validate, either the path to an XML file or a loaded
                element tree.

        Returns:
            The validation results, from which the SVRL document and the validity can be requested.
        """
class QueryProcessorFactory(metaclass=ABCMeta):
    """Interface of the factories producing a QueryProcessor for a query binding language.

    The queryBinding attribute of a Schematron schema determines which query language is used. Implementations
    of this factory hand out the query processor matching such a query binding.
    """

    @abstractmethod
    def get_query_processor(self, query_binding: str) -> QueryProcessor:
        """Get the processor to use for the indicated query binding language.

        Args:
            query_binding: the query binding for which we want a processor.

        Returns:
            A query processor specialized for this query binding language.

        Raises:
            ValueError: if no query processor could be found for the indicated query binding.
        """

    @abstractmethod
    def has_query_processor(self, query_binding: str) -> bool:
        """Check if a processor is available for the indicated query binding language.

        Args:
            query_binding: the query binding we want to check.

        Returns:
            True if we have a processor, False otherwise.
        """

    @abstractmethod
    def get_schema_query_processor(self, schema: Schema) -> QueryProcessor:
        """Get the processor to use for the indicated Schema.

        Besides selecting the right query binding, this also loads the namespaces of the Schema.

        Args:
            schema: the Schema for which we want a query processor.

        Returns:
            A query processor specialized for this Schema, with the right query binding language and
            the namespaces loaded.

        Raises:
            ValueError: if no query processor could be found for this Schema.
        """


class DefaultQueryProcessorFactory(QueryProcessorFactory):

    def __init__(self):
        """The default query processor factory.

        This factory only supports XSLT and XPath query languages. The XSLT query binding is additionally limited
        to XPath expressions.
        """
        # every query binding gets its own processor and parser instance
        parser_types = (
            ('xslt', XPath1QueryParser),
            ('xslt2', XPath2QueryParser),
            ('xslt3', XPath3QueryParser),
            ('xpath', XPath1QueryParser),
            ('xpath2', XPath2QueryParser),
            ('xpath3', XPath3QueryParser),
            ('xpath31', XPath31QueryParser),
        )
        self._query_processors = {}
        for binding_name, parser_type in parser_types:
            self._query_processors[binding_name] = XPathQueryProcessor(parser_type())

    @override
    def get_query_processor(self, query_binding: str) -> QueryProcessor:
        try:
            processor = self._query_processors[query_binding]
        except KeyError:
            raise ValueError(f'No parser could be found for the query binding "{query_binding}".')
        return processor

    @override
    def has_query_processor(self, query_binding: str) -> bool:
        return query_binding in self._query_processors

    @override
    def get_schema_query_processor(self, schema: Schema) -> QueryProcessor:
        # a schema without a query binding is treated as 'xslt'
        binding_name = schema.query_binding if schema.query_binding else 'xslt'

        prefix_to_uri = {}
        for namespace in schema.namespaces:
            prefix_to_uri[namespace.prefix] = namespace.uri

        return self.get_query_processor(binding_name).with_namespaces(prefix_to_uri)


class ExtendableQueryProcessorFactory(DefaultQueryProcessorFactory):

    def __init__(self):
        """An extendable query processor factory.

        This starts out with all the processors of the default query processor factory, and allows adding to
        and/or overwriting these using :meth:`set_query_processor`.
        """
        super().__init__()

    def set_query_processor(self, query_binding: str, query_processor: QueryProcessor):
        """Register the query processor to use for a specific query binding language.

        An already registered processor for the same query binding is replaced.

        Args:
            query_binding: the query binding we wish to add / overwrite.
            query_processor: the query processor we would like to use for this query binding.
        """
        self._query_processors[query_binding] = query_processor
15 | @echo "test-all: run all tests using all environments using tox" 16 | @echo "docs: generate Sphinx HTML documentation, including API docs" 17 | @echo "docs-pdf: generate the PDF documentation, including API docs" 18 | @echo "docs-man: generate the linux manpages" 19 | @echo "docs-changelog: generate the changelog documentation" 20 | @echo "install-deps: install all the dependencies" 21 | @echo "install-symlink: install the package as a symlink, to allow continuous development" 22 | @echo "uninstall: uninstall PySchematron (while keeping the dependencies)" 23 | @echo "prepare-release: prepare for a new release" 24 | @echo "release: package and release the new version" 25 | 26 | 27 | .PHONY: clean 28 | clean: clean-build clean-pyc clean-test 29 | 30 | .PHONY: clean-build 31 | clean-build: 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -f {} + 37 | 38 | .PHONY: clean-pyc 39 | clean-pyc: 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . 
-name '__pycache__' -exec rm -fr {} + 44 | 45 | .PHONY: clean-test 46 | clean-test: 47 | rm -rf .tox/ 48 | rm -f .coverage 49 | rm -rf htmlcov/ 50 | rm -rf .pytest_cache 51 | rm -rf tests/htmlcov 52 | rm -rf tests/.coverage 53 | find tests -name 'build' -exec rm -rf {} + 54 | find tests -name '.coverage' -exec rm -rf {} + 55 | 56 | 57 | .PHONY: tests 58 | tests: test 59 | 60 | .PHONY: test 61 | test: 62 | mkdir -p build 63 | COVERAGE_FILE=build/.coverage \ 64 | $(PYTEST) tests --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html 65 | 66 | .PHONY: test-unit 67 | test-unit: 68 | mkdir -p build 69 | COVERAGE_FILE=build/.coverage \ 70 | $(PYTEST) tests/unit --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html 71 | 72 | .PHONY: test-integration 73 | test-integration: 74 | mkdir -p build 75 | COVERAGE_FILE=build/.coverage \ 76 | $(PYTEST) tests/integration --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html 77 | 78 | .PHONY: test-all 79 | test-all: 80 | tox 81 | 82 | .PHONY: docs 83 | docs: 84 | mkdir -p build 85 | rm -f docs/$(PROJECT_NAME)*.rst 86 | rm -f docs/modules.rst 87 | $(MAKE) -C docs clean 88 | sphinx-apidoc -o docs/ $(PROJECT_NAME) 89 | $(MAKE) -C docs html SPHINXBUILD='python3 $(shell which sphinx-build)' 90 | @echo "To view results type: firefox docs/_build/html/index.html &" 91 | 92 | .PHONY: docs-pdf 93 | docs-pdf: 94 | mkdir -p build 95 | rm -f docs/$(PROJECT_NAME)*.rst 96 | rm -f docs/modules.rst 97 | $(MAKE) -C docs clean 98 | sphinx-apidoc -o docs/ $(PROJECT_NAME) 99 | $(MAKE) -C docs latexpdf SPHINXBUILD='python3 $(shell which sphinx-build)' 100 | @echo "To view results use something like: evince docs/_build/latex/$(PROJECT_NAME).pdf &" 101 | 102 | .PHONY: docs-man 
103 | docs-man: 104 | rm -f docs/$(PROJECT_NAME)*.rst 105 | rm -f docs/modules.rst 106 | $(MAKE) -C docs clean 107 | sphinx-apidoc -o docs/ $(PROJECT_NAME) 108 | $(MAKE) -C docs man SPHINXBUILD='python3 $(shell which sphinx-build)' 109 | @echo "To view results use something like: man docs/_build/man/$(PROJECT_NAME).1 &" 110 | 111 | 112 | .PHONY: docs-changelog 113 | docs-changelog: 114 | git cliff --prepend CHANGELOG.rst -l -u 115 | 116 | 117 | .PHONY: prepare-release 118 | prepare-release: clean 119 | @echo "Current version: "$(PROJECT_VERSION) 120 | @while [ -z "$$NEW_VERSION" ]; do \ 121 | read -r -p "Give new version: " NEW_VERSION;\ 122 | done && \ 123 | ( \ 124 | printf 'Setting new version: %s \n\n' \ 125 | "$$NEW_VERSION " \ 126 | ) && sed -i 's/version = \"\(.*\)\"/version = "'$$NEW_VERSION'"/g' pyproject.toml \ 127 | && git cliff -l -u --tag $$NEW_VERSION --prepend CHANGELOG.rst \ 128 | && echo "Please manually inspect CHANGELOG.rst before continuing." \ 129 | && read ans \ 130 | && git add -u \ 131 | && git diff-index --quiet HEAD || git commit -am "release: New release" \ 132 | && git tag -a v$$NEW_VERSION -m "Version $$NEW_VERSION" \ 133 | 134 | 135 | .PHONY: release 136 | release: clean release-git release-pip 137 | 138 | .PHONY: release-git 139 | release-git: 140 | git push 141 | git push origin --tags 142 | 143 | .PHONY: release-pip 144 | release-pip: 145 | flit publish 146 | 147 | 148 | .PHONY: dist 149 | dist: clean 150 | $(PYTHON) setup.py sdist 151 | $(PYTHON) setup.py bdist_wheel 152 | ls -l dist 153 | 154 | .PHONY: install-deps 155 | install-deps: 156 | flit install --only-deps 157 | 158 | .PHONY: install-symlink 159 | install-symlink: 160 | flit install --symlink 161 | 162 | .PHONY: uninstall 163 | uninstall: 164 | $(PIP) uninstall -y $(PROJECT_NAME) 165 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 
############ 2 | PySchematron 3 | ############ 4 | This is a library package for Schematron validation in Python. 5 | 6 | Schematron is a schema language used to validate XML documents. 7 | A Schematron schema is defined as an XML containing various assertions to validate a target XML document. 8 | If the XML you wish to validate passes all the Schematron assertions, 9 | your XML is considered valid according to the Schematron schema. 10 | Complete validation results are offered using the Schematron Validation Report Language, 11 | a loose definition of an XML based validation report. 12 | 13 | There are various versions of Schematron available. 14 | This library only supports the latest version of Schematron, 15 | `ISO/IEC 19757-3:2020 `_, with a few limitations (see below). 16 | 17 | Currently, this library only supports a pure Python mode of Schematron validation. 18 | In this pure Python mode we load the Schematron into an internal representation and apply that to an XML. 19 | The advantage of such direct evaluation is that it offers superior performance compared to an XSLT 20 | transformation based evaluation. 21 | The disadvantage is that it only supports XPath expressions and does not support XSLT functions. 22 | 23 | In the future we hope to expand this library with an XSLT transformation based processing. 24 | Unfortunately XSLT transformations require an XSLT processor, 25 | which is currently not available in Python for XSLT >= 2.0. 26 | 27 | A few similar packages to this software in other languages are 28 | `node-schematron `_ in Javascript, and 29 | `ph-schematron `_ in Java. 30 | 31 | For all XPath expressions this package uses the 32 | `elementpath `_ library supporting XPath 1.0, 2.0, 3.0 and 3.1 selectors. 33 | 34 | Please note that, as of this writing, this package only supports Python 3.12. 35 | Older Python versions are not supported due to missing functionality (Python syntax primarily). 36 | Newer versions will be supported in due time. 
37 | 38 | ********** 39 | Python API 40 | ********** 41 | To use the Python API, install the project like any other Python project, e.g. using ``pip install pyschematron``. 42 | 43 | After that you can use: 44 | 45 | .. code:: python 46 | 47 | from pyschematron import validate_document 48 | 49 | result = validate_document(<xml_document>, <schematron_schema>) 50 | 51 | svrl = result.get_svrl() 52 | is_valid = result.is_valid() 53 | 54 | 55 | To process multiple documents with the same Schematron schema, you can use: 56 | 57 | .. code:: python 58 | 59 | from pyschematron import validate_documents 60 | 61 | documents = [...] 62 | schema = <schematron_schema> 63 | 64 | results = validate_documents(documents, schema) 65 | 66 | 67 | For more examples, or examples on how to use different parts of the API, please see the `demo_*` files in the 68 | `scripts` directory. 69 | 70 | 71 | ********************** 72 | Command Line Interface 73 | ********************** 74 | To use the command line interface, first install the application using pip: ``pip install pyschematron``. 75 | Afterwards, you can use the command ``pyschematron`` to validate your documents. 76 | Use ``pyschematron --help`` to see the command line options. 77 | 78 | 79 | ************* 80 | Functionality 81 | ************* 82 | This library offers a basic implementation of Schematron using a pure Python "direct mode" evaluation method. 83 | 84 | Direct mode evaluation 85 | ====================== 86 | The direct mode evaluation allows for basic validity checks using all XPath functionality of Schematron. 87 | 88 | When applied to a document, the direct mode evaluation follows this procedure to validate a document: 89 | 90 | #. Read in the Schematron from either a document or a string. 91 | In this phase the document is loaded into an AST (abstract syntax tree). 92 | All ``<include>`` elements are resolved and inlined into the AST. 93 | All ``<extends>`` elements are loaded but not fully resolved at this stage. 94 | #. Recreate the AST without abstract patterns and rules.
95 | In this phase we process the AST to create a concrete set of patterns and rules. 96 | All ``<extends>`` elements are resolved, abstract patterns are instantiated, 97 | and redundant abstract rules and patterns are removed. 98 | #. Phase selection: we limit the AST to only the patterns and phases that belong to the selected phase. 99 | #. Query binding: we determine the query binding language to use. 100 | This library only supports ``xslt``, ``xslt2``, ``xslt3``, ``xpath``, ``xpath2``, ``xpath3``, and ``xpath31``, 101 | where all ``xslt`` variations are limited to XPath expressions only. 102 | #. Apply the bound schema to an XML document to validate. 103 | 104 | 105 | Custom functions 106 | ---------------- 107 | With the current direct mode evaluation method, custom XSLT functions in your Schematron (``<xsl:function>``) are not supported. 108 | Custom Python functions are supported however. View the `demo_custom_functions.py` in the `scripts` directory for examples. 109 | 110 | 111 | Compliance 112 | ---------- 113 | The direct mode evaluation supports most of the `ISO/IEC 19757-3:2020 `_ standard, with a few exceptions. 114 | All Schematron specific elements are supported, except for XSLT elements. 115 | 116 | In terms of attributes, the ``@documents`` attribute of the ``<pattern>`` tag is not supported. 117 | Furthermore, ``@icon``, ``@see``, ``@fpi``, ``@flag``, and ``@role`` are loaded but not used. 118 | 119 | Note that the ISO Schematron applies rules to: 120 | 121 | - Elements (*) 122 | - Attributes (@*) 123 | - Root node (/) 124 | - Comments (comment()) 125 | - Processing instructions (processing-instruction()) 126 | 127 | But it does not apply rules to text nodes. 128 | 129 | If there are any problems, please open a GitHub issue. 130 | 131 | -------------------------------------------------------------------------------- /tests/fixtures/full_example/schema.sch: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | Cargo checking 10 | 11 |

This checks the cargo manifest on weight, size, and vehicles on number of wheels. 12 | By default it checks on weight only.

13 |

A second paragraph to test if this is processed correctly.

14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Weight check 23 | 24 | info 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | Volume check 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 |

Just a check on the banana's.

51 | 52 | Banana is a fruit. 53 | And the weight of the banana shall be 1. 54 | 55 |
56 | 57 | 58 | 59 | 60 | Volume not correct ( vs at ). 61 | Testing name with path: 62 | 63 | 64 | We report an item with a volume greater than allowed. 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | XML model processing instruction does not include foobar. 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | This comment does not start with "Comment: ". 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | The id attribute does not starts with "id_". 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | The root node does not have an ID. 101 | 102 | 103 | The namespace for "test" should be "http://www.test.com". 104 | 105 | 106 | 107 | 108 | 109 | 110 |

Check for all the vehicles if they are in the right category.

111 | 112 | 113 |
114 | 115 | 116 |

Check for all the fruits if they are in the right category.

117 | 118 | 119 |
120 | 121 | 122 |

Check if items are in the right category ($pv_category).

123 | 124 | 125 | The item is in the wrong category ($pv_category). 126 | Extra data 127 | 128 | 129 | 130 |
131 | 132 | 133 | 134 |

Only check the cargo items for weight.

135 | Check for weights 136 | 137 |
138 | 139 | 140 |

Only check the cargo items for volume.

141 | 142 |
143 | 144 | 145 |

Only check the cargo for the right category.

146 | 147 | 148 |
149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 |
163 | -------------------------------------------------------------------------------- /scripts/demo_custom_functions.py: -------------------------------------------------------------------------------- 1 | """This script shows how to use custom Python functions inside your Schematron schema's. 2 | 3 | The general idea is that you either overwrite an existing query binding language, or define a new query binding 4 | with your custom functions loaded. Your custom functions will then be attached to the query parser defined for that 5 | specific query language. 6 | 7 | As an example. Suppose you have a small custom function named `custom-func()`, and you want to use it in your 8 | Schematron Schema. Your Schema is defined using `queryBinding="xpath31"` and you wish to extend this with your 9 | custom function. For clarity, you want to call your new query binding language "xpath31-custom". In your Schematron 10 | schema you then use `queryBinding="xpath31-custom"`, and in your queries you can use the `custom-func()`. For 11 | PySchematron to know about this function, you must define it and add it to the library. This module shows how. 12 | 13 | There are three ways of interacting with the PySchematron direct-mode validator. The most simple is by using the 14 | functional interface defined in the main module. Second, you can use a generalized API which might be extended in the 15 | future with an XSLT methodology. Finally, you can use the full-blown direct-mode classes and methods. The latter is the 16 | most complicated but gives the most control. Either of these though enables adding custom functions. 
17 | """ 18 | 19 | __author__ = 'Robbert Harms' 20 | __date__ = '2024-04-03' 21 | __maintainer__ = 'Robbert Harms' 22 | __email__ = 'robbert@xkls.nl' 23 | __licence__ = 'LGPL v3' 24 | 25 | from pathlib import Path 26 | 27 | from elementpath import ElementNode 28 | from lxml import etree 29 | from lxml.etree import _ElementTree 30 | 31 | from pyschematron import DirectModeSchematronValidatorFactory, validate_document 32 | from pyschematron.direct_mode.schematron.parsers.xml.parser import SchemaParser 33 | from pyschematron.direct_mode.xml_validation.queries.factories import ExtendableQueryProcessorFactory 34 | from pyschematron.direct_mode.xml_validation.queries.xpath import (XPathQueryProcessor, XPath31QueryParser, 35 | SimpleCustomXPathFunction) 36 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder 37 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator 38 | from pyschematron.utils import load_xml_document 39 | 40 | 41 | def get_example_schema() -> _ElementTree: 42 | """Get the example Schema for the examples. 43 | 44 | In this Schema, we defined a new query binding language `queryBinding="xpath31-custom"` which we will 45 | also need to add in PySchematron. Note also the use of `custom-func()`. 46 | 47 | Returns: 48 | The loaded Schema. 49 | """ 50 | schematron = ''' 51 | 56 | 57 | 58 | 59 | 60 |

Just a check on the banana's.

61 | 62 | Banana is a fruit 63 | 64 |
65 |
66 | ''' 67 | return load_xml_document(schematron) 68 | 69 | 70 | def get_example_xml_document() -> _ElementTree: 71 | """Get the example XML document we wish to validate. 72 | 73 | This returns the XML document from the tests fixtures. 74 | 75 | Returns: 76 | The XML document we wish to validate. 77 | """ 78 | return load_xml_document(Path('../tests/fixtures/full_example/cargo.xml')) 79 | 80 | 81 | def custom_func(el: ElementNode, number: int) -> int: 82 | """An example of a custom function. 83 | 84 | It can have any number of inputs and outputs. This example takes an elementpath element as input and an integer. 85 | It returns the XPath node position times the provided number. 86 | """ 87 | return el.position * number 88 | 89 | 90 | def demo_functional_interface(xml_document: _ElementTree, schematron_xml: _ElementTree): 91 | """Showing how to add custom path functions using the functional interface. 92 | 93 | This uses the functional interface, the most simple method of interacting with PySchematron. 94 | 95 | Args: 96 | xml_document: the document we wish to validate 97 | schematron_xml: the Schematron Schema 98 | """ 99 | custom_functions = { 100 | 'query_binding': 'xpath31-custom', 101 | 'base_query_binding': 'xpath31', 102 | 'custom_query_functions': [SimpleCustomXPathFunction(custom_func, 'custom-func')] 103 | } 104 | 105 | result = validate_document(xml_document, schematron_xml, custom_functions=custom_functions) 106 | 107 | svrl = result.get_svrl() 108 | print(etree.tostring(svrl, pretty_print=True).decode('utf-8')) 109 | print(result.is_valid()) 110 | 111 | 112 | def demo_generic_api(xml_document: _ElementTree, schematron_xml: _ElementTree): 113 | """Showing how to add custom path functions using the general API. 114 | 115 | This uses the generic API which in the future might be extended using the XSLT method. 
116 | 117 | Args: 118 | xml_document: the document we wish to validate 119 | schematron_xml: the Schematron Schema 120 | """ 121 | validator_factory = DirectModeSchematronValidatorFactory(schematron_xml=schematron_xml) 122 | 123 | validator_factory.add_custom_functions('xpath31-custom', 124 | [SimpleCustomXPathFunction(custom_func, 'custom-func')], 'xpath31') 125 | 126 | validator = validator_factory.build() 127 | validation_result = validator.validate(xml_document) 128 | 129 | svrl = validation_result.get_svrl() 130 | print(etree.tostring(svrl, pretty_print=True).decode('utf-8')) 131 | print(validation_result.is_valid()) 132 | 133 | 134 | def demo_full_api(xml_document: _ElementTree, schematron_xml: _ElementTree): 135 | """Showing how to add custom path functions using the full direct-mode API. 136 | 137 | This is the most complex method, but shows how the direct-mode method operates. 138 | 139 | Args: 140 | xml_document: the document we wish to validate 141 | schematron_xml: the Schematron Schema 142 | 143 | """ 144 | custom_xpath_function = SimpleCustomXPathFunction(custom_func, 'custom-func') 145 | 146 | custom_parser = XPath31QueryParser() 147 | custom_parser = custom_parser.with_custom_function(custom_xpath_function) 148 | 149 | custom_query_processor = XPathQueryProcessor(custom_parser) 150 | 151 | custom_processor_factory = ExtendableQueryProcessorFactory() 152 | custom_processor_factory.set_query_processor('xpath31-custom', custom_query_processor) 153 | 154 | schema = SchemaParser().parse(schematron_xml.getroot()) 155 | 156 | validator = SimpleSchematronXMLValidator(schema, query_processor_factory=custom_processor_factory) 157 | validation_results = validator.validate_xml(xml_document) 158 | 159 | svrl = DefaultSVRLReportBuilder().create_svrl_xml(validation_results) 160 | 161 | print(etree.tostring(svrl, pretty_print=True).decode('utf-8')) 162 | print(validation_results.is_valid()) 163 | 164 | 165 | demo_functional_interface(get_example_xml_document(), 
class DirectModeSchematronValidatorFactory(SchematronValidatorFactory):

    def __init__(self,
                 schematron_xml: Path | _ElementTree | None = None,
                 phase: str | None = None,
                 schematron_base_path: Path | None = None):
        """Validator factory for direct mode Schematron validation.

        Args:
            schematron_xml: the Schematron Schema we want to evaluate. Can also be set using the class methods.
            phase: the phase we would like to evaluate. Can also be set using the class methods. If set to None
                we use the Schema's default phase.
            schematron_base_path: explicitly set the Schematron base path, this is used in Schematron file inclusions.
        """
        self._schematron_xml = schematron_xml
        self._phase = phase
        self._schematron_base_path = schematron_base_path
        self._custom_query_functions: dict[str, list[CustomQueryFunction]] = {}
        self._custom_query_bases: dict[str, str] = {}

    def set_schema(self, schematron_xml: Path | _ElementTree):
        """Set the Schematron schema, either as a path to load or as an already loaded XML tree."""
        self._schematron_xml = schematron_xml

    def set_phase(self, phase: str | None):
        """Set the phase to evaluate; None is forwarded as-is to the validator."""
        self._phase = phase

    def set_base_path(self, schematron_base_path: Path):
        """Explicitly set the base path handed to the Schematron parsing context."""
        self._schematron_base_path = schematron_base_path

    def add_custom_functions(self,
                             query_binding: str,
                             custom_query_functions: list[CustomQueryFunction],
                             base_query_binding: str | None = None):
        """Register custom query functions for a query binding.

        Repeated calls for the same query binding accumulate the functions.

        Args:
            query_binding: the query binding the functions are registered under.
            custom_query_functions: the functions to add. The list is copied, the caller's list is never
                stored nor modified by this factory.
            base_query_binding: optionally, the existing query binding whose processor is extended
                with the custom functions.
        """
        # Extend a list owned by the factory. Storing the caller's list directly would make a later
        # call for the same binding append to (and thus silently mutate) the caller's object.
        self._custom_query_functions.setdefault(query_binding, []).extend(custom_query_functions)

        if base_query_binding:
            self._custom_query_bases[query_binding] = base_query_binding

    def build(self) -> SchematronValidator:
        """Parse the configured schema and create a direct mode validator for it.

        Returns:
            A validator bound to the parsed schema, the selected phase and the custom query functions.

        Raises:
            ValueError: if no Schematron schema was provided, or if custom functions were added to an
                unknown query binding without providing a base query binding.
        """
        if self._schematron_xml is None:
            # Fail early with an actionable message instead of an AttributeError on `None.getroot()`.
            raise ValueError('No Schematron schema provided, set it in the constructor or using `set_schema`.')

        if isinstance(self._schematron_xml, Path):
            schematron = load_xml_document(self._schematron_xml)
        else:
            schematron = self._schematron_xml

        schematron_base_path = self._get_schematron_base_path()

        schematron_parser = SchemaParser()
        parsing_context = ParsingContext(base_path=schematron_base_path)
        schema = schematron_parser.parse(schematron.getroot(), parsing_context)

        query_processor_factory = self._get_query_processor_factory()

        return DirectModeSchematronValidator(schema, self._phase, schematron_base_path, query_processor_factory)

    def _get_query_processor_factory(self) -> QueryProcessorFactory | None:
        """Get the query processor factory we would like to use for the validation.

        Returns:
            The query processor to use, or None if defaults can be used.

        Raises:
            ValueError: if functions were added for a query binding which is unknown to the processor
                factory and for which no base query binding was provided.
        """
        if not self._custom_query_functions:
            return None

        custom_processor_factory = ExtendableQueryProcessorFactory()

        for query_binding, custom_functions in self._custom_query_functions.items():
            # An explicitly provided base takes precedence over an existing processor of the same name.
            if query_binding in self._custom_query_bases:
                processor = custom_processor_factory.get_query_processor(self._custom_query_bases[query_binding])
            elif custom_processor_factory.has_query_processor(query_binding):
                processor = custom_processor_factory.get_query_processor(query_binding)
            else:
                raise ValueError(f'No query binding base provided for adding '
                                 f'custom functions to query binding "{query_binding}"')

            for custom_function in custom_functions:
                processor = processor.with_custom_function(custom_function)

            custom_processor_factory.set_query_processor(query_binding, processor)
        return custom_processor_factory

    def _get_schematron_base_path(self) -> Path | None:
        """Get the base path we use for the Schematron parsing.

        If set explicitly, we return that. Else we try to infer it from the provided Schematron schema.

        Returns:
            The path we would like to use in the Schematron parsing.
        """
        if self._schematron_base_path is not None:
            return self._schematron_base_path

        if isinstance(self._schematron_xml, Path):
            return self._schematron_xml.parent

        # Loaded trees may carry their origin in `docinfo.URL`; it is only used when present and non-empty.
        if hasattr(self._schematron_xml, 'docinfo'):
            docinfo = self._schematron_xml.docinfo
            if hasattr(docinfo, 'URL') and docinfo.URL:
                return Path(docinfo.URL).parent

        return None


class DirectModeSchematronValidator(SchematronValidator):

    def __init__(self, schema: Schema,
                 phase: str | None,
                 base_path: Path | None,
                 query_processor_factory: QueryProcessorFactory | None = None):
        """Validator API implementation for the direct mode evaluation.

        Args:
            schema: the Schema we would like to use in the validation
            phase: the phase we would like to use
            base_path: the base path of the Schema, used for loading external Schema parts.
            query_processor_factory: optionally, specify the query processor factory to use.
        """
        # The schema is stored as provided; the processed version below is only handed to the validator.
        self._schema = schema
        self._phase = phase
        self._base_path = base_path

        # Processing order: resolve extends, then abstract patterns, then select the phase.
        schema = ResolveExtendsVisitor(schema).apply(schema)
        schema = ResolveAbstractPatternsVisitor(schema).apply(schema)
        schema = PhaseSelectionVisitor(schema, phase).apply(schema)

        self._validator = SimpleSchematronXMLValidator(schema, phase, base_path,
                                                       query_processor_factory=query_processor_factory)

    def validate(self, xml_data: Path | _ElementTree) -> ValidationResult:
        """Validate an XML document, provided as a path to load or as an already loaded XML tree.

        Args:
            xml_data: the XML document to validate.

        Returns:
            The validation result wrapping the direct mode validation results.
        """
        if isinstance(xml_data, Path):
            xml_document = load_xml_document(xml_data)
        else:
            xml_document = xml_data

        validation_results = self._validator.validate_xml(xml_document)
        return DirectModeValidationResult(validation_results)


class DirectModeValidationResult(ValidationResult):

    def __init__(self, validation_results: XMLDocumentValidationResult):
        """Validation results from using the direct mode evaluation.

        The SVRL report and the validity flag are computed once, at construction time.

        Args:
            validation_results: the validation results from the direct mode evaluator.
        """
        self._validation_results = validation_results
        self._svrl_report = DefaultSVRLReportBuilder().create_svrl_xml(validation_results)
        self._is_valid = validation_results.is_valid()

    def get_svrl(self) -> _ElementTree:
        """Get the SVRL report built from the validation results."""
        return self._svrl_report

    def is_valid(self) -> bool:
        """Get the validity as reported by the wrapped validation results."""
        return self._is_valid
@dataclass(slots=True, frozen=True)
class SVRLNode(GenericASTNode):
    """Base class for the Schematron Validation Report Language (SVRL) nodes."""


@dataclass(slots=True, frozen=True)
class SchematronOutput(SVRLNode):
    """Representation of the `<schematron-output>` SVRL node.

    Args:
        texts: zero or more text nodes containing some text about the schema and/or validation.
        ns_prefix_in_attribute_values: namespace and prefix declarations.
        validation_events: schematron validation events, in order of processing
        metadata: optional metadata node for this report.
        phase: the Schematron phase this SVRL is a result of
        schema_version: copy of the Schematron's schemaVersion attribute
        title: some title for this validation report.
    """
    texts: tuple[Text, ...] = tuple()
    ns_prefix_in_attribute_values: tuple[NSPrefixInAttributeValues, ...] = tuple()
    validation_events: tuple[ValidationEvent, ...] = tuple()
    metadata: MetaData | None = None
    phase: str | None = None
    schema_version: str | None = None
    title: str | None = None


@dataclass(slots=True, frozen=True)
class MetaData(SVRLNode):
    """Metadata for this SVRL report.

    A non-standard defined node containing metadata. We use it to add metadata about PySchematron.

    Args:
        xml_elements: listing of the XML elements contained in this metadata
        namespaces: the namespaces to be used in the attributes of this metadata node.
    """
    xml_elements: tuple[_Element, ...] = tuple()
    namespaces: tuple[Namespace, ...] = tuple()

    # NOTE(review): the two classes below are kept nested in `MetaData`; this nesting is inferred
    # from the single blank-line spacing around them -- confirm against the original file.
    @dataclass(slots=True, frozen=True)
    class MetaDataNode:
        """Base class for metadata nodes."""

    @dataclass(slots=True, frozen=True)
    class Namespace(MetaDataNode):
        """Representation of a namespace attribute in the metadata node.

        Args:
            prefix: the prefix
            uri: the namespace's URI
        """
        prefix: str
        uri: str


@dataclass(slots=True, frozen=True)
class NSPrefixInAttributeValues(SVRLNode):
    """Namespace declaration, representation of the SVRL `<ns-prefix-in-attribute-values>` node.

    Args:
        prefix: the prefix to use for this namespace
        uri: the namespace's URI
    """
    prefix: str
    uri: str


@dataclass(slots=True, frozen=True)
class ValidationEvent(SVRLNode):
    """Base class for the validation events.

    An SVRL is a flat representation of the patterns, rules and assertions / reports visited during validation.
    To represent these in a class hierarchy we group these as validation events.
    """


@dataclass(slots=True, frozen=True)
class ActivePattern(ValidationEvent):
    """Representation of the `<active-pattern>` SVRL node.

    Args:
        documents: list of URIs of datatype `xs:anyURI`, pointing to the documents processed.
        id: the identifier of this pattern, typically a copy of the Schematron pattern id.
        name: some name for this pattern, up to the implementation.
        role: some role indicator for this pattern, up to the implementation.
    """
    documents: tuple[str, ...] | None = None
    id: str | None = None
    name: str | None = None
    role: str | None = None


@dataclass(slots=True, frozen=True)
class FiredRule(ValidationEvent):
    """Representation of the `<fired-rule>` SVRL node.

    Args:
        context: a copy of the context of the Schematron rule element
        document: Reference of the document to which this rule was defined.
        flag: a flag that was set to true when this rule fired, typically a copy of the flag of the Schematron rule.
        id: the identifier of this rule, typically a copy of the Schematron rule id.
        name: some name for this rule, up to the implementation.
        role: the role for this rule, typically a copy of the role of the rule element.
    """
    context: SchematronQuery
    document: str | None = None
    flag: str | None = None
    id: str | None = None
    name: str | None = None
    role: str | None = None


@dataclass(slots=True, frozen=True)
class SuppressedRule(ValidationEvent):
    """Representation of the `<suppressed-rule>` SVRL node.

    This node type is officially not in the standard, but it is added by some packages, and so do we.

    Args:
        context: a copy of the context of the Schematron rule element
        id: the identifier of this rule, typically a copy of the Schematron rule id.
    """
    context: SchematronQuery
    id: str | None = None


@dataclass(slots=True, frozen=True)
class CheckResult(ValidationEvent):
    """Base class for the `<failed-assert>` and `<successful-report>` SVRL nodes.

    Args:
        text: result description of this check.
        location: the location of this failed assert as an XPath expression
        test: the test expression for this assert, copied from the Schematron assert node.
        diagnostic_references: listing of the diagnostic references by this check
        property_references: properties referenced by this check
        subject_location: the location referenced by the subject of either the check or the parent rule.
        flag: a flag that was set to true when this assertion fired, typically a copy of the Schematron's flag rule.
        id: the identifier of this rule, typically a copy of the Schematron assert id.
        role: the role for this assert, typically a copy of the role of the assert element.
    """
    text: Text
    location: XPathExpression
    test: SchematronQuery
    diagnostic_references: tuple[DiagnosticReference, ...] = tuple()
    property_references: tuple[PropertyReference, ...] = tuple()
    subject_location: XPathExpression | None = None
    flag: str | None = None
    id: str | None = None
    role: str | None = None


@dataclass(slots=True, frozen=True)
class FailedAssert(CheckResult):
    """Representation of the `<failed-assert>` SVRL node."""


@dataclass(slots=True, frozen=True)
class SuccessfulReport(CheckResult):
    """Representation of the `<successful-report>` SVRL node."""


@dataclass(slots=True, frozen=True)
class DiagnosticReference(SVRLNode):
    """Representation of the `<diagnostic-reference>` node.

    These nodes reference a diagnostic connected to an assert/report node.

    Args:
        text: resulting text
        diagnostic: identifier of this diagnostic, copied from the diagnostic element's id attribute
    """
    text: Text
    diagnostic: str


@dataclass(slots=True, frozen=True)
class PropertyReference(SVRLNode):
    """Representation of the `<property-reference>` node.

    These nodes reference a property connected to an assert/report node.

    Args:
        text: resulting text
        property: identifier of this property
        role: the role attribute for this property, copied from the properties' role attribute
        scheme: the scheme attribute for this property, copied from the properties' scheme attribute
    """
    text: Text
    property: str
    role: str | None = None
    scheme: str | None = None


@dataclass(slots=True, frozen=True)
class XPathExpression(SVRLNode):
    """Representation of an XPath expression used in the SVRL nodes."""
    expression: str


@dataclass(slots=True, frozen=True)
class SchematronQuery(SVRLNode):
    """Representation of a Schematron Query, as used in the SVRL nodes."""
    query: str


@dataclass(slots=True, frozen=True)
class Text(SVRLNode):
    """Representation of a `<text>` tag.

    Although the attributes `class` and `id` are not specified in the SVRL specification, we add them nonetheless since
    they can be forwarded from Schematron nodes.

    Args:
        content: the text content of this text element, all loaded as one string
        fpi: formal public identifier, may be copied from the relevant Schematron FPI attribute.
        icon: the icon attribute
        see: A URI pointing to some external information of this element.
        class_: some class attribute
        id: unique identifier
        xml_lang: the default natural language for this node
        xml_space: defines how whitespace must be handled for this element.
    """
    content: str
    fpi: str | None = None
    icon: str | None = None
    see: str | None = None
    class_: str | None = None
    id: str | None = None
    xml_lang: str | None = None
    xml_space: Literal['default', 'preserve'] | None = None
243 | """ 244 | content: str 245 | fpi: str | None = None 246 | icon: str | None = None 247 | see: str | None = None 248 | class_: str | None = None 249 | id: str | None = None 250 | xml_lang: str | None = None 251 | xml_space: Literal['default', 'preserve'] | None = None 252 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/queries/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | __author__ = 'Robbert Harms' 4 | __date__ = '2023-03-24' 5 | __maintainer__ = 'Robbert Harms' 6 | __email__ = 'robbert@xkls.nl' 7 | __licence__ = 'GPL v3' 8 | 9 | from abc import ABCMeta, abstractmethod 10 | from typing import Any, Self, override, Callable 11 | from elementpath.tree_builders import RootArgType 12 | from elementpath.xpath_context import ItemArgType 13 | 14 | 15 | class QueryProcessor(metaclass=ABCMeta): 16 | """Interface class for the query processing classes. 17 | 18 | Successful query parsing requires a query parser and an evaluation context. These need to be matched to each other. 19 | This class ensures matching parsers and evaluation contexts. 20 | """ 21 | 22 | @abstractmethod 23 | def get_query_parser(self) -> QueryParser: 24 | """Get the query parser for parsing the queries in an AST. 25 | 26 | Returns: 27 | A query parser to parse queries in the Schematron 28 | """ 29 | 30 | @abstractmethod 31 | def get_evaluation_context(self) -> EvaluationContext: 32 | """Get the evaluation context for the parsed queries. 33 | 34 | Returns: 35 | An evaluation context to evaluate the parsed queries. 36 | """ 37 | 38 | @abstractmethod 39 | def with_namespaces(self, namespaces: dict[str, str]) -> Self: 40 | """Create a copy of this query processor with updated namespaces. 41 | 42 | Args: 43 | namespaces: a dictionary mapping namespace prefixes to URIs. 44 | 45 | Returns: 46 | An updated Query Processor. 
47 | """ 48 | 49 | @abstractmethod 50 | def with_custom_function(self, custom_function: CustomQueryFunction) -> Self: 51 | """Create a copy of this query processor with support for the provided custom function. 52 | 53 | Args: 54 | custom_function: the custom function to add to the parser. 55 | 56 | Returns: 57 | An updated query processor 58 | """ 59 | 60 | 61 | class SimpleQueryProcessor(QueryProcessor): 62 | 63 | def __init__(self, query_parser: QueryParser, evaluation_context: EvaluationContext): 64 | """Simple query processor prepared with a query parser and evaluation context. 65 | 66 | Defined to be immutable. 67 | 68 | Args: 69 | query_parser: the query parser this instance specialize in 70 | evaluation_context: the evaluation context this instance specializes in 71 | """ 72 | self._query_parser = query_parser 73 | self._evaluation_context = evaluation_context 74 | 75 | @override 76 | def get_query_parser(self) -> QueryParser: 77 | return self._query_parser 78 | 79 | @override 80 | def get_evaluation_context(self) -> EvaluationContext: 81 | return self._evaluation_context 82 | 83 | @override 84 | def with_namespaces(self, namespaces: dict[str, str]) -> Self: 85 | return type(self)(self._query_parser.with_namespaces(namespaces), 86 | self._evaluation_context.with_namespaces(namespaces)) 87 | 88 | @override 89 | def with_custom_function(self, custom_function: CustomQueryFunction) -> Self: 90 | return type(self)(self._query_parser.with_custom_function(custom_function), 91 | self._evaluation_context) 92 | 93 | 94 | class QueryParser(metaclass=ABCMeta): 95 | """Representation of a parser for Schematron queries.""" 96 | 97 | @abstractmethod 98 | def parse(self, source: str) -> Query: 99 | """Parse an expression in the implemented query language. 100 | 101 | Args: 102 | source: the source code of the expression, in the language supported by this parser. 103 | 104 | Returns: 105 | A parsed expression in the language supported by this parser. 
106 | """ 107 | 108 | @abstractmethod 109 | def with_namespaces(self, namespaces: dict[str, str]) -> Self: 110 | """Create a copy of this query parser with updated namespaces. 111 | 112 | Args: 113 | namespaces: a dictionary mapping namespace prefixes to URIs. 114 | 115 | Returns: 116 | An updated Query Parser. 117 | """ 118 | 119 | @abstractmethod 120 | def with_custom_function(self, custom_function: CustomQueryFunction) -> Self: 121 | """Create a copy of this query parser with an additional custom query function. 122 | 123 | Args: 124 | custom_function: the custom function to add to the parser. 125 | 126 | Returns: 127 | An updated Query Parser. 128 | """ 129 | 130 | 131 | class CachingQueryParser(QueryParser): 132 | 133 | def __init__(self, query_parser: QueryParser): 134 | """A wrapper around a query parser enabling caching of compiled queries. 135 | 136 | This keeps a mapping of source strings to Queries and checks this first before compiling a query. 137 | 138 | Args: 139 | query_parser: the query parser we use for actual parsing 140 | """ 141 | self._query_parser = query_parser 142 | self._query_cache = {} 143 | 144 | @override 145 | def parse(self, source: str) -> Query: 146 | return self._query_cache.setdefault(source, self._query_parser.parse(source)) 147 | 148 | @override 149 | def with_namespaces(self, namespaces: dict[str, str]) -> Self: 150 | return type(self)(self._query_parser.with_namespaces(namespaces)) 151 | 152 | @override 153 | def with_custom_function(self, custom_function: CustomQueryFunction) -> Self: 154 | return type(self)(self._query_parser.with_custom_function(custom_function)) 155 | 156 | 157 | class CustomQueryFunction(metaclass=ABCMeta): 158 | 159 | @property 160 | @abstractmethod 161 | def callback(self) -> Callable[..., Any]: 162 | """Get the callback of this custom function. 163 | 164 | Returns: 165 | The (Python) callback function. 
class EvaluationContext(metaclass=ABCMeta):
    """Representation of the context required when evaluating a Query.

    Each context should be immutable. Every change constructs a new evaluation context.
    """

    @abstractmethod
    def with_xml_root(self, xml_root: RootArgType) -> Self:
        """Create a new evaluation context with the XML root node we can use for dynamic queries.

        For queries like: `xs:integer(42)` no XML root node is needed.
        For dynamic queries like: `xs:integer(/data/@nmr_items)`, a root node is needed.

        Args:
            xml_root: the root node usable for dynamic query evaluations

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_context_item(self, xml_item: ItemArgType) -> Self:
        """Create a new evaluation context with the provided xml item (node, comment, attribute) as query base.

        This is needed for asserts and reports queries which assume the context of the rule node.

        Args:
            xml_item: the XML item we use as context node for parser evaluation.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        """Create a new evaluation context with the namespaces used during evaluation.

        Args:
            namespaces: a dictionary mapping namespace prefixes to URIs.
                This is used when namespace information is not available within document and element nodes.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_variables(self, variables: dict[str, Any], overwrite: bool = False) -> Self:
        """Create a new evaluation context with the variables used during evaluation.

        Args:
            variables: a dictionary mapping variable names (QNames) to variables. This expects the
                variables to be a parsed and evaluated value.
            overwrite: if set to True, the provided variables replace all stored variables. If set to False,
                the stored variables are updated with the provided ones.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def get_xml_root(self) -> RootArgType | None:
        """Get the XML root node current in this context.

        Returns:
            The current root node. May not be set, giving None.
        """

    @abstractmethod
    def get_context_item(self) -> ItemArgType | None:
        """Get the XML node serving as the query base.

        Returns:
            The XML item we use as context node for parser evaluation. May not be set yet, giving None.
        """
class XPathQueryProcessor(SimpleQueryProcessor):

    def __init__(self, query_parser: XPathQueryParser, evaluation_context: EvaluationContext | None = None):
        """Query processor for XPath queries.

        This is simply a wrapper around the simple query processor, with as default evaluation context the XPath
        evaluation context.

        Args:
            query_parser: the (XPath) query parser to use
            evaluation_context: the evaluation context. When not provided, a new, empty
                :class:`XPathEvaluationContext` is used.
        """
        super().__init__(query_parser, evaluation_context or XPathEvaluationContext())
class XPathEvaluationContext(EvaluationContext):

    def __init__(self,
                 root: RootArgType | None = None,
                 namespaces: dict[str, str] | None = None,
                 item: ItemArgType | None = None,
                 variables: dict[str, Any] | None = None):
        """Evaluation context for XPath queries, wrapping the `XPathContext` of the elementpath library.

        The init arguments are stored and reused as keyword arguments whenever one of the `with_*` methods
        constructs an updated copy.

        Args:
            root: the XML root node, may be left unset.
            namespaces: a dictionary mapping namespace prefixes to URIs, defaults to an empty dictionary.
            item: the XML item used as the context item, may be left unset.
            variables: a dictionary mapping variable names to values, defaults to an empty dictionary.
        """
        super().__init__()
        self._context_variables = {
            'root': root,
            'namespaces': namespaces or {},
            'item': item,
            'variables': variables or {}
        }

        # The elementpath context is only created when a root node is available.
        self._xpath_context = None
        if root is not None:
            self._xpath_context = XPathContext(**self._context_variables)

    def get_xpath_context(self) -> XPathContext:
        """Get the XPath context we can use for evaluation of a query.

        Returns:
            The XPathContext from the elementpath library, created from the root node, namespaces,
            context item and variables stored in this evaluation context.

        Raises:
            MissingRootNodeError: if no root node was set, in which case no XPath context could be created.
        """
        if self._xpath_context is None:
            raise MissingRootNodeError('Missing root node in XPath context, please set a root node first.')
        return self._xpath_context

    @override
    def with_context_item(self, xml_item: ItemArgType) -> Self:
        # Setting the identical item again does not require a new context.
        if xml_item is self._context_variables['item']:
            return self
        return self._get_updated({'item': xml_item})

    @override
    def with_xml_root(self, xml_root: RootArgType) -> Self:
        # Setting the identical root again does not require a new context.
        if xml_root is self._context_variables['root']:
            return self
        return self._get_updated({'root': xml_root})

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces replace the stored ones, they are not merged.
        return self._get_updated({'namespaces': namespaces})

    @override
    def with_variables(self, variables: dict[str, Any], overwrite: bool = False) -> Self:
        if overwrite:
            return self._get_updated({'variables': variables})
        else:
            # On duplicate names the newly provided variables take precedence over the stored ones.
            return self._get_updated({'variables': self._context_variables['variables'] | variables})

    @override
    def get_xml_root(self) -> RootArgType | None:
        return self._context_variables['root']

    @override
    def get_context_item(self) -> ItemArgType | None:
        return self._context_variables['item']

    def _get_updated(self, updates: dict[str, Any]) -> Self:
        """Create a new context of the same type from the stored init arguments with the updates applied.

        Args:
            updates: the init arguments to replace, keyed by init argument name.

        Returns:
            A new evaluation context, this instance is left unmodified.
        """
        kwargs = self._context_variables.copy()
        kwargs.update(updates)
        return type(self)(**kwargs)
class XPath1QueryParser(ElementPathXPathQueryParser):

    def __init__(self, namespaces: dict[str, str] | None = None):
        """Parser for XPath 1.0 queries, backed by the `XPath1Parser` of the `elementpath` library.

        Args:
            namespaces: the namespaces (prefix to URI) available while parsing.
        """
        super().__init__(XPath1Parser, namespaces=namespaces)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged over the ones already known to this parser.
        merged_namespaces = self._namespaces | namespaces
        parser_class = type(self)
        return parser_class(merged_namespaces)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        # This parser has no custom function support, so this always fails.
        raise ValueError('Custom functions are not supported for XPath1 parsers.')
class XPath31QueryParser(ElementPathXPathQueryParser):

    def __init__(self,
                 namespaces: dict[str, str] | None = None,
                 custom_functions: list[CustomXPathFunction] | None = None):
        """Parser for XPath 3.1 queries, backed by the `XPath31Parser` of the `elementpath` library.

        Args:
            namespaces: the namespaces (prefix to URI) available while parsing.
            custom_functions: the custom functions to make available to the parsed queries.
        """
        super().__init__(XPath31Parser, namespaces=namespaces, custom_functions=custom_functions)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged over the known ones, the custom functions are carried over.
        merged_namespaces = self._namespaces | namespaces
        parser_class = type(self)
        return parser_class(merged_namespaces, custom_functions=self._custom_functions)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        # A new list is created, the list held by this parser is left untouched.
        extended_functions = self._custom_functions + [custom_function]
        parser_class = type(self)
        return parser_class(namespaces=self._namespaces, custom_functions=extended_functions)
class RuyamlASTYamlConverter(ASTYamlConverter):

    def __init__(self):
        """AST to YAML (and back) converter backed by the ruyaml library.

        All loading and dumping is delegated to an internal YAML codec specialized for AST nodes.
        """
        self._codec = _ASTYamlCodec()

    def load(self, stream: Path | Any) -> SchematronASTNode:
        return self._codec.load(stream)

    def dump(self, data: SchematronASTNode, stream: Path | Any | None = None) -> str | None:
        if stream is not None:
            # The caller provided a destination, write there and return nothing.
            self._codec.dump(data, stream)
            return None

        # No destination provided, dump into a temporary buffer and hand back its content.
        with StringIO() as buffer:
            self._codec.dump(data, buffer)
            return buffer.getvalue()
78 | """ 79 | super().__init__(*args, typ='safe', **kwargs) 80 | self._add_ast_representers() 81 | 82 | def load(self, stream: Path | Any) -> Any: 83 | loaded_objects = super().load(stream) 84 | if isinstance(loaded_objects, SchematronASTNodeBuilder): 85 | return loaded_objects.build() 86 | return loaded_objects 87 | 88 | def _add_ast_representers(self): 89 | """Add YAML representers (loading and dumping) for all relevant types in `pyschematron.ast`. 90 | 91 | This modifies the current instance by adding representer objects. 92 | """ 93 | representers = self._get_ast_node_representers() 94 | representers.append(PathRepresenter()) 95 | 96 | for representer in representers: 97 | self.representer.add_representer(representer.element_class, representer.get_dumping_function()) 98 | self.constructor.add_constructor(representer.yaml_tag, representer.get_loading_function()) 99 | 100 | def _get_ast_node_representers(self) -> list["YamlRepresenter"]: 101 | """Get representers for all the :class:`SchematronASTNode` nodes. 102 | 103 | Returns: 104 | A list of YAML representers for the AST nodes. 105 | """ 106 | ast_nodes = self._list_representable_ast_nodes() 107 | return [self._get_representer(ast_node) for ast_node in ast_nodes] 108 | 109 | def _list_representable_ast_nodes(self) -> list[type[SchematronASTNode]]: 110 | """List the representable AST nodes. 111 | 112 | Returns: 113 | Get a list of AST nodes we can YAML represent. 114 | """ 115 | def filter_function(el): 116 | return inspect.isclass(el) and issubclass(el, SchematronASTNode) and el is not SchematronASTNode 117 | return [el[1] for el in inspect.getmembers(pyschematron.direct_mode.schematron.ast, filter_function)] 118 | 119 | def _get_representer(self, ast_node: type[SchematronASTNode]) -> "YamlRepresenter": 120 | """Get a representor for the indicated node type. 121 | 122 | Args: 123 | ast_node: the type of AST node for which we want a representer. 
class YamlRepresenter(metaclass=ABCMeta):
    """Specialized class for YAML representing elements and contents of :class:`SchematronASTNode`.

    This works in conjunction with the Ruyaml library.
    """

    @property
    @abstractmethod
    def element_class(self) -> Any:
        """Get the type of element this representer represents."""

    @property
    @abstractmethod
    def yaml_tag(self) -> str:
        """Get the YAML tag for the class representation."""

    @abstractmethod
    def get_dumping_function(self) -> Callable[[BaseRepresenter, Any], Node]:
        """Get the dumping function `to_yaml` we can use to dump the item to YAML.

        Returns:
            A function creating the ruyaml Node which represents the item.
        """

    @abstractmethod
    def get_loading_function(self) -> Callable[[BaseConstructor, Node], Any]:
        """Get the loading function we can use to load the YAML node into a `SchematronASTNode`.

        Returns:
            A function to load the yaml representation.
        """


class PathRepresenter(YamlRepresenter):
    """YAML representer for file system paths, stored as a scalar with the tag `!Path`."""

    @property
    def element_class(self) -> Any:
        """Get the concrete path class of the current platform.

        Instantiating `Path` yields a platform specific subclass (`PosixPath` or `WindowsPath`), and only that
        subclass can be instantiated on the running system. Resolving the class at runtime, instead of hard-coding
        `PosixPath`, keeps dumping and loading of paths working on Windows as well.
        """
        return type(Path())

    @property
    def yaml_tag(self) -> str:
        return '!Path'

    def get_dumping_function(self) -> Callable[[BaseRepresenter, Any], Node]:
        def to_yaml(representer: BaseRepresenter, node: Any) -> Node:
            # Paths are dumped as their string representation.
            return representer.represent_scalar(self.yaml_tag, str(node))
        return to_yaml

    def get_loading_function(self) -> Callable[[BaseConstructor, Node], Any]:
        def from_yaml(constructor: BaseConstructor, node: Node) -> Any:
            # The scalar value of the YAML node holds the path string.
            return self.element_class(node.value)
        return from_yaml
class GenericASTNodeYamlRepresenter(ASTNodeYamlRepresenter):

    def __init__(self, node_type: type[SchematronASTNode]):
        """Generic representer usable for any :class:`SchematronASTNode` dataclass.

        Loading a YAML node with this representer yields a :class:`SchematronASTNodeBuilder`,
        not the final AST node.

        Args:
            node_type: the AST node class handled by this representer
        """
        self._node_type = node_type

    @property
    def element_class(self) -> type[SchematronASTNode]:
        return self._node_type

    @property
    def yaml_tag(self) -> str:
        return '!' + self._node_type.__name__

    def _to_yaml(self, representer: BaseRepresenter, node: SchematronASTNode) -> Node:
        """Dump the init fields of the node as a YAML mapping tagged with the node's class name."""
        node_data = {}
        for field in dataclasses.fields(self.element_class):
            if not field.init:
                continue
            value = getattr(node, field.name)
            # Only truthy values are dumped.
            # NOTE(review): this also leaves out values like `False`, `0` and `''` -- confirm this is intended.
            if value:
                node_data[field.name] = value

        return representer.represent_mapping(self.yaml_tag, node_data)

    def _from_yaml(self, constructor: BaseConstructor, node: Node) -> SchematronASTNodeBuilder:
        """Load the YAML mapping and wrap it in a builder for delayed construction of the node."""
        init_values = constructor.construct_mapping(node)
        return DictionaryNodeBuilder(self._node_type, init_values)
class DictionaryNodeBuilder(SchematronASTNodeBuilder):

    def __init__(self, node_type: type[SchematronASTNode], init_values: dict):
        """Builder constructing a Schematron AST node from a dictionary of init values.

        When building, the init values are converted (recursively) as follows:
            1. builders are replaced by the node they build
            2. strings are kept as they are
            3. mappings become dictionaries with converted values
            4. all other iterables (e.g. lists) become tuples with converted elements

        Args:
            node_type: the type of node we will build
            init_values: the init values to pass to the constructor.
        """
        self._node_type = node_type
        self._init_values = init_values

    def build(self) -> SchematronASTNode:
        def resolve(item):
            if isinstance(item, SchematronASTNodeBuilder):
                return item.build()
            # Strings are iterable as well, so they are handled before the generic iterable case.
            if isinstance(item, str):
                return item
            if isinstance(item, Mapping):
                return {key: resolve(element) for key, element in item.items()}
            if isinstance(item, Iterable):
                return tuple(map(resolve, item))
            return item

        resolved_inits = {name: resolve(raw_value) for name, raw_value in self._init_values.items()}
        return self._node_type(**resolved_inits)
pyschematron.direct_mode.schematron.ast import ConcreteRule, Assert, Report, ConcretePattern, Schema 17 | from pyschematron.direct_mode.xml_validation.queries.base import EvaluationContext 18 | from pyschematron.direct_mode.xml_validation.results.xml_nodes import XMLNode 19 | 20 | 21 | @dataclass(slots=True, frozen=True) 22 | class ValidationResult: 23 | """Type class for the validation results. """ 24 | 25 | 26 | @dataclass(slots=True, frozen=True) 27 | class XMLDocumentValidationResult(ValidationResult): 28 | """Result class for the full evaluation of the entire XML document. 29 | 30 | This encapsulates the processing of all patterns over all nodes. 31 | 32 | Args: 33 | xml_information: the knowledge of the XML 34 | schema_information: information about the applied Schema 35 | node_results: the results over all nodes 36 | """ 37 | xml_information: XMLInformation 38 | schema_information: SchemaInformation 39 | node_results: tuple[FullNodeResult, ...] 40 | 41 | def is_valid(self) -> bool: 42 | """Return True if the XML document was considered valid, False otherwise. 43 | 44 | According to the specifications, a successful report is considered a failure. As such, this method considers 45 | an XML document to be valid if none of the assertions and none of the reports were raised. 46 | 47 | Returns: 48 | True if the document passed the Schematron validation, False otherwise. 49 | """ 50 | for node_result in self.node_results: 51 | if not node_result.is_valid(): 52 | return False 53 | return True 54 | 55 | 56 | @dataclass(slots=True, frozen=True) 57 | class XMLInformation(ValidationResult): 58 | """Container for the knowledge of the XML being validated. 59 | 60 | This encapsulates the processing of all patterns over all nodes. 
@dataclass(slots=True, frozen=True)
class SchemaInformation(ValidationResult):
    """Container for the information of the Schematron used during validation.

    Args:
        schema: the Schema AST node used during evaluation
        phase: the phase used in evaluation
        schematron_base_path: the base path from which we loaded the Schematron file, provided for context.
    """
    schema: Schema
    # No trailing comma after the default: ``= None,`` silently turns the default into the tuple ``(None,)``.
    phase: str | Literal['#ALL', '#DEFAULT'] | None = None
    schematron_base_path: Path | None = None
@dataclass(slots=True, frozen=True)
class PatternResult(BaseXMLNodeResult):
    """Result of evaluating one pattern on one XML node.

    Args:
        pattern: a reference to the evaluated pattern
        rule_results: the rule results, one for each rule in the pattern.
    """
    pattern: ConcretePattern
    rule_results: tuple[RuleResult, ...]

    def has_fired_rule(self) -> bool:
        """Check if at least one rule of this pattern fired for the node.

        Returns:
            True if there was an active rule in this pattern for the node, False otherwise
        """
        for rule_result in self.rule_results:
            if rule_result.is_fired():
                return True
        return False

    def is_valid(self) -> bool:
        """Check if all fired rules yielded a valid result.

        Skipped and suppressed rules are not taken into account, only fired rules can invalidate a pattern.

        Returns:
            True if none of the fired rules was invalid, False otherwise.
        """
        return all(rule_result.is_valid()
                   for rule_result in self.rule_results
                   if isinstance(rule_result, FiredRuleResult))
@dataclass(slots=True, frozen=True)
class SuppressedRuleResult(RuleResult):
    """Result of a rule which matched, but was shadowed by a preceding rule."""

    @classmethod
    def from_fired_rule_result(cls, fired_rule_result: FiredRuleResult):
        """Create a suppressed rule result out of a fired rule result.

        Convenience constructor, it copies the XML node, the evaluation context and the rule of the fired result.
        The check results and the subject node of the fired result are not carried over.

        Args:
            fired_rule_result: the fired result we would like to transform
        """
        return cls(xml_node=fired_rule_result.xml_node,
                   evaluation_context=fired_rule_result.evaluation_context,
                   rule=fired_rule_result.rule)

    def is_skipped(self) -> bool:
        return False

    def is_fired(self) -> bool:
        return False

    def is_suppressed(self) -> bool:
        return True
@dataclass(slots=True, frozen=True)
class CheckResult(BaseXMLNodeResult):
    """The result of checking a Schematron assert or report on an XML node.

    The stored test result only tells if the test of the check evaluated to true or false, it is independent of
    the nature of the check. A false test result of an assert is a failure and will be reported, for a report
    it is the true test result which is reported. Use the `check_result` property for this derived outcome.

    Args:
        check: the check which was run
        test_result: the result of the test in the check.
        text: the text result from the rich text content.
        subject_node: the node referenced by the subject attribute of the Schematron check.
        property_results: the results of the properties referenced by the check, if any.
        diagnostic_results: the results of the diagnostics referenced by the check, if any.
    """
    check: Assert | Report
    test_result: bool
    text: str
    subject_node: XMLNode | None
    property_results: tuple[PropertyResult, ...] | None = None
    diagnostic_results: tuple[DiagnosticResult, ...] | None = None

    @property
    def check_result(self) -> bool:
        """Get the outcome of the check, derived from the kind of check and the test result.

        In Schematron, tests can be written in one of two ways:

        - an assert outputs a message if an XPath test evaluates to false.
        - a report outputs a message if an XPath test evaluates to true.

        The test result stored in this class is the raw state of the test, this property translates it
        according to the following table:

        +--------+-------------+--------------+
        | Check  | Test result | Return value |
        +========+=============+==============+
        | Assert | true        | false        |
        | Assert | false       | true         |
        | Report | true        | true         |
        | Report | false       | false        |
        +--------+-------------+--------------+

        Returns:
            If the return value is true, we are either dealing with a failed assert, or a successful report.
            If the return value is false, we have a successful assert, or a failed report.
        """
        # Only an assert inverts the test result, anything else is handled as a report.
        return not self.test_result if isinstance(self.check, Assert) else self.test_result
333 | 334 | Args: 335 | text: resulting text 336 | diagnostic_id: identifier of this diagnostic 337 | xml_lang: the xml language attribute for this diagnostic 338 | xml_space: the xml_space attribute from the diagnostic 339 | """ 340 | text: str 341 | diagnostic_id: str 342 | xml_lang: str | None = None 343 | xml_space: Literal['default', 'preserve'] | None = None 344 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/svrl/xml_writer.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2024-03-17' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'LGPL v3' 6 | 7 | from abc import ABCMeta, abstractmethod 8 | from typing import override 9 | 10 | from lxml.etree import Element, _Element 11 | from pyschematron.direct_mode.svrl.ast import SchematronOutput, SVRLNode, NSPrefixInAttributeValues, Text, \ 12 | ActivePattern, MetaData, FiredRule, FailedAssert, SuppressedRule, SuccessfulReport, PropertyReference, \ 13 | DiagnosticReference 14 | from pyschematron.direct_mode.svrl.svrl_visitors import SVRLASTVisitor 15 | 16 | 17 | class SVRLWriter(metaclass=ABCMeta): 18 | """Type class for SVRL XML writers.""" 19 | 20 | @abstractmethod 21 | def create_xml(self, schematron_output: SchematronOutput) -> _Element: 22 | """Transform a Schematron output SVRL root node, to an XML element. 23 | 24 | Args: 25 | schematron_output: the root node of an SVRL AST. 26 | 27 | Returns: 28 | An XML representation of the provided SVRL output node. 
29 | """ 30 | 31 | 32 | class LxmlSVRLWriter(SVRLWriter): 33 | """SVRL writer using the Lxml library.""" 34 | 35 | @override 36 | def create_xml(self, schematron_output: SchematronOutput) -> _Element: 37 | nsmap = { 38 | 'svrl': 'http://purl.oclc.org/dsdl/svrl', 39 | 'sch': 'http://purl.oclc.org/dsdl/schematron', 40 | 'xs': 'http://www.w3.org/2001/XMLSchema' 41 | } 42 | writer_visitor = _SVRLWriterVisitor(nsmap) 43 | return writer_visitor.visit(schematron_output) 44 | 45 | 46 | class _SVRLWriterVisitor(SVRLASTVisitor): 47 | 48 | def __init__(self, nsmap: dict[str, str]): 49 | """SVRL XML writer using the visitor pattern. 50 | 51 | Args: 52 | nsmap: the default namespaces to apply in the SVRL, may be overwritten by the Schematron. 53 | """ 54 | self._nsmap = nsmap 55 | 56 | @override 57 | def visit(self, svrl_node: SVRLNode) -> _Element | None: 58 | match svrl_node: 59 | case SchematronOutput(): 60 | return self._process_schematron_output(svrl_node) 61 | case NSPrefixInAttributeValues(): 62 | return self._process_ns_prefix_node(svrl_node) 63 | case MetaData(): 64 | return self._process_metadata(svrl_node) 65 | case ActivePattern(): 66 | return self._process_active_pattern_node(svrl_node) 67 | case FiredRule(): 68 | return self._process_fired_rule_node(svrl_node) 69 | case SuppressedRule(): 70 | return self._process_suppressed_rule_node(svrl_node) 71 | case FailedAssert(): 72 | return self._process_failed_assert(svrl_node) 73 | case SuccessfulReport(): 74 | return self._process_successful_report(svrl_node) 75 | case Text(): 76 | return self._process_text_node(svrl_node) 77 | case PropertyReference(): 78 | return self._process_property_reference(svrl_node) 79 | case DiagnosticReference(): 80 | return self._process_diagnostic_reference(svrl_node) 81 | return None 82 | 83 | def _process_schematron_output(self, schematron_output: SchematronOutput) -> _Element: 84 | """Process the SchematronOutput node, the root of the SVRL. 
85 | 86 | Args: 87 | schematron_output: the SVRL root node 88 | 89 | Returns: 90 | An element representing the SVRL report. 91 | """ 92 | for ns_prefix in schematron_output.ns_prefix_in_attribute_values: 93 | self._nsmap[ns_prefix.prefix] = ns_prefix.uri 94 | 95 | node_attributes = { 96 | 'phase': schematron_output.phase, 97 | 'schemaVersion': schematron_output.schema_version, 98 | 'title': schematron_output.title 99 | } 100 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 101 | 102 | root = Element(f'{{{self._nsmap["svrl"]}}}schematron-output', attrib=final_attributes, nsmap=self._nsmap) 103 | 104 | for text_node in schematron_output.texts: 105 | root.append(self.visit(text_node)) 106 | 107 | for ns_prefix in schematron_output.ns_prefix_in_attribute_values: 108 | root.append(self.visit(ns_prefix)) 109 | 110 | if schematron_output.metadata: 111 | root.append(self.visit(schematron_output.metadata)) 112 | 113 | for validation_event in schematron_output.validation_events: 114 | el = self.visit(validation_event) 115 | if el is not None: 116 | root.append(el) 117 | 118 | return root 119 | 120 | def _process_ns_prefix_node(self, ns_prefix: NSPrefixInAttributeValues) -> _Element: 121 | """Process the namespace prefix node. 122 | 123 | Args: 124 | ns_prefix: the prefix node to render into an XML element 125 | 126 | Returns: 127 | The created XML element 128 | """ 129 | return Element(f'{{{self._nsmap["svrl"]}}}ns-prefix-in-attribute-values', 130 | attrib={'prefix': ns_prefix.prefix, 'uri': ns_prefix.uri}, 131 | nsmap=self._nsmap) 132 | 133 | def _process_metadata(self, metadata: MetaData) -> _Element: 134 | """Process the metadata node. 
135 | 136 | Args: 137 | metadata: the metadata node 138 | 139 | Returns: 140 | The created XML node 141 | """ 142 | additional_namespaces = {} 143 | for namespace in metadata.namespaces: 144 | additional_namespaces[namespace.prefix] = namespace.uri 145 | 146 | metadata_root = Element(f'{{{self._nsmap["svrl"]}}}metadata', nsmap=self._nsmap | additional_namespaces) 147 | for element in metadata.xml_elements: 148 | metadata_root.append(element) 149 | 150 | return metadata_root 151 | 152 | def _process_active_pattern_node(self, active_pattern: ActivePattern) -> _Element: 153 | """Process an active pattern node. 154 | 155 | Args: 156 | active_pattern: The active pattern node 157 | 158 | Returns: 159 | The created XML element 160 | """ 161 | documents = None 162 | if active_pattern.documents: 163 | documents = ' '.join([f'file:{doc}' for doc in active_pattern.documents]) 164 | 165 | node_attributes = { 166 | 'documents': documents, 167 | 'id': active_pattern.id, 168 | 'name': active_pattern.name, 169 | } 170 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 171 | 172 | return Element(f'{{{self._nsmap["svrl"]}}}active-pattern', attrib=final_attributes, nsmap=self._nsmap) 173 | 174 | def _process_fired_rule_node(self, fired_rule: FiredRule) -> _Element: 175 | """Process a fired rule node. 
176 | 177 | Args: 178 | fired_rule: information on the fired rule 179 | 180 | Returns: 181 | The created XML element 182 | """ 183 | document = None 184 | if fired_rule.document: 185 | document = f'file:{fired_rule.document}' 186 | 187 | node_attributes = { 188 | 'context': fired_rule.context.query, 189 | 'document': document, 190 | 'flag': fired_rule.flag, 191 | 'id': fired_rule.id, 192 | 'name': fired_rule.name, 193 | 'role': fired_rule.role 194 | } 195 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 196 | 197 | return Element(f'{{{self._nsmap["svrl"]}}}fired-rule', attrib=final_attributes, nsmap=self._nsmap) 198 | 199 | def _process_suppressed_rule_node(self, suppressed_rule: SuppressedRule) -> _Element: 200 | """Process a suppressed rule node 201 | 202 | Args: 203 | suppressed_rule: information on the suppressed rule 204 | 205 | Returns: 206 | The created XML element 207 | """ 208 | node_attributes = { 209 | 'context': suppressed_rule.context.query, 210 | 'id': suppressed_rule.id, 211 | } 212 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 213 | 214 | return Element(f'{{{self._nsmap["svrl"]}}}suppressed-rule', attrib=final_attributes, nsmap=self._nsmap) 215 | 216 | def _process_failed_assert(self, failed_assert: FailedAssert) -> _Element: 217 | """Process a failed assert node. 218 | 219 | Args: 220 | failed_assert: the failed assert information 221 | 222 | Returns: 223 | The created XML element 224 | """ 225 | return self._process_check_result(failed_assert) 226 | 227 | def _process_successful_report(self, successful_report: SuccessfulReport) -> _Element: 228 | """Process a successful report node. 
229 | 230 | Args: 231 | successful_report: the successful report information 232 | 233 | Returns: 234 | The created XML element 235 | """ 236 | return self._process_check_result(successful_report) 237 | 238 | def _process_check_result(self, check_result: FailedAssert | SuccessfulReport): 239 | """Process a check result. 240 | 241 | Depending on the type of input we return either a `successful-report` or a `failed-assert` element. 242 | 243 | Args: 244 | check_result: the check result. 245 | 246 | Returns: 247 | The created XML element 248 | """ 249 | node_attributes = { 250 | 'flag': check_result.flag, 251 | 'id': check_result.id, 252 | 'location': check_result.location.expression, 253 | 'role': check_result.role, 254 | 'test': check_result.test.query 255 | } 256 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 257 | 258 | if subject_location := check_result.subject_location: 259 | final_attributes['location'] = subject_location.expression 260 | 261 | node_name = 'successful-report' 262 | if isinstance(check_result, FailedAssert): 263 | node_name = 'failed-assert' 264 | 265 | report_element = Element(f'{{{self._nsmap["svrl"]}}}{node_name}', 266 | attrib=final_attributes, nsmap=self._nsmap) 267 | 268 | if check_result.diagnostic_references: 269 | for diagnostic_reference in check_result.diagnostic_references: 270 | report_element.append(self.visit(diagnostic_reference)) 271 | 272 | if check_result.property_references: 273 | for property_reference in check_result.property_references: 274 | report_element.append(self.visit(property_reference)) 275 | 276 | if check_result.text.content: 277 | report_element.append(self.visit(check_result.text)) 278 | 279 | return report_element 280 | 281 | def _process_text_node(self, text: Text) -> _Element: 282 | """Process a text node. 283 | 284 | Args: 285 | text: the text node to convert into an element. 
286 | 287 | Returns: 288 | The created XML element 289 | """ 290 | node_attributes = { 291 | 'fpi': text.fpi, 292 | 'icon': text.icon, 293 | 'see': text.see, 294 | 'class': text.class_, 295 | 'id': text.id, 296 | '{http://www.w3.org/XML/1998/namespace}lang': text.xml_lang, 297 | '{http://www.w3.org/XML/1998/namespace}space': text.xml_space, 298 | } 299 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 300 | 301 | text_element = Element(f'{{{self._nsmap["svrl"]}}}text', attrib=final_attributes, nsmap=self._nsmap) 302 | text_element.text = text.content 303 | return text_element 304 | 305 | def _process_property_reference(self, property_reference: PropertyReference) -> _Element: 306 | """Process a property reference node. 307 | 308 | Args: 309 | property_reference: the property reference to convert into a node. 310 | 311 | Returns: 312 | The created XML element 313 | """ 314 | node_attributes = { 315 | 'property': property_reference.property, 316 | 'role': property_reference.role, 317 | 'scheme': property_reference.scheme 318 | } 319 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 320 | 321 | property_reference_element = Element(f'{{{self._nsmap["svrl"]}}}property-reference', 322 | attrib=final_attributes, nsmap=self._nsmap) 323 | 324 | if property_reference.text: 325 | property_reference_element.append(self.visit(property_reference.text)) 326 | 327 | return property_reference_element 328 | 329 | def _process_diagnostic_reference(self, diagnostic_reference: DiagnosticReference) -> _Element: 330 | """Process a diagnostic reference node. 331 | 332 | Args: 333 | diagnostic_reference: the diagnostic reference to convert into a node. 
334 | 335 | Returns: 336 | The created XML element 337 | """ 338 | node_attributes = { 339 | 'diagnostic': diagnostic_reference.diagnostic, 340 | } 341 | final_attributes = {k: v for k, v in node_attributes.items() if v is not None} 342 | 343 | diagnostic_reference_element = Element(f'{{{self._nsmap["svrl"]}}}diagnostic-reference', 344 | attrib=final_attributes, nsmap=self._nsmap) 345 | 346 | if diagnostic_reference.text: 347 | diagnostic_reference_element.append(self.visit(diagnostic_reference.text)) 348 | 349 | return diagnostic_reference_element 350 | -------------------------------------------------------------------------------- /pyschematron/direct_mode/xml_validation/results/svrl_builder.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Robbert Harms' 2 | __date__ = '2023-04-24' 3 | __maintainer__ = 'Robbert Harms' 4 | __email__ = 'robbert@xkls.nl' 5 | __licence__ = 'GPL v3' 6 | 7 | from abc import ABCMeta, abstractmethod 8 | 9 | from lxml.etree import Element, SubElement, ElementTree, _ElementTree 10 | 11 | from datetime import datetime 12 | 13 | from pyschematron import __version__ 14 | from pyschematron.direct_mode.schematron.ast import Assert, Namespace, ConcretePattern 15 | from pyschematron.direct_mode.svrl.ast import ActivePattern, FiredRule, SchematronQuery, FailedAssert, \ 16 | XPathExpression, Text, SchematronOutput, SuccessfulReport, SuppressedRule, NSPrefixInAttributeValues, MetaData, \ 17 | CheckResult, ValidationEvent, PropertyReference, DiagnosticReference 18 | 19 | from pyschematron.direct_mode.svrl.xml_writer import LxmlSVRLWriter 20 | from pyschematron.direct_mode.xml_validation.results.validation_results import (XMLDocumentValidationResult, RuleResult, 21 | FiredRuleResult, SuppressedRuleResult, 22 | PropertyResult, DiagnosticResult) 23 | 24 | 25 | class SVRLReportBuilder(metaclass=ABCMeta): 26 | 27 | @abstractmethod 28 | def create_svrl_xml(self, validation_result: 
class DefaultSVRLReportBuilder(SVRLReportBuilder):
    """Converts the validation results into an SVRL AST and writes that AST as XML.

    In the SVRL report the processing results are enumerated per pattern: first an active pattern event,
    followed, for each fired rule, by a fired rule event and the failed asserts and successful reports of that
    rule. Suppressed rules are listed as suppressed rule events.

    The results of using the validator provides results for every XML node, Schematron pattern and Schematron rule
    checked. This is too comprehensive for the SVRL and as such this class simplifies the results.
    """

    def create_svrl_xml(self, validation_result: XMLDocumentValidationResult) -> _ElementTree:
        title = None
        if title_node := validation_result.schema_information.schema.title:
            title = title_node.content

        svrl = SchematronOutput(
            self._get_text_nodes(validation_result),
            self._get_ns_prefix_nodes(validation_result),
            tuple(self.get_validation_events(validation_result)),
            metadata=self._get_metadata(validation_result),
            phase=validation_result.schema_information.phase,
            schema_version=validation_result.schema_information.schema.schema_version,
            title=title)

        writer = LxmlSVRLWriter()
        return ElementTree(writer.create_xml(svrl))

    def get_validation_events(self, validation_result: XMLDocumentValidationResult) -> list[ValidationEvent]:
        """Extract a list of SVRL validation events from the validation results.

        Args:
            validation_result: The validation results from the validator

        Returns:
            A list of SVRL validation events (active pattern, fired rule, failed assert,
            successful report, suppressed rule).
        """
        root_document = None
        if xml_document := validation_result.xml_information.xml_document:
            if docinfo := xml_document.docinfo:
                root_document = docinfo.URL

        # Without a known URL we must not list a document at all, ``(None,)`` would be written as "file:None".
        documents = (root_document,) if root_document else ()

        processed_patterns = self._get_rule_results_by_pattern(validation_result)
        validation_events = []
        for pattern, rules_processed in processed_patterns.items():
            name = pattern.title.content if pattern.title else None
            validation_events.append(ActivePattern(id=pattern.id, name=name, documents=documents))

            for rule_processed in rules_processed:
                if isinstance(rule_processed, FiredRuleResult):
                    validation_events.append(FiredRule(SchematronQuery(rule_processed.rule.context.query),
                                                       id=rule_processed.rule.id))
                    validation_events += self._get_svrl_check_results(rule_processed)
                elif isinstance(rule_processed, SuppressedRuleResult):
                    validation_events.append(SuppressedRule(SchematronQuery(rule_processed.rule.context.query)))
        return validation_events

    def _get_svrl_check_results(self, fired_rule: FiredRuleResult) -> list[CheckResult]:
        """Transform the validation checks in the fired rule to a list of SVRL AST check results.

        The subject location of an event is taken from the subject node of the check and, if the check has none,
        from the subject node of the rule. It is determined for each check separately.

        Args:
            fired_rule: the rule which was fired, from this we will return the checks which did not succeed.

        Returns:
            A list of failed asserts or successful reports.
        """
        rule_subject_location = None
        if fired_rule.subject_node:
            rule_subject_location = XPathExpression(fired_rule.subject_node.xpath_location)

        events = []
        for check_result in fired_rule.check_results:
            if not check_result.check_result:
                continue

            # Start from the rule level subject for every check, such that the subject of one check
            # can not leak into the checks following it.
            subject_location = rule_subject_location
            if check_result.subject_node:
                subject_location = XPathExpression(check_result.subject_node.xpath_location)

            property_references = self._get_property_references(check_result.property_results)
            diagnostic_references = self._get_diagnostic_reference(check_result.diagnostic_results)

            event_type = FailedAssert if isinstance(check_result.check, Assert) else SuccessfulReport
            events.append(event_type(Text(check_result.text),
                                     XPathExpression(check_result.xml_node.xpath_location),
                                     SchematronQuery(check_result.check.test.query),
                                     diagnostic_references=tuple(diagnostic_references),
                                     property_references=tuple(property_references),
                                     subject_location=subject_location))
        return events

    @staticmethod
    def _get_property_references(
            property_results: tuple[PropertyResult, ...] | list[PropertyResult] | None) -> list[PropertyReference]:
        """Convert the property results to SVRL property reference nodes.

        Args:
            property_results: the property results, may be None, in which case we return an empty list.

        Returns:
            The list of property references.
        """
        if not property_results:
            return []

        return [PropertyReference(Text(property_result.text), property_result.property_id,
                                  property_result.role, property_result.scheme)
                for property_result in property_results]

    @staticmethod
    def _get_diagnostic_reference(
            diagnostic_results: tuple[DiagnosticResult, ...] | list[DiagnosticResult] | None
    ) -> list[DiagnosticReference]:
        """Convert the diagnostic results to SVRL diagnostic reference nodes.

        Args:
            diagnostic_results: the diagnostic results, may be None, in which case we return an empty list.

        Returns:
            The list of diagnostic references.
        """
        diagnostic_references = []
        for diagnostic_result in diagnostic_results or ():
            text = Text(diagnostic_result.text,
                        xml_lang=diagnostic_result.xml_lang,
                        xml_space=diagnostic_result.xml_space)
            diagnostic_references.append(DiagnosticReference(text, diagnostic=diagnostic_result.diagnostic_id))
        return diagnostic_references

    @staticmethod
    def _get_rule_results_by_pattern(
            validation_result: XMLDocumentValidationResult) -> dict[ConcretePattern, list[RuleResult]]:
        """Reduce the validation results into a dictionary indexed by pattern.

        The validation results consist of data for each XML node, Schematron pattern and Schematron rule. This function
        iterates over the validation results and groups them by Schematron pattern.

        Args:
            validation_result: the validation results

        Returns:
            A dictionary with as keys all the patterns present in the validation results, in order of first
            appearance. The values are the rule results of those node and pattern combinations in which at least
            one rule fired, the list is empty for a pattern in which no rule fired for any node.
        """
        processed_patterns = {}
        for node_result in validation_result.node_results:
            for pattern_result in node_result.pattern_results:
                # Register the pattern even without a fired rule, it is still listed as an active pattern.
                rules_processed = processed_patterns.setdefault(pattern_result.pattern, [])

                if pattern_result.has_fired_rule():
                    rules_processed.extend(pattern_result.rule_results)
        return processed_patterns

    @staticmethod
    def _get_text_nodes(validation_result: XMLDocumentValidationResult) -> tuple[Text, ...]:
        """Get the listing of text nodes we will add to the SVRL output.

        For the text nodes we will use all the paragraph nodes present in the Schematron document.

        Args:
            validation_result: all the validation information

        Returns:
            The text nodes, created from the paragraphs of the Schematron document.
        """
        return tuple(Text(paragraph.content, icon=paragraph.icon,
                          xml_lang=paragraph.xml_lang, id=paragraph.id,
                          class_=paragraph.class_)
                     for paragraph in validation_result.schema_information.schema.paragraphs)

    @staticmethod
    def _get_ns_prefix_nodes(validation_result: XMLDocumentValidationResult) -> tuple[NSPrefixInAttributeValues, ...]:
        """Get the listing of the ns prefix nodes.

        Args:
            validation_result: all the validation information

        Returns:
            The NS prefix nodes, one for each namespace declared in the Schematron schema.
        """
        return tuple(NSPrefixInAttributeValues(schematron_namespace.prefix, schematron_namespace.uri)
                     for schematron_namespace in validation_result.schema_information.schema.namespaces)

    @staticmethod
    def _get_metadata(validation_result: XMLDocumentValidationResult) -> MetaData:
        """Get the metadata node to add to the SVRL.

        The metadata consists of a `dct:creator`, a `dct:created` and a `dct:source` element. The validation
        result itself is currently not used for the metadata.

        Args:
            validation_result: all the validation information

        Returns:
            The metadata node.
        """
        namespaces = (Namespace('dct', 'http://purl.org/dc/terms/'),
                      Namespace('skos', 'http://www.w3.org/2004/02/skos/core#'),
                      Namespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
                      Namespace('pysch', 'https://github.com/robbert-harms/pyschematron'))

        nsmap = {ns.prefix: ns.uri for ns in namespaces}
        create_time = datetime.now().astimezone()

        def add_agent(creator):
            # The Dublin Core class is named ``Agent``, with a capital. Both creators share this helper
            # such that they can not diverge in spelling.
            agent = SubElement(creator, f'{{{nsmap["dct"]}}}Agent')
            pref_label = SubElement(agent, f'{{{nsmap["skos"]}}}prefLabel')
            pref_label.text = f'PySchematron {__version__}'

        def creator_element():
            creator = Element(f'{{{nsmap["dct"]}}}creator', nsmap=nsmap)
            add_agent(creator)
            return creator

        def created_element():
            created = Element(f'{{{nsmap["dct"]}}}created', nsmap=nsmap)
            created.text = create_time.isoformat()
            return created

        def source_element():
            source = Element(f'{{{nsmap["dct"]}}}source', nsmap=nsmap)
            description = SubElement(source, f'{{{nsmap["rdf"]}}}Description')
            add_agent(SubElement(description, f'{{{nsmap["dct"]}}}creator'))
            description.append(created_element())
            return source

        return MetaData((creator_element(), created_element(), source_element()), namespaces)
from typing import override 8 | 9 | from pyschematron.direct_mode.schematron.ast import (SchematronASTNode, Check, Variable, Paragraph, Extends, 10 | ConcreteRule, ExternalRule, AbstractRule, Query, Rule, 11 | ConcretePattern, Pattern, Namespace, Schema, Title, 12 | AbstractPattern, InstancePattern, PatternParameter, Phase, 13 | ActivePhase, Diagnostics, Properties, XPathExpression) 14 | from pyschematron.direct_mode.schematron.parsers.xml.utils import parse_attributes 15 | 16 | 17 | class SchematronASTNodeBuilder(metaclass=ABCMeta): 18 | """Builder pattern for delayed construction of Schematron nodes.""" 19 | 20 | @abstractmethod 21 | def build(self) -> SchematronASTNode: 22 | """Build a Schematron node based on the information in this builder. 23 | 24 | Returns: 25 | The constructed schematron element. 26 | """ 27 | 28 | 29 | class RuleBuilder(SchematronASTNodeBuilder, metaclass=ABCMeta): 30 | 31 | def __init__(self): 32 | """Construct a Rule node out of the parts provided. 33 | 34 | Can not be used directly, one needs to use one of the specialized subclasses. 35 | """ 36 | self.checks: list[Check] = [] 37 | self.variables: list[Variable] = [] 38 | self.paragraphs: list[Paragraph] = [] 39 | self.extends: list[Extends] = [] 40 | self.attributes = {} 41 | 42 | def add_checks(self, nodes: list[Check]): 43 | """Add a list of Check nodes (Report or Assert). 44 | 45 | Args: 46 | nodes: the nodes to add to the list of checks. 47 | """ 48 | self.checks.extend(nodes) 49 | 50 | def add_variables(self, nodes: list[Variable]): 51 | """Add a list of Variable nodes. 52 | 53 | Args: 54 | nodes: the nodes to add to the list of variables. 55 | """ 56 | self.variables.extend(nodes) 57 | 58 | def add_paragraphs(self, nodes: list[Paragraph]): 59 | """Add a list of Paragraph nodes. 60 | 61 | Args: 62 | nodes: the nodes to add to the list of paragraphs. 
63 | """ 64 | self.paragraphs.extend(nodes) 65 | 66 | def add_extends(self, nodes: list[Extends]): 67 | """Add a list of extends nodes. 68 | 69 | Args: 70 | nodes: the nodes to add to the list of extends 71 | """ 72 | self.extends.extend(nodes) 73 | 74 | def add_attributes(self, element_attributes: dict[str, str]): 75 | """Add all the attributes of the XML Rule element in one go. 76 | 77 | Args: 78 | element_attributes: dictionary of attributes taken from the XML node 79 | """ 80 | allowed_attributes = ['context', 'subject', 'flag', 'fpi', 'icon', 'id', 'role', 'see', 81 | '{http://www.w3.org/XML/1998/namespace}lang', 82 | '{http://www.w3.org/XML/1998/namespace}space'] 83 | 84 | attribute_handlers = { 85 | 'context': lambda k, v: {k: Query(v)}, 86 | 'subject': lambda k, v: {k: XPathExpression(v)}, 87 | '{http://www.w3.org/XML/1998/namespace}lang': lambda k, v: {'xml_lang': v}, 88 | '{http://www.w3.org/XML/1998/namespace}space': lambda k, v: {'xml_space': v} 89 | } 90 | 91 | attributes = parse_attributes(element_attributes, allowed_attributes, attribute_handlers) 92 | self.attributes.update(attributes) 93 | 94 | 95 | class ConcreteRuleBuilder(RuleBuilder): 96 | 97 | @override 98 | def build(self) -> ConcreteRule: 99 | if 'context' not in self.attributes: 100 | raise ValueError('A concrete rule must have a context.') 101 | 102 | return ConcreteRule(checks=tuple(self.checks), variables=tuple(self.variables), 103 | paragraphs=tuple(self.paragraphs), extends=tuple(self.extends), **self.attributes) 104 | 105 | 106 | class AbstractRuleBuilder(RuleBuilder): 107 | 108 | @override 109 | def build(self) -> AbstractRule: 110 | if 'context' in self.attributes: 111 | raise ValueError('An abstract rule can not have a context.') 112 | 113 | if 'id' not in self.attributes: 114 | raise ValueError('An abstract rule must have an id.') 115 | 116 | return AbstractRule(checks=tuple(self.checks), variables=tuple(self.variables), 117 | paragraphs=tuple(self.paragraphs), 
class ExternalRuleBuilder(RuleBuilder):

    @override
    def build(self) -> ExternalRule:
        if 'context' not in self.attributes:
            return ExternalRule(checks=tuple(self.checks), variables=tuple(self.variables),
                                paragraphs=tuple(self.paragraphs), extends=tuple(self.extends),
                                **self.attributes)

        raise ValueError('An external rule can not have a context.')


class PatternBuilder(SchematronASTNodeBuilder, metaclass=ABCMeta):

    def __init__(self):
        """Collect the parts of a Pattern node.

        This class does not implement `build`, use one of the specialized subclasses instead.
        """
        self.attributes = {}
        self.title: Title | None = None
        self.pattern_parameters: list[PatternParameter] = []
        self.paragraphs: list[Paragraph] = []
        self.variables: list[Variable] = []
        self.rules: list[Rule] = []

    def add_rules(self, nodes: list[Rule]):
        """Append Rule nodes to the rules collected so far.

        Args:
            nodes: the nodes to add to the list of rules.
        """
        self.rules += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the variables collected so far.

        Args:
            nodes: the nodes to add to the list of variables.
        """
        self.variables += nodes

    def set_title(self, node: Title | None):
        """Store the title node, replacing any title set earlier.

        Args:
            node: the title node
        """
        self.title = node

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the paragraphs collected so far.

        Args:
            nodes: the nodes to add to the list of paragraphs.
        """
        self.paragraphs += nodes

    def add_parameters(self, nodes: list[PatternParameter]):
        """Append PatternParameter nodes to the parameters collected so far.

        Args:
            nodes: the nodes to add to the list of parameters.
        """
        self.pattern_parameters += nodes

    def add_attributes(self, element_attributes: dict[str, str]):
        """Parse the attributes of the XML Pattern element and merge them into the stored attributes.

        The `documents` attribute is wrapped in a `Query`, `is-a` is stored as `abstract_id_ref`,
        and the `xml:lang` and `xml:space` attributes are stored as `xml_lang` and `xml_space`.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        xml_namespace = '{http://www.w3.org/XML/1998/namespace}'
        lang_key = f'{xml_namespace}lang'
        space_key = f'{xml_namespace}space'

        allowed = ['documents', 'fpi', 'icon', 'id', 'see', 'is-a', lang_key, space_key]
        handlers = {
            'documents': lambda k, v: {k: Query(v)},
            'is-a': lambda k, v: {'abstract_id_ref': v},
            lang_key: lambda k, v: {'xml_lang': v},
            space_key: lambda k, v: {'xml_space': v},
        }

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))


class ConcretePatternBuilder(PatternBuilder):

    @override
    def build(self) -> ConcretePattern:
        rules, variables, paragraphs = tuple(self.rules), tuple(self.variables), tuple(self.paragraphs)
        return ConcretePattern(rules=rules, variables=variables, paragraphs=paragraphs,
                               title=self.title, **self.attributes)


class AbstractPatternBuilder(PatternBuilder):

    @override
    def build(self) -> AbstractPattern:
        rules, variables, paragraphs = tuple(self.rules), tuple(self.variables), tuple(self.paragraphs)
        return AbstractPattern(rules=rules, variables=variables, paragraphs=paragraphs,
                               title=self.title, **self.attributes)


class InstancePatternBuilder(PatternBuilder):

    @override
    def build(self) -> InstancePattern:
        # Only the parameters and the attributes are passed on for an instance-of pattern.
        parameters = tuple(self.pattern_parameters)
        return InstancePattern(params=parameters, **self.attributes)
class PhaseBuilder(SchematronASTNodeBuilder):

    def __init__(self):
        """Collect the parts of a Phase node."""
        self.attributes = {}
        self.paragraphs: list[Paragraph] = []
        self.variables: list[Variable] = []
        self.active: list[ActivePhase] = []

    def add_active(self, nodes: list[ActivePhase]):
        """Append ActivePhase nodes to the active phases collected so far.

        Args:
            nodes: the nodes to add to the list of active phases.
        """
        self.active += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the variables collected so far.

        Args:
            nodes: the nodes to add to the list of variables.
        """
        self.variables += nodes

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the paragraphs collected so far.

        Args:
            nodes: the nodes to add to the list of paragraphs.
        """
        self.paragraphs += nodes

    def add_attributes(self, element_attributes: dict[str, str]):
        """Parse the attributes of the XML Phase element and merge them into the stored attributes.

        The `xml:lang` and `xml:space` attributes are stored as `xml_lang` and `xml_space`.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        xml_namespace = '{http://www.w3.org/XML/1998/namespace}'
        lang_key = f'{xml_namespace}lang'
        space_key = f'{xml_namespace}space'

        allowed = ['fpi', 'icon', 'id', 'see', lang_key, space_key]
        handlers = {
            lang_key: lambda k, v: {'xml_lang': v},
            space_key: lambda k, v: {'xml_space': v},
        }

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))

    @override
    def build(self) -> Phase:
        active, variables, paragraphs = tuple(self.active), tuple(self.variables), tuple(self.paragraphs)
        return Phase(active=active, variables=variables, paragraphs=paragraphs, **self.attributes)
class SchemaBuilder(SchematronASTNodeBuilder):

    def __init__(self):
        """Collect the parts of a Schema node."""
        self.attributes = {}
        self.title: Title | None = None
        self.phases: list[Phase] = []
        self.paragraphs: list[Paragraph] = []
        self.variables: list[Variable] = []
        self.properties: list[Properties] = []
        self.diagnostics: list[Diagnostics] = []
        self.namespaces: list[Namespace] = []
        self.patterns: list[Pattern] = []

    def add_patterns(self, nodes: list[Pattern]):
        """Append Pattern nodes to the patterns collected so far.

        Args:
            nodes: the nodes to add to the list of patterns.
        """
        self.patterns += nodes

    def add_namespaces(self, nodes: list[Namespace]):
        """Append Namespace nodes to the namespaces collected so far.

        Args:
            nodes: the nodes to add to the list of namespaces.
        """
        self.namespaces += nodes

    def add_phases(self, nodes: list[Phase]):
        """Append Phase nodes to the phases collected so far.

        Args:
            nodes: the nodes to add to the list of phases.
        """
        self.phases += nodes

    def add_diagnostics(self, nodes: list[Diagnostics]):
        """Append Diagnostics nodes to the diagnostics collected so far.

        Args:
            nodes: the nodes to add to the list of diagnostics.
        """
        self.diagnostics += nodes

    def add_properties(self, nodes: list[Properties]):
        """Append Properties nodes to the properties collected so far.

        Args:
            nodes: the nodes to add to the list of properties.
        """
        self.properties += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the variables collected so far.

        Args:
            nodes: the nodes to add to the list of variables.
        """
        self.variables += nodes

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the paragraphs collected so far.

        Args:
            nodes: the nodes to add to the list of paragraphs.
        """
        self.paragraphs += nodes

    def set_title(self, node: Title | None):
        """Store the title node, replacing any title set earlier.

        Args:
            node: the title node
        """
        self.title = node

    def add_attributes(self, element_attributes: dict[str, str]):
        """Parse the attributes of the XML Schema element and merge them into the stored attributes.

        The camel cased attributes `defaultPhase`, `queryBinding` and `schemaVersion` are stored
        under their snake cased names, `xml:lang` and `xml:space` as `xml_lang` and `xml_space`.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        xml_namespace = '{http://www.w3.org/XML/1998/namespace}'
        lang_key = f'{xml_namespace}lang'
        space_key = f'{xml_namespace}space'

        allowed = ['defaultPhase', 'fpi', 'icon', 'id', 'queryBinding', 'schemaVersion', 'see',
                   lang_key, space_key]
        handlers = {
            'defaultPhase': lambda k, v: {'default_phase': v},
            'queryBinding': lambda k, v: {'query_binding': v},
            'schemaVersion': lambda k, v: {'schema_version': v},
            lang_key: lambda k, v: {'xml_lang': v},
            space_key: lambda k, v: {'xml_space': v},
        }

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))

    @override
    def build(self) -> Schema:
        collected_parts = {
            'patterns': tuple(self.patterns),
            'namespaces': tuple(self.namespaces),
            'phases': tuple(self.phases),
            'paragraphs': tuple(self.paragraphs),
            'variables': tuple(self.variables),
            'diagnostics': tuple(self.diagnostics),
            'properties': tuple(self.properties),
            'title': self.title,
        }
        return Schema(**collected_parts, **self.attributes)
class SchematronASTVisitor(GenericASTVisitor[SchematronASTNode], metaclass=ABCMeta):
    """Visitor pattern for the Schematron AST nodes."""


class FindIdVisitor(SchematronASTVisitor):

    def __init__(self, id_ref: str):
        """A visitor which searches the visited nodes for a node with the given ID.

        Args:
            id_ref: the id we would like to find in the visited nodes
        """
        super().__init__()
        self._id_ref = id_ref

    @override
    def visit(self, ast_node: SchematronASTNode) -> Any:
        # The node itself is tested before any of its children.
        if hasattr(ast_node, 'id') and ast_node.id == self._id_ref:
            return ast_node

        # Depth first search, the first truthy result found in the children wins.
        matches = (found_node for child in ast_node.get_children() if (found_node := self.visit(child)))
        return next(matches, None)
class GetIDMappingVisitor(SchematronASTVisitor):

    def __init__(self):
        """A visitor which collects a mapping from id to node, for all nodes having an id."""
        super().__init__()
        self._result = {}

    @override
    def visit(self, ast_node: SchematronASTNode) -> Any:
        self._result.update(self._visit(ast_node))
        return self._result

    def _visit(self, ast_node: SchematronASTNode) -> dict[str, SchematronASTNode]:
        # The children add themselves to the shared result by accepting this same visitor.
        for child in ast_node.get_children():
            child.accept_visitor(self)

        node_id = getattr(ast_node, 'id', None)
        return {} if node_id is None else {node_id: ast_node}


class GetNodesOfTypeVisitor(SchematronASTVisitor):

    def __init__(self, types: type[SchematronASTNode] | tuple[type[SchematronASTNode], ...]):
        """A visitor which collects every visited node which is an instance of the type(s) provided.

        Args:
            types: a single type or a tuple of types we check each node against.
        """
        super().__init__()
        self._result = []
        self._types = types

    @override
    def visit(self, ast_node: SchematronASTNode) -> Any:
        # Children are handled first, a matching parent is listed after its matching descendants.
        for child in ast_node.get_children():
            child.accept_visitor(self)

        if not isinstance(ast_node, self._types):
            return self._result

        self._result.append(ast_node)
        return self._result
class ResolveExtendsVisitor(SchematronASTVisitor):

    def __init__(self, schema: Schema):
        """Simplify an AST Schema by inlining all the extends in the rules.

        The variables and checks of every extended rule are copied into the extending rule.
        Only `ConcreteRule` items are kept in the patterns, `AbstractRule` and `ExternalRule`
        items are dropped after inlining.

        Args:
            schema: the full Schema as input to lookup all the rules by ID.
        """
        super().__init__()
        self._schema = schema

    @override
    def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode:
        if isinstance(ast_node, Schema):
            return self._process_schema(ast_node)
        if isinstance(ast_node, (ConcretePattern, AbstractPattern)):
            return self._process_pattern(ast_node)
        if isinstance(ast_node, Rule):
            return self._process_rule(ast_node)
        if isinstance(ast_node, ExtendsExternal):
            return self._process_extends_external(ast_node)
        if isinstance(ast_node, ExtendsById):
            return self._process_extends_by_id(ast_node)
        return ast_node

    def _process_schema(self, schema: Schema) -> Schema:
        """Process a Schema by processing each of its patterns.

        Args:
            schema: the schema to process

        Returns:
            A copy of the schema holding the processed patterns
        """
        processed = tuple(ResolveExtendsVisitor(self._schema).apply(pattern) for pattern in schema.patterns)
        return schema.with_updated(patterns=processed)

    def _process_pattern(self, pattern: ConcretePattern | AbstractPattern) -> ConcretePattern | AbstractPattern:
        """Process a pattern by processing each of its rules.

        Args:
            pattern: the pattern to process

        Returns:
            A copy of the pattern holding only the processed rules which are concrete rules.
        """
        processed = (ResolveExtendsVisitor(self._schema).apply(rule) for rule in pattern.rules)
        concrete_rules = tuple(rule for rule in processed if isinstance(rule, ConcreteRule))
        return pattern.with_updated(rules=concrete_rules)

    def _process_rule(self, rule: Rule) -> Rule:
        """Process a rule by inlining all the extends.

        Args:
            rule: the rule we wish to process

        Returns:
            A new rule without extends, in which the checks and variables of the extended rules
            are placed in front of the checks and variables of the rule itself.
        """
        parents = [ResolveExtendsVisitor(self._schema).apply(extends) for extends in rule.extends]

        inherited_checks = tuple(check for parent in parents for check in parent.checks)
        inherited_variables = tuple(variable for parent in parents for variable in parent.variables)

        return rule.with_updated(checks=inherited_checks + rule.checks,
                                 variables=inherited_variables + rule.variables,
                                 extends=())

    def _process_extends_by_id(self, extends: ExtendsById) -> AbstractRule:
        """Process an extends which points to an abstract rule.

        Args:
            extends: the extends node we are processing

        Returns:
            The node found for the referenced id, with its own extends resolved.

        Raises:
            ValueError: if no node with the referenced id is present in the schema.
        """
        id_ref = extends.id_ref
        referenced_rule = FindIdVisitor(id_ref).apply(self._schema)
        if referenced_rule is not None:
            return ResolveExtendsVisitor(self._schema).apply(referenced_rule)

        raise ValueError(f'Can\'t find the abstract rule with id "{id_ref}"')

    def _process_extends_external(self, extends: ExtendsExternal) -> ExternalRule:
        """Process an external extend by returning the loaded rule.

        Args:
            extends: the extends node we are processing

        Returns:
            The rule stored in the extends node, with its own extends resolved.
        """
        external_rule = extends.rule
        return ResolveExtendsVisitor(self._schema).apply(external_rule)
class ResolveAbstractPatternsVisitor(SchematronASTVisitor):

    def __init__(self, schema: Schema):
        """Simplify an AST Schema by expanding all the instance-of patterns.

        Each instance-of pattern is replaced by a macro expanded copy of the abstract pattern it
        refers to. Only concrete patterns remain in the schema afterwards.

        Args:
            schema: the full Schema as input to lookup the abstract patterns by ID.
        """
        super().__init__()
        self._schema = schema

    @override
    def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode:
        if isinstance(ast_node, Schema):
            return self._process_schema(ast_node)
        if isinstance(ast_node, InstancePattern):
            return self._process_instance_pattern(ast_node)
        return ast_node

    def _process_schema(self, schema: Schema) -> Schema:
        """Process a Schema by processing each of its patterns.

        Args:
            schema: the schema to process

        Returns:
            A copy of the schema holding only the processed patterns which are concrete patterns.
        """
        resolved = (ResolveAbstractPatternsVisitor(self._schema).apply(pattern) for pattern in schema.patterns)
        concrete_patterns = tuple(pattern for pattern in resolved if isinstance(pattern, ConcretePattern))
        return schema.with_updated(patterns=concrete_patterns)

    def _process_instance_pattern(self, instance_pattern: InstancePattern) -> ConcretePattern:
        """Process an instance-of pattern by expanding it with an abstract pattern.

        Every parameter of the instance-of pattern is offered to the macro expansion under its
        name prefixed with a dollar sign. The result receives the id of the instance-of pattern.

        Args:
            instance_pattern: the instance-of pattern to process

        Returns:
            the processed pattern as a concrete pattern

        Raises:
            ValueError: if no node with the referenced id is present in the schema.
        """
        id_ref = instance_pattern.abstract_id_ref
        abstract_pattern = FindIdVisitor(id_ref).apply(self._schema)
        if abstract_pattern is None:
            raise ValueError(f'Can\'t find the abstract pattern with id "{id_ref}"')

        substitutions = {f'${param.name}': param.value for param in instance_pattern.params}
        expanded_pattern = MacroExpandVisitor(substitutions).apply(abstract_pattern)
        return expanded_pattern.with_updated(id=instance_pattern.id)
257 | 258 | If the input is an AbstractPattern we return a ConcretePattern. 259 | In all other cases we return a node of the same type but with macro expanded elements. 260 | 261 | Args: 262 | A mapping of macro expansions to apply. 263 | """ 264 | super().__init__() 265 | self._macro_expansions = macro_expansions 266 | 267 | @override 268 | def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode: 269 | if isinstance(ast_node, AbstractPattern): 270 | expanded_pattern = self._visit_generic_node(ast_node) 271 | return ConcretePattern(**expanded_pattern.get_init_values()) 272 | 273 | return self._visit_generic_node(ast_node) 274 | 275 | def _visit_generic_node[T: SchematronASTNode](self, ast_node: T) -> T: 276 | """Visit a generic node and do macro expansion. 277 | 278 | Args: 279 | ast_node: the node we are visiting and expanding 280 | 281 | Returns: 282 | A node of the same type but with macro expanded items. 283 | """ 284 | sub_visitor = MacroExpandVisitor(self._macro_expansions) 285 | init_values = ast_node.get_init_values() 286 | 287 | def _expand_value(value): 288 | if isinstance(value, str): 289 | return macro_expand(value, self._macro_expansions) 290 | elif isinstance(value, SchematronASTNode): 291 | return sub_visitor.apply(value) 292 | elif isinstance(value, Mapping): 293 | return {k: _expand_value(v) for k, v in value.items()} 294 | elif isinstance(value, Iterable): 295 | return tuple(_expand_value(el) for el in value) 296 | else: 297 | return value 298 | 299 | updated_items = {} 300 | for key, value in init_values.items(): 301 | updated_items[key] = _expand_value(value) 302 | 303 | return ast_node.with_updated(**updated_items) 304 | 305 | 306 | class PhaseSelectionVisitor(SchematronASTVisitor): 307 | 308 | def __init__(self, schema: Schema, phase: str | Literal['#ALL', '#DEFAULT'] | None = None): 309 | """Reduce an AST to only those patterns and phases referenced by a specific phase. 
310 | 311 | This visitor only works on concrete Schema AST trees, we assume all abstract rules and patterns to be resolved. 312 | 313 | The output limits the `patterns` in the AST to only those selected by the phase. 314 | It will also limit the `phases` to the active phase, or to an empty list if no phase was specified. 315 | 316 | Args: 317 | schema: the full Schema as input to lookup all the rules by ID. 318 | phase: the phase we want to select, can be an IDREF of a phase node, the literal `#ALL` for all patterns, 319 | or `#DEFAULT` for the `defaultPhase` attribute of the Schematron. The default value is `#DEFAULT`, 320 | it is overwritten by the attribute `defaultPhase`, which again can be overwritten by the phase 321 | here specified. 322 | """ 323 | super().__init__() 324 | self._schema = schema 325 | self._phase = phase 326 | self._phase_node = self._get_phase_node(schema, phase) 327 | 328 | self._active_pattern_ids = None 329 | if self._phase_node: 330 | self._active_pattern_ids = [active_phase.pattern_id for active_phase in self._phase_node.active] 331 | 332 | @override 333 | def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode | bool: 334 | match ast_node: 335 | case Schema(): 336 | return self._process_schema(ast_node) 337 | case Pattern(): 338 | return self._process_pattern(ast_node) 339 | case Phase(): 340 | return self._process_phase(ast_node) 341 | case _: 342 | return ast_node 343 | 344 | def _process_schema(self, schema: Schema) -> Schema: 345 | """Process a Schema by reducing the patterns and phases to the specified set. 
346 | 347 | Args: 348 | schema: the schema to process 349 | 350 | Returns: 351 | A processed schema 352 | """ 353 | patterns = tuple(pattern for pattern in schema.patterns if self.apply(pattern)) 354 | phases = tuple(phase for phase in schema.phases if self.apply(phase)) 355 | return schema.with_updated(patterns=patterns, phases=phases) 356 | 357 | def _process_pattern(self, pattern: Pattern) -> bool: 358 | """Process a pattern by verifying if it is in the current phase. 359 | 360 | Args: 361 | pattern: the pattern to process 362 | 363 | Returns: 364 | A boolean indicating if this pattern is in the current phase or not. 365 | """ 366 | if not isinstance(pattern, ConcretePattern): 367 | raise ValueError('This visitor can only deal with concrete patterns.') 368 | 369 | return self._active_pattern_ids is None or pattern.id in self._active_pattern_ids 370 | 371 | def _process_phase(self, phase: Phase) -> bool: 372 | """Process a phase node by verifying if it is in the current phase. 373 | 374 | Args: 375 | phase: the phase to process 376 | 377 | Returns: 378 | A boolean indicating if this phase is in the current phase or not 379 | """ 380 | return self._phase_node is None or phase.id == self._phase_node.id 381 | 382 | def _get_phase_node(self, schema: Schema, phase: str | Literal['#ALL', '#DEFAULT'] | None = None) -> Phase | None: 383 | """Get the phase node associated with the elected phase, or None if None found. 384 | 385 | Args: 386 | schema: the schema we want to search 387 | phase: the chosen phase. 388 | 389 | Returns: 390 | The AST phase node, or None if not applicable / not found. 
391 | """ 392 | if phase is None: 393 | phase = '#DEFAULT' 394 | 395 | if phase == '#ALL': 396 | return None 397 | 398 | if phase == '#DEFAULT': 399 | phase = schema.default_phase 400 | 401 | if isinstance(phase, str): 402 | phase_node = FindIdVisitor(phase).apply(self._schema) 403 | 404 | if phase_node is None: 405 | raise ValueError(f'Can not find the phase "{phase}".') 406 | return phase_node 407 | 408 | return None 409 | -------------------------------------------------------------------------------- /docs/iso-schematron.xsd: -------------------------------------------------------------------------------- 1 | 2 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 
| 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | --------------------------------------------------------------------------------