├── tests
    ├── __init__.py
    ├── unit
    │   └── __init__.py
    ├── integration
    │   └── __init__.py
    └── fixtures
    │   ├── README.rst
    │   └── full_example
    │       ├── abstract_extends.sch
    │       ├── check_weights.sch
    │       ├── diagnostics.sch
    │       ├── cargo.xml
    │       └── schema.sch
├── docs
    ├── c055982_ISO_IEC_19757-3_2016.pdf
    └── iso-schematron.xsd
├── pyschematron
    ├── direct_mode
    │   ├── __init__.py
    │   ├── xml_validation
    │   │   ├── __init__.py
    │   │   ├── queries
    │   │   │   ├── __init__.py
    │   │   │   ├── exceptions.py
    │   │   │   ├── factories.py
    │   │   │   ├── base.py
    │   │   │   └── xpath.py
    │   │   └── results
    │   │   │   ├── __init__.py
    │   │   │   ├── xml_nodes.py
    │   │   │   ├── validation_results.py
    │   │   │   └── svrl_builder.py
    │   ├── schematron
    │   │   ├── parsers
    │   │   │   ├── __init__.py
    │   │   │   └── xml
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── utils.py
    │   │   │   │   └── builders.py
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── ast_yaml.py
    │   │   └── ast_visitors.py
    │   ├── lib
    │   │   ├── __init__.py
    │   │   └── ast.py
    │   ├── svrl
    │   │   ├── __init__.py
    │   │   ├── svrl_visitors.py
    │   │   ├── ast.py
    │   │   └── xml_writer.py
    │   └── api.py
    ├── __version__.py
    ├── utils.py
    ├── cli.py
    ├── __init__.py
    └── api.py
├── .editorconfig
├── .travis.yml
├── .coveragerc
├── tox.ini
├── pyproject.toml
├── .gitignore
├── scripts
    ├── demo_ast.py
    ├── generic_test_script.py
    ├── demo_validation.py
    └── demo_custom_functions.py
├── CHANGELOG.rst
├── cliff.toml
├── Makefile
└── README.rst


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/tests/integration/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/README.rst:
--------------------------------------------------------------------------------
1 | All data needed for unit tests go here.
2 | 


--------------------------------------------------------------------------------
/docs/c055982_ISO_IEC_19757-3_2016.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robbert-harms/pyschematron/HEAD/docs/c055982_ISO_IEC_19757-3_2016.pdf


--------------------------------------------------------------------------------
/pyschematron/direct_mode/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2023-03-02'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2023-02-18'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2023-02-18'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/parsers/xml/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2023-02-18'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/queries/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2023-03-19'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/lib/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2024-03-21'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | __licence__ = 'LGPL v3'
6 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/svrl/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2024-03-11'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | __licence__ = 'LGPL v3'
6 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2024-03-19'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | __licence__ = 'LGPL v3'
6 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/results/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Robbert Harms'
2 | __date__ = '2024-03-25'
3 | __maintainer__ = 'Robbert Harms'
4 | __email__ = 'robbert@xkls.nl'
5 | __licence__ = 'LGPL v3'
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_example/abstract_extends.sch:
--------------------------------------------------------------------------------
1 | <rule xmlns="http://purl.oclc.org/dsdl/schematron">
2 |     <assert test="parent::$pv_category">
3 |         The item <name/> is in the wrong category ($pv_category) (external check).
4 |     </assert>
5 | </rule>
6 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/queries/exceptions.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2023-03-25'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | __licence__ = 'GPL v3'
 6 | 
 7 | 
 8 | class MissingRootNodeError(Exception):
 9 |     ...
10 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | indent_style = space
 7 | indent_size = 4
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | charset = utf-8
11 | end_of_line = lf
12 | 
13 | [LICENSE]
14 | insert_final_newline = false
15 | 
16 | [Makefile]
17 | indent_style = tab
18 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_example/check_weights.sch:
--------------------------------------------------------------------------------
1 | <rule xmlns="http://purl.oclc.org/dsdl/schematron">
2 |   <assert test="xs:integer(@weight) le $max-weight" properties="pr_maxWeight pr_weight" diagnostics="di_too-heavy-en di_too-heavy-nl">
3 |     Weight not correct (<value-of select="@weight"/> vs <value-of select="$max-weight"/> at <name/>).
4 |   </assert>
5 | </rule>
6 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_example/diagnostics.sch:
--------------------------------------------------------------------------------
1 | <diagnostics xmlns="http://purl.oclc.org/dsdl/schematron">
2 |     <diagnostic id="di_too-heavy-en" xml:lang="en">
3 |         The item "<value-of select="name(.)" />" is too heavy.
4 |     </diagnostic>
5 |     <diagnostic id="di_too-heavy-nl" xml:lang="nl">
6 |         Het item "<value-of select="name(.)" />" is te zwaar.
7 |     </diagnostic>
8 | </diagnostics>
9 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Config file for automatic testing at travis-ci.org
 2 | 
 3 | language: python
 4 | 
 5 | python:
 6 |   - "3.12"
 7 | 
 8 | # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 9 | install:
10 |   - python -c 'import tomllib; f = open("pyproject.toml", "rb"); c = tomllib.load(f); print("\n".join(c["project"]["dependencies"]));' | pip install -r /dev/stdin
11 | 
12 | # command to run tests, e.g. python setup.py test
13 | script: make test
14 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/svrl/svrl_visitors.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2024-03-17'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | __licence__ = 'LGPL v3'
 6 | 
 7 | from abc import ABCMeta
 8 | 
 9 | from pyschematron.direct_mode.lib.ast import GenericASTVisitor
10 | from pyschematron.direct_mode.svrl.ast import SVRLNode
11 | 
12 | 
class SVRLASTVisitor(GenericASTVisitor[SVRLNode], metaclass=ABCMeta):
    """Base visitor for abstract syntax trees consisting of SVRL nodes.

    This specializes the generic AST visitor to the `SVRLNode` type and adds no members of its own.
    """
16 | 


--------------------------------------------------------------------------------
/pyschematron/__version__.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2020-02-04'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@laltoida.com'
 5 | 
 6 | 
 7 | from importlib import metadata
 8 | from importlib.metadata import PackageNotFoundError
 9 | from pathlib import Path
10 | 
11 | import tomllib
12 | 
try:
    # Preferred source: the metadata of the installed distribution.
    __version__ = metadata.version('pyschematron')
except PackageNotFoundError:
    # No installed distribution found, read the version from the pyproject.toml
    # located one directory above this package instead.
    with (Path(__file__).parent.parent / 'pyproject.toml').open('rb') as f:
        pyproject = tomllib.load(f)
    __version__ = pyproject['project']['version']
19 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | source = pyschematron
 4 | 
 5 | [report]
 6 | ignore_errors = True
 7 | ; Regexes for lines to exclude from consideration
 8 | exclude_lines =
 9 |     ; Have to re-enable the standard pragma
10 |     pragma: no cover
11 | 
12 |     ; Don't complain about missing debug-only code:
13 |     def __repr__
14 |     if self\.debug
15 | 
16 |     ; Don't complain if tests don't hit defensive assertion code:
17 |     raise AssertionError
18 |     raise NotImplementedError
19 | 
20 |     ; Don't complain if non-runnable code isn't run:
21 |     if 0:
22 |     if __name__ == .__main__.:
23 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_example/cargo.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <?xml-model href="schema.sch" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
 3 | <cargo xmlns="http://www.amazing-cargo.com/xml/data/2023" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:test="http://www.test.nl">
 4 |     <!-- This is a comment -->
 5 |     <!-- Comment: better comment -->
 6 |     <vehicles id="testable_id">
 7 |         <car weight="5000" volume="20" type="vehicle" />
 8 |         <car weight="5000" volume="20" type="vehicle" />
 9 |         <!-- Another comment -->
10 |         <apple weight="1" volume="1" type="fruit" />
11 |     </vehicles>
12 |     <fruits id="id_test">
13 |         <banana weight="1" volume="500" type="fruit" />
14 |         <motorcycle weight="100" volume="1" type="vehicle" />
15 |     </fruits>
16 | </cargo>
17 | 
18 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/utils.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2023-03-06'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | 
 6 | import re
 7 | 
 8 | 
 9 | def macro_expand(string: str, macros: dict[str, str]) -> str:
10 |     """Expand the provided macros on the provided string.
11 | 
12 |     This replaces all the macros in one go. This is a specialized version of multi-string replacement which
13 |     assumes all the replacements are unique (as they are in macro's).
14 | 
15 |     We use this function for instantiating abstract patterns to concrete patterns.
16 | 
17 |     This assumes the macros already have the prefix `$`.
18 | 
19 |     Args:
20 |         string: the string on which to apply the macro
21 |         macros: the macros to expand
22 | 
23 |     Returns:
24 |         A version of the string with the macros expanded
25 |     """
26 |     macros_pattern = '|'.join(re.escape(k) for k in macros)
27 |     pattern = re.compile(f'({macros_pattern})\\b')
28 |     return pattern.sub(lambda match: macros[match.group(0)], string)
29 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | ; with ideas copied from: https://github.com/zopefoundation/RestrictedPython/blob/master/tox.ini
 2 | 
 3 | [tox]
 4 | isolated_build = True
 5 | envlist = setup,py312,report
 6 | toxworkdir = {toxinidir}/build/.tox
 7 | 
 8 | [testenv]
 9 | usedevelop = True
10 | allowlist_externals = which
11 | commands =
12 |     python --version
13 |     which python
14 |     which pip
15 |     which pytest
16 |     pytest --cov=pyschematron --cov-report=html:build/coverage/{envname} --cov-append --html=build/pytest/report-{envname}.html --self-contained-html {posargs}
17 | deps =
18 |     pytest
19 |     pytest-cov
20 |     pytest-html
21 |     joblib
22 |     tqdm
23 |     moto
24 |     docker
25 | setenv =
26 |   COVERAGE_FILE=build/.coverage.{envname}
27 | 
28 | [testenv:report]
29 | deps = coverage
30 | skip_install = true
31 | depends = py312
32 | setenv =
33 |   COVERAGE_FILE=build/.coverage
34 | commands =
35 |     coverage erase
36 |     coverage combine
37 |     coverage html -d build/coverage/all/
38 |     coverage report
39 | 
40 | [testenv:setup]
41 | deps = coverage
42 | skip_install = true
43 | setenv =
44 |   COVERAGE_FILE=build/.coverage
45 | commands = coverage erase
46 | 
47 | 
48 | [pytest]
49 | testpaths = tests
50 | cache_dir = build/.pytest_cache
51 | addopts = --capture=tee-sys
52 | 


--------------------------------------------------------------------------------
/pyschematron/utils.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2023-02-17'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | 
 6 | 
 7 | from io import BytesIO, IOBase
 8 | from pathlib import Path
 9 | from typing import BinaryIO, Union
10 | 
11 | from lxml import etree
12 | from lxml.etree import _ElementTree
13 | 
14 | 
15 | def load_xml_document(xml_data: Union[bytes, str, Path, IOBase, BinaryIO],
16 |                       parser: etree.XMLParser | None = None) -> _ElementTree:
17 |     """Load an XML document from a polymorphic source.
18 | 
19 |     Args:
20 |         xml_data: the XML data to load. Can be loaded from a string, file, or byte-like object.
21 |         parser: the XMLParser to use. Can be specialized for your use-case.
22 | 
23 |     Returns:
24 |         The document node of the loaded XML.
25 |     """
26 |     parser = parser or etree.XMLParser(ns_clean=True)
27 | 
28 |     match xml_data:
29 |         case IOBase():
30 |             return etree.parse(xml_data, parser)
31 |         case bytes():
32 |             return load_xml_document(BytesIO(xml_data), parser=parser)
33 |         case str():
34 |             return load_xml_document(BytesIO(xml_data.encode('utf-8')), parser=parser)
35 |         case Path():
36 |             with open(xml_data, 'rb') as f:
37 |                 return load_xml_document(f, parser=parser)
38 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "pyschematron"
 3 | description = "Schematron validation in Python."
 4 | readme = "README.rst"
 5 | version = "1.1.13"
 6 | requires-python = ">=3.12"
 7 | keywords = ["Schematron", "XML validation"]
 8 | classifiers = [
 9 |     "Development Status :: 5 - Production/Stable",
10 |     "Environment :: Console",
11 |     "Intended Audience :: Developers",
12 |     "Natural Language :: English",
13 |     "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
14 |     "Programming Language :: Python :: 3.12",
15 | ]
16 | license = { file="LICENSE" }
17 | authors = [
18 |     { name = "Robbert Harms", email = "robbert@xkls.nl" }
19 | ]
20 | dependencies = [
21 |     "xmlschema~=4.1.0",
22 |     "elementpath~=5.0.4",
23 |     "typer>=0.19.2",
24 |     "appdirs~=1.4.4",
25 |     "lxml~=6.0.0",
26 |     "Jinja2~=3.1.6",
27 |     "ruyaml~=0.91.0",
28 |     "frozendict~=2.4.6"
29 | ]
30 | 
31 | [project.optional-dependencies]
32 | test = [
33 |     "pytest~=8.3.4",
34 |     "pytest-check~=2.5.0",
35 |     "pytest-cov~=6.0.0",
36 |     "pytest-html~=4.1.1",
37 |     "tox~=4.24.1"
38 | ]
39 | doc = [
40 |     "Sphinx~=8.2.0",
41 |     "git-cliff~=2.8.0"
42 | ]
43 | 
44 | [project.urls]
45 | Homepage = "https://github.com/robbert-harms/pyschematron/"
46 | 
47 | [project.scripts]
48 | pyschematron = "pyschematron.cli:app"
49 | 
50 | [build-system]
51 | requires = ["flit_core >=3.9,<4"]
52 | build-backend = "flit_core.buildapi"
53 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.py[cod]
  2 | 
  3 | # C extensions
  4 | *.so
  5 | 
  6 | # Packages
  7 | *.egg
  8 | *.egg-info
  9 | /dist
 10 | /build
 11 | /eggs
 12 | /parts
 13 | /var
 14 | /sdist
 15 | /develop-eggs
 16 | /lib
 17 | /lib64
 18 | .installed.cfg
 19 | /.pybuild
 20 | 
 21 | # Installer logs
 22 | pip-log.txt
 23 | 
 24 | # Unit test / coverage reports
 25 | .coverage
 26 | .tox
 27 | nosetests.xml
 28 | htmlcov
 29 | 
 30 | # Translations
 31 | *.mo
 32 | 
 33 | # Mr Developer
 34 | .mr.developer.cfg
 35 | .project
 36 | .pydevproject
 37 | 
 38 | # Complexity
 39 | output/*.html
 40 | output/*/index.html
 41 | 
 42 | # Sphinx
 43 | docs/_build
 44 | 
 45 | #Idea
 46 | .idea
 47 | 
 48 | #Miscellaneous
 49 | /misc
 50 | 
 51 | # debian tmps
 52 | /debian/source
 53 | /debian/tmp
 54 | /debian/changelog
 55 | /debian/compat
 56 | /debian/control
 57 | /debian/rules
 58 | /debian/watch
 59 | 
 60 | # Elastic Beanstalk Files
 61 | .elasticbeanstalk/*
 62 | !.elasticbeanstalk/*.cfg.yml
 63 | !.elasticbeanstalk/*.global.yml
 64 | 
 65 | # MAC os files
 66 | .DS_Store
 67 | 
 68 | #########
 69 | ## Latex
 70 | #########
 71 | ## Core latex/pdflatex auxiliary files:
 72 | *.aux
 73 | *.lof
 74 | *.log
 75 | *.lot
 76 | *.fls
 77 | *.out
 78 | *.toc
 79 | *.fmt
 80 | *.fot
 81 | *.cb
 82 | *.cb2
 83 | .*.lb
 84 | 
 85 | ## Intermediate documents:
 86 | *.dvi
 87 | *.xdv
 88 | *-converted-to.*
 89 | 
 90 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
 91 | *.bbl
 92 | *.bcf
 93 | *.blg
 94 | *-blx.aux
 95 | *-blx.bib
 96 | *.run.xml
 97 | 
 98 | ## Build tool auxiliary files:
 99 | *.fdb_latexmk
100 | *.synctex
101 | *.synctex(busy)
102 | *.synctex.gz
103 | *.synctex.gz(busy)
104 | *.pdfsync
105 | 
106 | #########
107 | ## End Latex
108 | #########
109 | 
110 | .python-version
111 | 


--------------------------------------------------------------------------------
/scripts/demo_ast.py:
--------------------------------------------------------------------------------
 1 | """This script demonstrates how to load a Schematron Schema in the PySchematron direct-mode.
 2 | 
 3 | This example shows the use of the direct-mode Abstract Syntax Tree (AST) for PySchematron Schemas. By loading Schematron
 4 | schemas in the AST, you can inspect the Schema using Python functionality.
 5 | 
 6 | Please note that only Schematron specific XML nodes are loaded from the Schematron Schema. Custom nodes are not loaded.
 7 | You can, however, augment the AST and the parser with your own nodes; this is not demonstrated here.
 8 | """
 9 | 
10 | __author__ = 'Robbert Harms'
11 | __date__ = '2023-02-21'
12 | __maintainer__ = 'Robbert Harms'
13 | __email__ = 'robbert@xkls.nl'
14 | 
15 | from pathlib import Path
16 | 
17 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, ResolveAbstractPatternsVisitor, \
18 |     PhaseSelectionVisitor
19 | from pyschematron.direct_mode.schematron.ast_yaml import RuyamlASTYamlConverter
20 | from pyschematron.direct_mode.schematron.parsers.xml.parser import ParsingContext, SchemaParser
21 | from pyschematron.utils import load_xml_document
22 | 
23 | 
# The example Schema, given relative to the current working directory.
schematron_path = Path('../tests/fixtures/full_example/schema.sch')
schematron_xml = load_xml_document(schematron_path)
parsing_context = ParsingContext(base_path=schematron_path.parent)

# Turn the XML of the Schema into an AST.
schema = SchemaParser().parse(schematron_xml.getroot(), parsing_context)

# The visitor pattern in action: each visitor is created for the current Schema
# and returns a modified Schema.
schema = ResolveExtendsVisitor(schema).apply(schema)
schema = ResolveAbstractPatternsVisitor(schema).apply(schema)
schema = PhaseSelectionVisitor(schema, '#ALL').apply(schema)

# Experimental: dump the AST to YAML and load it again.
yaml_converter = RuyamlASTYamlConverter()
yaml_schema = yaml_converter.dump(schema)
reloaded_schema = yaml_converter.load(yaml_schema)

print(yaml_schema)
# Prints whether the YAML round trip compares equal to the Schema we started with.
print(reloaded_schema == schema)
44 | 


--------------------------------------------------------------------------------
/scripts/generic_test_script.py:
--------------------------------------------------------------------------------
 1 | """A test script I use when developing PySchematron."""
 2 | 
 3 | __author__ = 'Robbert Harms'
 4 | __date__ = '2023-02-21'
 5 | __maintainer__ = 'Robbert Harms'
 6 | __email__ = 'robbert@xkls.nl'
 7 | 
 8 | from pathlib import Path
 9 | 
10 | from lxml import etree
11 | 
12 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, \
13 |     ResolveAbstractPatternsVisitor, PhaseSelectionVisitor
14 | from pyschematron.direct_mode.schematron.parsers.xml.parser import ParsingContext, SchemaParser
15 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder
16 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator
17 | from pyschematron.utils import load_xml_document
18 | 
19 | '''
20 | cd programming/python/pyschematron/tests/fixtures/full_example/
21 | java -jar ~/programming/java/schxslt-cli.jar -d cargo.xml -s schema.sch -o /tmp/report.xml
22 | '''
23 | 
# Inputs of this run, the paths are relative to the current working directory.
example_path = Path('../tests/fixtures/full_example/')
schematron_path = example_path / 'schema.sch'
phase = '#ALL'

schematron_xml = load_xml_document(schematron_path)
parsing_context = ParsingContext(base_path=schematron_path.parent)

# Parse the Schema and apply the processing visitors one after the other.
schema = SchemaParser().parse(schematron_xml.getroot(), parsing_context)
schema = ResolveExtendsVisitor(schema).apply(schema)
schema = ResolveAbstractPatternsVisitor(schema).apply(schema)
schema = PhaseSelectionVisitor(schema, phase).apply(schema)

validator = SimpleSchematronXMLValidator(schema, phase, parsing_context.base_path)

# Validate the example document using the processed Schema.
xml_document = load_xml_document(example_path / 'cargo.xml')
validation_results = validator.validate_xml(xml_document)

# Build the SVRL report and serialize it to a string.
svrl_report = DefaultSVRLReportBuilder().create_svrl_xml(validation_results)
report_str = etree.tostring(svrl_report, pretty_print=True).decode('utf-8')

# Store the report on disk and show it on the console as well.
Path('/tmp/report_pyschematron.xml').write_text(report_str)

print(report_str)

print()
51 | 


--------------------------------------------------------------------------------
/pyschematron/cli.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2024-04-06'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | __licence__ = 'LGPL v3'
 6 | 
 7 | from pathlib import Path
 8 | 
 9 | import typer
10 | 
11 | from pyschematron import validate_documents, validate_document
12 | 
13 | app = typer.Typer(no_args_is_help=True, pretty_exceptions_enable=False)
14 | 
15 | 
@app.command(help='Validate one or more documents using PySchematron.')
def validate(xml_documents: list[Path] = typer.Argument(help='One or more documents to validate.',
                                                        exists=True, file_okay=True, dir_okay=False, resolve_path=True),
             schema: Path = typer.Argument(help='The Schematron Schema to use for the validation.',
                                           exists=True, file_okay=True, dir_okay=False, resolve_path=True),
             phase: str = typer.Option('#DEFAULT', '--phase', '-p', help='The Schematron phase to use.'),
             svrl_out: Path = typer.Option(None, '--svrl-out',
                                           help='The file to write the SVRL to. '
                                                'For multiple documents we append the XML document name to this name.',
                                           file_okay=True, dir_okay=False, writable=True)):
    # Validates every document against the Schema, prints VALID or INVALID per document,
    # and optionally writes the SVRL report(s) to disk.

    # Make sure the output directory exists before any report is written.
    if svrl_out:
        svrl_out.parent.mkdir(parents=True, exist_ok=True)

    # A single document uses the single document API, anything else the batch API.
    single_document = len(xml_documents) == 1
    if single_document:
        results = [validate_document(xml_documents[0], schema, phase=phase)]
    else:
        results = validate_documents(xml_documents, schema, phase=phase)

    for filename, result in zip(xml_documents, results):
        print(filename, 'VALID' if result.is_valid() else 'INVALID')

        if not svrl_out:
            continue

        if single_document:
            # With one document the SVRL goes to the requested file as-is.
            out_fname = svrl_out
        else:
            # With multiple documents we append the stem of the document to the output name.
            out_fname = svrl_out.with_stem(svrl_out.stem + '_' + Path(filename).stem)

        result.get_svrl().write(str(out_fname), pretty_print=True, xml_declaration=True, encoding="utf-8")
46 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/results/xml_nodes.py:
--------------------------------------------------------------------------------
 1 | """This module contains class representations for XML nodes.
 2 | 
 3 | For the purpose of storing the validation results, we would like to reference the XML node at
 4 | which a check was performed. The ISO Schematron applies rules to:
 5 | 
 6 | - Elements (*)
 7 | - Attributes (@*)
 8 | - Root node (/)
 9 | - Comments (comment())
10 | - Processing instructions (processing-instruction())
11 | 
12 | As such, when we list the results, we need a way to represent these different kinds of nodes in a uniform manner.
13 | This module aids by having objects to represent each visited XML node.
14 | """
15 | from __future__ import annotations
16 | 
17 | __author__ = 'Robbert Harms'
18 | __date__ = '2024-03-25'
19 | __maintainer__ = 'Robbert Harms'
20 | __email__ = 'robbert@xkls.nl'
21 | __licence__ = 'LGPL v3'
22 | 
23 | from abc import ABCMeta
24 | from dataclasses import dataclass
25 | from lxml.etree import _Element, _ProcessingInstruction, _Comment
26 | 
27 | 
@dataclass(frozen=True, slots=True)
class XMLNode(metaclass=ABCMeta):
    """The base class for all XML nodes in this module.

    Instances are frozen (immutable) dataclasses using slots. Every node type in this module
    inherits the XPath location defined here.

    Args:
        xpath_location: The location of the represented node in XPath 3.1 notation, using the `BracedURILiteral`
            style for the qualified names.
    """
    xpath_location: str
37 | 
38 | 
@dataclass(frozen=True, slots=True)
class AttributeNode(XMLNode):
    """Represents an attribute of an XML element.

    In addition to the XPath location of the base class, this stores the attribute itself
    together with the element it belongs to.

    Args:
        name: the attribute name
        value: a string value for the attribute
        parent: the parent element node, i.e. the lxml element carrying this attribute
    """
    name: str
    value: str
    parent: _Element
51 | 
52 | 
@dataclass(frozen=True, slots=True)
class CommentNode(XMLNode):
    """Represents an XML comment.

    Args:
        element: the wrapped lxml comment element
    """
    element: _Comment
61 | 
62 | 
@dataclass(frozen=True, slots=True)
class ProcessingInstructionNode(XMLNode):
    """Represents an XML processing instruction node.

    Args:
        element: the wrapped lxml processing instruction element
    """
    element: _ProcessingInstruction
71 | 
72 | 
@dataclass(frozen=True, slots=True)
class ElementNode(XMLNode):
    """Represents an XML element.

    Args:
        element: the wrapped lxml XML element
    """
    element: _Element
81 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/parsers/xml/utils.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2023-02-18'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | 
 6 | from pathlib import Path
 7 | from typing import Callable, Any
 8 | 
 9 | import lxml
10 | from lxml import etree
11 | from lxml.etree import _Element
12 | 
13 | 
def node_to_str(node: _Element, remove_namespaces: bool = True) -> str:
    """Convert an lxml node to string.

    This can be used to convert a node to string without namespaces. The tail text of the node is never
    part of the output.

    Args:
        node: the node to convert to string
        remove_namespaces: if we want to strip the namespaces from the element tags. Only the tags are
            rewritten to their local name, attribute names are left untouched.

    Returns:
        A string representation of the provided node.
    """
    tag_str = etree.tostring(node, with_tail=False, encoding='unicode')

    if not remove_namespaces:
        return tag_str

    # Work on a re-parsed copy such that the provided node is not modified.
    new_root = etree.fromstring(tag_str)

    # Visit only the real elements. Comments and processing instructions have no
    # qualified tag name, `etree.QName` raises an error on those.
    for elem in new_root.iter(etree.Element):
        elem.tag = etree.QName(elem).localname

    # Drop the namespace declarations which are no longer in use.
    etree.cleanup_namespaces(new_root)

    return etree.tostring(new_root, encoding='unicode')
36 | 
37 | 
38 | def resolve_href(href: str, base_path: Path) -> Path:
39 |     """Resolve a href attribute to a file on the filesystem.
40 | 
41 |     This can be used to resolve the `href` attributes of extend or include tags.
42 | 
43 |     Args:
44 |         href: an absolute or relative path pointing to a file on the filesystem
45 |         base_path: the base path to resolve relative paths.
46 | 
47 |     Returns:
48 |         An absolute path to a file on the filesystem.
49 |     """
50 |     file_path = Path(href)
51 |     if file_path.is_absolute():
52 |         return file_path
53 |     return (base_path / file_path).resolve()
54 | 
55 | 
56 | def parse_attributes(attributes: dict[str, str],
57 |                      allowed_attributes: list[str],
58 |                      attribute_handlers: dict[str: Callable[[str, str], Any]] | None = None) -> dict[str, Any]:
59 |     """Parse the attributes of the given element.
60 | 
61 |     By default, it returns all attributes as a string value. By using the attribute handlers it is possible
62 |     to specify for each attribute how it is to be treated.
63 | 
64 |     Args:
65 |         attributes: the attributes we wish to parse
66 |         allowed_attributes: the set of allowed attributes, we will only parse and return the items in this list
67 |         attribute_handlers: for each attribute name, a callback taking in the name and attribute value to return
68 |             a new modified name and attribute value.
69 | 
70 |     Returns:
71 |         For each allowed attribute the parsed values.
72 |     """
73 |     attribute_handlers = attribute_handlers or {}
74 | 
75 |     parsed_attributes = {}
76 |     for item in allowed_attributes:
77 |         if item in attributes:
78 |             handler = attribute_handlers.get(item, lambda k, v: {k: v})
79 |             parsed_attributes.update(handler(item, attributes[item]))
80 |     return parsed_attributes
81 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
  1 | *********
  2 | Changelog
  3 | *********
  4 | 
  5 | 
  6 | Version 1.1.13 (2025-11-05)
  7 | ===========================
  8 | 
  9 | Other
 10 | -----
 11 | - Updated default parser in load_xml_document, this improves parallel processing speed according to the documentation of XMLParser.
 12 | 
 13 | 
 14 | 
 15 | Version 1.1.12 (2025-11-04)
 16 | ===========================
 17 | 
 18 | Features
 19 | --------
 20 | - The function load_xml_document now accepts a parser instance for flexibility.
 21 | - The XML Schematron writer now allows overriding the default nsmap namespace.
 22 | 
 23 | 
 24 | 
 25 | Version 1.1.11 (2025-09-24)
 26 | ===========================
 27 | 
 28 | Other
 29 | -----
 30 | - Updated typer dependency and removed the all limiter.
 31 | 
 32 | 
 33 | 
 34 | Version 1.1.10 (2025-09-21)
 35 | ===========================
 36 | 
 37 | Other
 38 | -----
 39 | - Relaxed the version constraint on lxml.
 40 | 
 41 | 
 42 | 
 43 | Version 1.1.9 (2025-09-18)
 44 | ==========================
 45 | 
 46 | Other
 47 | -----
 48 | - Updated xmlschema, elementpath and lxml dependencies.
 49 | 
 50 | 
 51 | 
 52 | Version 1.1.8 (2025-03-26)
 53 | ==========================
 54 | 
 55 | Features
 56 | --------
 57 | - Adds Schematron base path to the function API interface.
 58 | 
 59 | Documentation
 60 | -------------
 61 | - Updated editorconfig
 62 | 
 63 | 
 64 | 
 65 | Version 1.1.7 (2025-02-19)
 66 | ==========================
 67 | 
 68 | Miscellaneous Tasks
 69 | -------------------
 70 | - *(deps\)* Updated all the dependencies.
 71 | 
 72 | 
 73 | Version 1.1.6 (2025-01-31)
 74 | ==========================
 75 | 
 76 | Other
 77 | -----
 78 | - Replaced gitchangelog with git-cliff.
 79 | 
 80 | 
 81 | Version 1.1.5 (2025-01-22)
 82 | ==========================
 83 | 
 84 | Added
 85 | -----
 86 | - Adds PyPi Homepage url.
 87 | 
 88 | Fixed
 89 | -----
 90 | - Fixes bug #7.
 91 | 
 92 | 
 93 | Version 1.1.4 (2024-12-23)
 94 | ==========================
 95 | 
 96 | Other
 97 | -----
 98 | - Made the assert and report checks more robust for queries not returning a single boolean. This fixes the second part of issue #6.
 99 | 
100 | Version 1.1.3 (2024-12-21)
101 | ==========================
102 | 
103 | Other
104 | -----
105 | - Made rich text evaluation more robust for complex results.
106 | 
107 | 
108 | Version 1.1.2 (2024-12-20)
109 | ==========================
110 | 
111 | Other
112 | -----
113 | - Bumped required elementpath version to fix bug #6.
114 | 
115 | 
116 | Version 1.1.1 (2024-11-27)
117 | ==========================
118 | 
119 | Other
120 | -----
121 | - Updated is_valid comment in the API.
122 | 
123 | 
124 | Version 1.1.0 (2024-11-27)
125 | ==========================
126 | 
127 | Other
128 | -----
129 | - Fixes github bug #5. The reporting of the is_valid method was reversed with regard to assert/report.
130 | - Fixed the documentation regarding the is_valid function.
131 | 
132 | 
133 | Version 1.0.3 (2024-10-29)
134 | ==========================
135 | 
136 | Other
137 | -----
138 | - Updated elementpath dependency version.
139 | 
140 | 
141 | Version 1.0.2 (2024-10-18)
142 | ==========================
143 | 
144 | Other
145 | -----
146 | - Updated readme to include supported Python version and other textual changes.
147 | - Updated lxml dependency from 5.1.0 to 5.2.1
148 | 
149 | 
150 | Version 1.0.1 (2024-09-24)
151 | ==========================
152 | 
153 | Other
154 | -----
155 | - Upgraded to elementpath==4.5.0
156 | - Fixed email address in info blocks.
157 | 
158 | 
159 | Version 1.0.0 (2024-08-23)
160 | ==========================
161 | 
162 | Other
163 | -----
164 | First complete version of PySchematron. See the readme for the functionality and limitations.
165 | 
166 | 
167 | Version 0.1.0 (2022-09-12)
168 | ==========================
169 | 
170 | Other
171 | -----
172 | - First version
173 | 
174 | 
175 | 
176 | 


--------------------------------------------------------------------------------
/cliff.toml:
--------------------------------------------------------------------------------
 1 | # Git Cliff configuration file. See: https://git-cliff.org/docs/configuration
 2 | 
 3 | [changelog]
 4 | header = """
 5 | *********
 6 | Changelog
 7 | *********
 8 | 
 9 | """
10 | 
11 | # template for the changelog body
12 | # https://keats.github.io/tera/docs/#introduction
13 | body = """
14 | {% if version %}\
15 | Version {{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }})
16 | {%- set header_length = version | trim_start_matches(pat="v") | length + timestamp | date(format="%Y-%m-%d") | length + 11 %}
17 | {% for i in range(end=header_length) %}={% endfor %}
18 | {% else %}\
19 | [unreleased]
20 | {% for i in range(end=12) %}-{% endfor %}
21 | {% endif %}\
22 | 
23 | {% for group, commits in commits | group_by(attribute="group") %}
24 | {{ group | striptags | trim | upper_first }}
25 | {%- set subheader_length = group | striptags | trim | length%}
26 | {% for i in range(end=subheader_length) %}-{% endfor %}
27 | {%- for commit in commits %}
28 | - {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
29 | {% if commit.breaking %}[**breaking**] {% endif %}\
30 | {{ commit.message | upper_first }}\
31 | {% endfor %}
32 | {% endfor %}\n
33 | """
34 | 
35 | # template for the changelog footer
36 | footer = ""
37 | 
38 | # remove the leading and trailing whitespace
39 | trim = false
40 | 
41 | # postprocessors
42 | postprocessors = [
43 |   # { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
44 | ]
45 | # render body even when there are no releases to process
46 | # render_always = true
47 | # output file path
48 | # output = "test.md"
49 | 
50 | [git]
51 | # parse the commits based on https://www.conventionalcommits.org
52 | conventional_commits = true
53 | 
54 | # filter out the commits that are not conventional
55 | filter_unconventional = false
56 | 
57 | # process each line of a commit as an individual commit
58 | split_commits = false
59 | 
60 | # regex for preprocessing the commit messages
61 | commit_preprocessors = [
62 |   # Replace issue numbers
63 |   #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
64 |   # Check spelling of the commit with https://github.com/crate-ci/typos
65 |   # If the spelling is incorrect, it will be automatically fixed.
66 |   #{ pattern = '.*', replace_command = 'typos --write-changes -' },
67 | ]
68 | 
69 | # regex for parsing and grouping commits
70 | commit_parsers = [
71 |   { message = "^feat", group = "<!-- 0 -->Features" },
72 |   { message = "^fix", group = "<!-- 1 -->Bug Fixes" },
73 |   { message = "^doc", group = "<!-- 3 -->Documentation" },
74 |   { message = "^perf", group = "<!-- 4 -->Performance" },
75 |   { message = "^refactor", group = "<!-- 2 -->Refactor" },
76 |   { message = "^style", group = "<!-- 5 -->Styling" },
77 |   { message = "^test", group = "<!-- 6 -->Testing" },
78 |   { message = "^chore\\(release\\): prepare for", skip = true },
79 |   { message = "^chore\\(deps.*\\)", skip = true },
80 |   { message = "^chore\\(pr\\)", skip = true },
81 |   { message = "^chore\\(pull\\)", skip = true },
82 |   { message = "^chore|^ci", group = "<!-- 7 -->Miscellaneous Tasks" },
83 |   { body = ".*security", group = "<!-- 8 -->Security" },
84 |   { message = "^revert", group = "<!-- 9 -->Revert" },
85 |   { message = ".*", group = "<!-- 10 -->Other" },
86 | ]
87 | 
88 | # filter out the commits that are not matched by commit parsers
89 | filter_commits = false
90 | 
91 | # sort the tags topologically
92 | topo_order = false
93 | 
94 | # sort the commits inside sections by oldest/newest order
95 | sort_commits = "oldest"
96 | 
97 | # git tag pattern for finding the versions (taken from https://semver.org/)
98 | tag_pattern = "^v(?P<major>0|[1-9]\\d*)\\.(?P<minor>0|[1-9]\\d*)\\.(?P<patch>0|[1-9]\\d*)(?:-(?P<prerelease>(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
99 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/lib/ast.py:
--------------------------------------------------------------------------------
  1 | """This module defines abstract base types for an Abstract Syntax Tree (AST) and an AST visitor."""
  2 | from __future__ import annotations
  3 | 
  4 | __author__ = 'Robbert Harms'
  5 | __date__ = '2024-03-21'
  6 | __maintainer__ = 'Robbert Harms'
  7 | __email__ = 'robbert@xkls.nl'
  8 | __licence__ = 'LGPL v3'
  9 | 
 10 | from abc import abstractmethod, ABCMeta
 11 | from dataclasses import dataclass, fields
 12 | from typing import Any, Mapping, Iterable, Self
 13 | 
 14 | 
 15 | @dataclass(slots=True, frozen=True)
 16 | class GenericASTNode:
 17 |     """Abstract base class for Abstract Syntax Tree (AST) nodes.
 18 | 
 19 |     Each node in an AST (also the root) is of this type. Since we, in general, aim for immutability, the AST nodes
 20 |     are defined as frozen dataclasses with slots. If you want true immutability, avoid dictionaries and lists in
 21 |     your AST implementations.
 22 | 
 23 |     This class already implements the visitor pattern using the :class:`GenericASTVisitor`.
 24 |     """
 25 | 
 26 |     def accept_visitor(self, visitor: GenericASTVisitor) -> Any:
 27 |         """Accept a visitor on this node.
 28 | 
 29 |         Since Python allows polymorphic return values, we allow the visitor pattern to return values.
 30 | 
 31 |         Args:
 32 |             visitor: the visitor we accept and call
 33 | 
 34 |         Returns:
 35 |             The result of the visitor.
 36 |         """
 37 |         return visitor.visit(self)
 38 | 
 39 |     def get_init_values(self) -> dict[str, Any]:
 40 |         """Get the initialisation values with which this class was instantiated.
 41 | 
 42 |         Returns:
 43 |             A dictionary mapping each dataclass field name to the value of that field on this node.
 44 |         """
 45 |         return {field.name: getattr(self, field.name) for field in fields(self)}
 46 | 
 47 |     def get_children(self) -> list[GenericASTNode]:
 48 |         """Get a list of all the AST nodes in this node.
 49 | 
 50 |         This returns all AST nodes referenced by the init values, recursing into mappings and non-string iterables.
 51 | 
 52 |         Returns:
 53 |             All the child nodes in this node
 54 |         """
 55 |         def get_ast_nodes(init_values: Any) -> list[GenericASTNode]:
 56 |             children = []
 57 | 
 58 |             if isinstance(init_values, GenericASTNode):
 59 |                 children.append(init_values)
 60 |             elif isinstance(init_values, Mapping):
 61 |                 for el in init_values.values():
 62 |                     children.extend(get_ast_nodes(el))
 63 |             elif isinstance(init_values, Iterable) and not isinstance(init_values, str):
 64 |                 for el in init_values:
 65 |                     children.extend(get_ast_nodes(el))
 66 | 
 67 |             return children
 68 | 
 69 |         return get_ast_nodes(self.get_init_values())
 70 | 
 71 | 
 72 | class GenericASTVisitor[T: GenericASTNode](metaclass=ABCMeta):
 73 |     """Generic base class for visitors, according to the visitor design pattern.
 74 | 
 75 |     Instead of a typed double dispatch we use dynamic double dispatching in which each node, when visited, calls
 76 |     the :meth:`visit` of this class instead of a visit method for each node type. This makes it easier to
 77 |     do edits on class names since the types can be looked up by an IDE.
 78 | 
 79 |     We use the generic type hint `T` to ensure solid type hinting for implementing visitors.
 80 |     """
 81 | 
 82 |     @abstractmethod
 83 |     def visit(self, ast_node: T) -> Any:
 84 |         """Visit the AST node.
 85 | 
 86 |         This uses dynamic dispatch to accept all types of AST nodes.
 87 | 
 88 |         Since Python allows polymorphic return values, we allow the visitor pattern to return values.
 89 | 
 90 |         Args:
 91 |             ast_node: an AST node of any type
 92 | 
 93 |         Returns:
 94 |             The result of the visitor.
 95 |         """
 96 | 
 97 |     def apply(self, ast_node: T) -> Any:
 98 |         """Convenience method to apply this visitor on the indicated node and get the result value.
 99 | 
100 |         Args:
101 |             ast_node: the node on which to apply this visitor
102 | 
103 |         Returns:
104 |             The value returned by the `accept_visitor` method of the node when called with this visitor.
105 |         """
106 |         return ast_node.accept_visitor(self)
107 | 
108 | 


--------------------------------------------------------------------------------
/scripts/demo_validation.py:
--------------------------------------------------------------------------------
 1 | """This script demonstrates using the direct-mode PySchematron validator to validate your XML documents.
 2 | 
 3 | In Schematron validation, we apply a Schematron Schema to an XML resulting in either a pass or a fail. A fail indicates
 4 | that the document could not be validated using the Schema, hence the XML may have problems. In addition to this boolean
 5 | output, Schematron also defines the Schematron Validation Report Language (SVRL), loosely defining a format in which
 6 | more information about the validation results can be represented.
 7 | 
 8 | This script shows three different ways of interacting with the PySchematron direct-mode validator. The most simple is
 9 | by using the functional interface defined in the main module. Second, you can use a generalized API which might be
10 | extended in the future with an XSLT methodology. Finally, you can use the full-blown direct-mode classes and methods.
11 | The latter is the most complicated but gives the most control.
12 | """
13 | 
14 | __author__ = 'Robbert Harms'
15 | __date__ = '2024-04-03'
16 | __maintainer__ = 'Robbert Harms'
17 | __email__ = 'robbert@xkls.nl'
18 | __licence__ = 'LGPL v3'
19 | 
20 | from pathlib import Path
21 | 
22 | from lxml import etree
23 | 
24 | from pyschematron import DirectModeSchematronValidatorFactory, validate_document
25 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, ResolveAbstractPatternsVisitor, \
26 |     PhaseSelectionVisitor
27 | from pyschematron.direct_mode.schematron.parsers.xml.parser import SchemaParser, ParsingContext
28 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder
29 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator
30 | from pyschematron.utils import load_xml_document
31 | 
32 | 
33 | # the paths to the example data and Schema
34 | example_base_path = Path('../tests/fixtures/full_example/')
35 | schematron_schema_path = example_base_path / 'schema.sch'
36 | example_xml_document_path = example_base_path / 'cargo.xml'
37 | 
38 | # the phase we would like to evaluate
39 | phase = '#ALL'
40 | 
41 | 
42 | def demo_functional_interface():
43 |     """This example uses the functional interface, the most simple method of interacting with PySchematron. """
44 |     result = validate_document(example_xml_document_path, schematron_schema_path)
45 |     svrl = result.get_svrl()
46 | 
47 |     report_str = etree.tostring(svrl, pretty_print=True).decode('utf-8')
48 |     print(report_str)
49 |     print(result.is_valid())
50 | 
51 | 
52 | def demo_generic_api():
53 |     """This demonstrates the use of the generic API."""
54 |     validator_factory = DirectModeSchematronValidatorFactory()
55 |     validator_factory.set_schema(schematron_schema_path)
56 |     validator_factory.set_phase(phase)
57 | 
58 |     validator = validator_factory.build()
59 |     validation_result = validator.validate(example_xml_document_path)
60 | 
61 |     svrl = validation_result.get_svrl()
62 |     report_str = etree.tostring(svrl, pretty_print=True).decode('utf-8')
63 | 
64 |     print(report_str)
65 |     print(validation_result.is_valid())
66 | 
67 | 
68 | def demo_full_api():
69 |     """This demonstrates the inner workings of the direct-mode validator."""
70 |     schematron_xml = load_xml_document(schematron_schema_path)
71 |     parsing_context = ParsingContext(base_path=schematron_schema_path.parent)
72 | 
73 |     schematron_parser = SchemaParser()
74 |     schema = schematron_parser.parse(schematron_xml.getroot(), parsing_context)
75 |     schema = ResolveExtendsVisitor(schema).apply(schema)
76 |     schema = ResolveAbstractPatternsVisitor(schema).apply(schema)
77 |     schema = PhaseSelectionVisitor(schema, phase).apply(schema)
78 | 
79 |     validator = SimpleSchematronXMLValidator(schema, phase, parsing_context.base_path)
80 | 
81 |     xml_document = load_xml_document(example_xml_document_path)
82 |     validation_results = validator.validate_xml(xml_document)
83 | 
84 |     svrl_report = DefaultSVRLReportBuilder().create_svrl_xml(validation_results)
85 | 
86 |     report_str = etree.tostring(svrl_report, pretty_print=True).decode('utf-8')
87 |     print(report_str)
88 |     print(validation_results.is_valid())
89 | 
90 | 
91 | demo_functional_interface()
92 | demo_generic_api()
93 | demo_full_api()
94 | 
95 | 


--------------------------------------------------------------------------------
/pyschematron/__init__.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'Robbert Harms'
 2 | __date__ = '2022-02-25'
 3 | __maintainer__ = 'Robbert Harms'
 4 | __email__ = 'robbert@xkls.nl'
 5 | 
 6 | from pathlib import Path
 7 | from typing import Literal
 8 | 
 9 | from lxml.etree import _ElementTree
10 | 
11 | from pyschematron.__version__ import __version__
12 | from pyschematron.api import ValidationResult
13 | from pyschematron.direct_mode.api import DirectModeSchematronValidatorFactory
14 | from pyschematron.direct_mode.xml_validation.queries.base import CustomQueryFunction
15 | 
16 | 
17 | def validate_document(xml_document: Path | _ElementTree,
18 |                       schematron_schema: Path | _ElementTree,
19 |                       phase: str | None = None,
20 |                       schematron_base_path: Path | None = None,
21 |                       custom_functions: dict[str, str | list[CustomQueryFunction]] | None = None,
22 |                       mode: Literal['direct-mode'] = 'direct-mode') -> ValidationResult:
23 |     """Validate an XML document using a Schematron schema.
24 | 
25 |     Args:
26 |         xml_document: the XML document we would like to validate
27 |         schematron_schema: the Schematron Schema we would like to load and use for the validation
28 |         phase: the Schematron phase we would like to use, optional.
29 |         schematron_base_path: explicitly set the Schematron base path, this is used in Schematron file inclusions.
30 |         custom_functions: a dictionary defining additional custom functions to add to the parser(s).
31 |             This should at least contain the key 'query_binding' mapping to a query binding name, and the
32 |             key 'custom_query_functions' specifying a list of custom query functions to add.
33 |             For non-standard query binding language, you also need to provide the key 'base_query_binding'
34 |             mapping to a standard query binding. Example usage:
35 |             `{'query_binding': 'xpath31-custom', 'base_query_binding': 'xpath31', 'custom_query_functions': [...]}`.
36 |         mode: which validation mode we would like to use, at the moment this only supports the 'direct-mode'.
37 | 
38 |     Returns:
39 |         The validation result in an API wrapper.
40 |     """
41 |     validator_factory = DirectModeSchematronValidatorFactory(schematron_xml=schematron_schema, phase=phase,
42 |                                                              schematron_base_path=schematron_base_path)
43 |     if custom_functions:
44 |         validator_factory.add_custom_functions(**custom_functions)
45 |     validator = validator_factory.build()
46 |     return validator.validate(xml_document)
47 | 
48 | 
49 | def validate_documents(xml_documents: list[Path | _ElementTree],
50 |                        schematron_schema: Path | _ElementTree,
51 |                        phase: str | None = None,
52 |                        custom_functions: dict[str, str | list[CustomQueryFunction]] | None = None,
53 |                        mode: Literal['direct-mode'] = 'direct-mode') -> list[ValidationResult]:
54 |     """Validate multiple XML documents using the same Schematron schema.
55 | 
56 |     This assumes we would like to use the same Schematron phase for each validation. As such, we can afford a speed-up
57 |     since we don't need to compile the Schematron every run again.
58 | 
59 |     Args:
60 |         xml_documents: the XML documents we would like to validate
61 |         schematron_schema: the Schematron Schema we would like to load and use for the validation
62 |         phase: the Schematron phase we would like to use, optional.
63 |         custom_functions: a dictionary defining additional custom functions to add to the parser(s).
64 |             This should at least contain the key 'query_binding' mapping to a query binding name, and the
65 |             key 'custom_query_functions' specifying a list of custom query functions to add.
66 |             For non-standard query binding language, you also need to provide the key 'base_query_binding'
67 |             mapping to a standard query binding. Example usage:
68 |             `{'query_binding': 'xpath31-custom', 'base_query_binding': 'xpath31', 'custom_query_functions': [...]}`.
69 |         mode: which validation mode we would like to use, at the moment this only supports the 'direct-mode'.
70 | 
71 |     Returns:
72 |         The validation results in an API wrapper.
73 |     """
74 |     validator_factory = DirectModeSchematronValidatorFactory(schematron_xml=schematron_schema, phase=phase)
75 |     if custom_functions:
76 |         validator_factory.add_custom_functions(**custom_functions)
77 |     validator = validator_factory.build()
78 | 
79 |     validation_results = []
80 |     for document in xml_documents:
81 |         validation_results.append(validator.validate(document))
82 | 
83 |     return validation_results
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/pyschematron/api.py:
--------------------------------------------------------------------------------
  1 | """Definition of the common API for validating an XML using Schematron.
  2 | 
  3 | This defines a common interface for Schematron validators to implement.
  4 | 
  5 | At the moment this is only implemented by direct mode evaluation, but in the future it might also support
  6 | the XSLT evaluation.
  7 | """
  8 | from __future__ import annotations
  9 | 
 10 | __author__ = 'Robbert Harms'
 11 | __date__ = '2024-04-01'
 12 | __maintainer__ = 'Robbert Harms'
 13 | __email__ = 'robbert@xkls.nl'
 14 | __licence__ = 'LGPL v3'
 15 | 
 16 | from abc import ABCMeta, abstractmethod
 17 | from pathlib import Path
 18 | 
 19 | from lxml.etree import _ElementTree
 20 | 
 21 | from pyschematron.direct_mode.xml_validation.queries.base import CustomQueryFunction
 22 | 
 23 | 
 24 | class SchematronValidatorFactory(metaclass=ABCMeta):
 25 |     """Factory class for generating Schematron validators.
 26 | 
 27 |     The Schematron validators do the hard work of validating an XML document. To ensure that a single validator can
 28 |     process multiple XML documents once set on a phase, we make the validators immutable and build them using this
 29 |     factory.
 30 |     """
 31 | 
 32 |     @abstractmethod
 33 |     def set_schema(self, schematron_xml: Path | _ElementTree):
 34 |         """Set the Schematron schema.
 35 | 
 36 |         Args:
 37 |             schematron_xml: the Schematron Schema we would like to use in the validation.
 38 |         """
 39 | 
 40 |     @abstractmethod
 41 |     def set_base_path(self, schematron_base_path: Path):
 42 |         """Set the Schematron base path.
 43 | 
 44 |         Some Schematron schemas include files from other locations. If you would like explicit control over the
 45 |         Schematron base path, or if the path could not be inferred from the provided schema, set it here explicitly.
 46 | 
 47 |         Args:
 48 |             schematron_base_path: the base path for the Schematron definition
 49 |         """
 50 | 
 51 |     @abstractmethod
 52 |     def set_phase(self, phase: str | None):
 53 |         """Set the phase we would like the Schematron validator to validate.
 54 | 
 55 |         By setting this before we load the Schematron validator, we can prepare the phase for evaluation, giving
 56 |         us some speed benefit.
 57 | 
 58 |         Args:
 59 |             phase: the phase we would like to validate. If set to None we use the '#DEFAULT' phase.
 60 |         """
 61 | 
 62 |     @abstractmethod
 63 |     def build(self) -> SchematronValidator:
 64 |         """Construct the configured Schematron validator.
 65 | 
 66 |         Returns:
 67 |             An implementation of the Schematron validator.
 68 |         """
 69 | 
 70 |     @abstractmethod
 71 |     def add_custom_functions(self,
 72 |                              query_binding: str,
 73 |                              custom_query_functions: list[CustomQueryFunction],
 74 |                              base_query_binding: str | None = None):
 75 |         """Add custom functions to the parser we would like to use.
 76 | 
 77 |         The custom query functions are added to the parser for a specific query binding language. Repeated calls
 78 |         accumulate the additional functions. If you create a new query binding specifier, you need to specify the base
 79 |         query binding language to use as basis for the new query binding language.
 80 | 
 81 |         Args:
 82 |             query_binding: the query binding language for which we are adding the custom query functions
 83 |             custom_query_functions: a list of custom query function objects
 84 |             base_query_binding: the basis to use for any new query binding language
 85 |         """
 86 | 
 87 | 
 88 | class SchematronValidator(metaclass=ABCMeta):
 89 |     """The Schematron validator, validating an XML document using the init injected Schematron definition."""
 90 | 
 91 |     @abstractmethod
 92 |     def validate(self, xml_data: Path | _ElementTree) -> ValidationResult:
 93 |         """Validate an XML document and return the validation result.
 94 | 
 95 |         Args:
 96 |             xml_data: the XML data we would like to validate
 97 | 
 98 |         Returns:
 99 |             The validation results.
100 |         """
101 | 
102 | 
103 | class ValidationResult(metaclass=ABCMeta):
104 |     """Results from Schematron validation.
105 | 
106 |     Implementations can produce an SVRL XML document, and can tell if the validated document was valid or not.
107 |     """
108 | 
109 |     @abstractmethod
110 |     def get_svrl(self) -> _ElementTree:
111 |         """Get the SVRL as an XML element tree.
112 | 
113 |         Returns:
114 |             The SVRL as an element tree.
115 |         """
116 | 
117 |     @abstractmethod
118 |     def is_valid(self) -> bool:
119 |         """Check if the document we validated was valid.
120 | 
121 |         According to the specifications, a successful report is considered a failure. As such, this method considers
122 |         an XML document to be valid only if none of the assertions and none of the reports were raised.
123 | 
124 |         Returns:
125 |             If the document was valid return True, else False.
126 |         """
127 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/queries/factories.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | __author__ = 'Robbert Harms'
  4 | __date__ = '2023-03-25'
  5 | __maintainer__ = 'Robbert Harms'
  6 | __email__ = 'robbert@xkls.nl'
  7 | __licence__ = 'GPL v3'
  8 | 
  9 | from abc import ABCMeta, abstractmethod
 10 | from typing import override
 11 | 
 12 | from pyschematron.direct_mode.schematron.ast import Schema
 13 | from pyschematron.direct_mode.xml_validation.queries.base import QueryProcessor
 14 | from pyschematron.direct_mode.xml_validation.queries.xpath import XPath1QueryParser, XPath2QueryParser, \
 15 |     XPath3QueryParser, XPath31QueryParser, XPathQueryProcessor
 16 | 
 17 | 
 18 | class QueryProcessorFactory(metaclass=ABCMeta):
 19 |     """Query processor factories can construct QueryProcessor instances specific to your query binding language.
 20 | 
 21 |     In Schematron, the queryBinding attribute determines which query language is used. This factory
 22 |     allows you to get the right query processor for your query binding language.
 23 |     """
 24 | 
 25 |     @abstractmethod
 26 |     def get_query_processor(self, query_binding: str) -> QueryProcessor:
 27 |         """Get the processor you can use for this query binding language.
 28 | 
 29 |         Args:
 30 |             query_binding: the query binding for which we want to get a processor.
 31 | 
 32 |         Returns:
 33 |             A query processor specialized for this query binding language.
 34 | 
 35 |         Raises:
 36 |             ValueError: if no query processor could be found for the indicated query binding.
 37 |         """
 38 | 
 39 |     @abstractmethod
 40 |     def has_query_processor(self, query_binding: str) -> bool:
 41 |         """Check if we have a processor for the specific query binding language.
 42 | 
 43 |         Args:
 44 |             query_binding: the query binding for which we want to check if a processor is available.
 45 | 
 46 |         Returns:
 47 |             True if we have a processor, False otherwise.
 48 |         """
 49 | 
 50 |     @abstractmethod
 51 |     def get_schema_query_processor(self, schema: Schema) -> QueryProcessor:
 52 |         """Get the processor you can use for this schema.
 53 | 
 54 |         Not only will this select the right query binding, it will also load the namespaces.
 55 | 
 56 |         Args:
 57 |             schema: the Schema for which we want to get a query processor.
 58 | 
 59 |         Returns:
 60 |             A query processor specialized for this Schema, with the right query binding language and
 61 |                 the namespaces loaded.
 62 | 
 63 |         Raises:
 64 |             ValueError: if no query processor could be found for this Schema.
 65 |         """
 66 | 
 67 | 
 68 | class DefaultQueryProcessorFactory(QueryProcessorFactory):
 69 | 
 70 |     def __init__(self):
 71 |         """The default query processor factory.
 72 | 
 73 |         This factory only supports XSLT and XPath query languages. The XSLT query binding is additionally limited
 74 |         to XPath expressions.
 75 |         """
 76 |         self._query_processors = {
 77 |             'xslt': XPathQueryProcessor(XPath1QueryParser()),
 78 |             'xslt2': XPathQueryProcessor(XPath2QueryParser()),
 79 |             'xslt3': XPathQueryProcessor(XPath3QueryParser()),
 80 |             'xpath': XPathQueryProcessor(XPath1QueryParser()),
 81 |             'xpath2': XPathQueryProcessor(XPath2QueryParser()),
 82 |             'xpath3': XPathQueryProcessor(XPath3QueryParser()),
 83 |             'xpath31': XPathQueryProcessor(XPath31QueryParser()),
 84 |         }
 85 | 
 86 |     @override
 87 |     def get_query_processor(self, query_binding: str) -> QueryProcessor:
 88 |         try:
 89 |             return self._query_processors[query_binding]
 90 |         except KeyError:
 91 |             raise ValueError(f'No parser could be found for the query binding "{query_binding}".')
 92 | 
 93 |     @override
 94 |     def has_query_processor(self, query_binding: str) -> bool:
 95 |         return query_binding in self._query_processors
 96 | 
 97 |     @override
 98 |     def get_schema_query_processor(self, schema: Schema) -> QueryProcessor:
 99 |         query_binding = schema.query_binding or 'xslt'
100 |         namespaces = {ns.prefix: ns.uri for ns in schema.namespaces}
101 | 
102 |         processor = self.get_query_processor(query_binding)
103 |         return processor.with_namespaces(namespaces)
104 | 
105 | 
106 | class ExtendableQueryProcessorFactory(DefaultQueryProcessorFactory):
107 | 
108 |     def __init__(self):
109 |         """An extendable query processor factory.
110 | 
111 |         This has all the processors from the default query processor factory, but allows extending and/or overwriting
112 |         these using getters and setters.
113 |         """
114 |         super().__init__()
115 | 
116 |     def set_query_processor(self, query_binding: str, query_processor: QueryProcessor):
117 |         """Set the query processor to use for a specific query binding language.
118 | 
119 |         Args:
120 |             query_binding: the query binding we wish to add / overwrite.
121 |             query_processor: the query processor we would like to use for this query binding.
122 |         """
123 |         self._query_processors[query_binding] = query_processor
124 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | SHELL := /bin/bash
  2 | PYTHON := $$(which python3)
  3 | PIP := $$(which pip3)
  4 | PYTEST := $$(which pytest)
  5 | PROJECT_NAME := pyschematron
  6 | GIT_BRANCH := $$(git branch --show-current)
  7 | PROJECT_VERSION := $(shell grep -m 1 version pyproject.toml | tr -s ' ' | tr -d '"' | tr -d "'" | cut -d' ' -f3)
  8 | 
  9 | .PHONY: help
 10 | help:
 11 | 	@echo "clean: remove all build, test, coverage and Python artifacts (no uninstall)"
 12 | 	@echo "test(s): run unit and integration tests with the default Python."
 13 | 	@echo "test-unit: run the unit tests using the default Python."
 14 | 	@echo "test-integration: run the integration tests using the default Python."
 15 | 	@echo "test-all: run all tests using all environments using tox"
 16 | 	@echo "docs: generate Sphinx HTML documentation, including API docs"
 17 | 	@echo "docs-pdf: generate the PDF documentation, including API docs"
 18 | 	@echo "docs-man: generate the linux manpages"
 19 | 	@echo "docs-changelog: generate the changelog documentation"
 20 | 	@echo "install-deps: install all the dependencies"
 21 | 	@echo "install-symlink: install the package as a symlink, to allow continuous development"
 22 | 	@echo "uninstall: uninstall PySchematron (while keeping the dependencies)"
 23 | 	@echo "prepare-release: prepare for a new release"
 24 | 	@echo "release: package and release the new version"
 25 | 
 26 | 
 27 | .PHONY: clean
 28 | clean: clean-build clean-pyc clean-test
 29 | 
 30 | .PHONY: clean-build
 31 | clean-build:
 32 | 	rm -fr build/
 33 | 	rm -fr dist/
 34 | 	rm -fr .eggs/
 35 | 	find . -name '*.egg-info' -exec rm -fr {} +
 36 | 	find . -name '*.egg' -exec rm -f {} +
 37 | 
 38 | .PHONY: clean-pyc
 39 | clean-pyc:
 40 | 	find . -name '*.pyc' -exec rm -f {} +
 41 | 	find . -name '*.pyo' -exec rm -f {} +
 42 | 	find . -name '*~' -exec rm -f {} +
 43 | 	find . -name '__pycache__' -exec rm -fr {} +
 44 | 
 45 | .PHONY: clean-test
 46 | clean-test:
 47 | 	rm -rf .tox/
 48 | 	rm -f .coverage
 49 | 	rm -rf htmlcov/
 50 | 	rm -rf .pytest_cache
 51 | 	rm -rf tests/htmlcov
 52 | 	rm -rf tests/.coverage
 53 | 	find tests -name 'build' -exec rm -rf {} +
 54 | 	find tests -name '.coverage' -exec rm -rf {} +
 55 | 
 56 | 
 57 | .PHONY: tests
 58 | tests: test
 59 | 
 60 | .PHONY: test
 61 | test:
 62 | 	mkdir -p build
 63 | 	COVERAGE_FILE=build/.coverage \
 64 | 	$(PYTEST) tests --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html
 65 | 
 66 | .PHONY: test-unit
 67 | test-unit:
 68 | 	mkdir -p build
 69 | 	COVERAGE_FILE=build/.coverage \
 70 | 	$(PYTEST) tests/unit --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html
 71 | 
 72 | .PHONY: test-integration
 73 | test-integration:
 74 | 	mkdir -p build
 75 | 	COVERAGE_FILE=build/.coverage \
 76 | 	$(PYTEST) tests/integration --cov=$(PROJECT_NAME) --cov-report=html:build/coverage/defaultenv --cov-report=term --html=build/pytest/report-defaultenv.html --self-contained-html
 77 | 
 78 | .PHONY: test-all
 79 | test-all:
 80 | 	tox
 81 | 
 82 | .PHONY: docs
 83 | docs:
 84 | 	mkdir -p build
 85 | 	rm -f docs/$(PROJECT_NAME)*.rst
 86 | 	rm -f docs/modules.rst
 87 | 	$(MAKE) -C docs clean
 88 | 	sphinx-apidoc -o docs/ $(PROJECT_NAME)
 89 | 	$(MAKE) -C docs html SPHINXBUILD='python3 $(shell which sphinx-build)'
 90 | 	@echo "To view results type: firefox docs/_build/html/index.html &"
 91 | 
 92 | .PHONY: docs-pdf
 93 | docs-pdf:
 94 | 	mkdir -p build
 95 | 	rm -f docs/$(PROJECT_NAME)*.rst
 96 | 	rm -f docs/modules.rst
 97 | 	$(MAKE) -C docs clean
 98 | 	sphinx-apidoc -o docs/ $(PROJECT_NAME)
 99 | 	$(MAKE) -C docs latexpdf SPHINXBUILD='python3 $(shell which sphinx-build)'
100 | 	@echo "To view results use something like: evince docs/_build/latex/$(PROJECT_NAME).pdf &"
101 | 
102 | .PHONY: docs-man
103 | docs-man:
104 | 	rm -f docs/$(PROJECT_NAME)*.rst
105 | 	rm -f docs/modules.rst
106 | 	$(MAKE) -C docs clean
107 | 	sphinx-apidoc -o docs/ $(PROJECT_NAME)
108 | 	$(MAKE) -C docs man SPHINXBUILD='python3 $(shell which sphinx-build)'
109 | 	@echo "To view results use something like: man docs/_build/man/$(PROJECT_NAME).1 &"
110 | 
111 | 
112 | .PHONY: docs-changelog
113 | docs-changelog:
114 | 	 git cliff --prepend CHANGELOG.rst -l -u
115 | 
116 | 
117 | .PHONY: prepare-release
118 | prepare-release: clean
119 | 	@echo "Current version: "$(PROJECT_VERSION)
120 | 	@while [ -z "$$NEW_VERSION" ]; do \
121 |         read -r -p "Give new version: " NEW_VERSION;\
122 |     done && \
123 |     ( \
124 |         printf 'Setting new version: %s \n\n' \
125 |         	"$$NEW_VERSION " \
126 | 	) && sed -i 's/version = \"\(.*\)\"/version = "'$$NEW_VERSION'"/g' pyproject.toml \
127 |       && git cliff -l -u --tag $$NEW_VERSION --prepend CHANGELOG.rst \
128 |       && echo "Please manually inspect CHANGELOG.rst before continuing." \
129 |       && read ans \
130 |       && git add -u \
131 | 	  && git diff-index --quiet HEAD || git commit -am "release: New release" \
132 | 	  && git tag -a v$$NEW_VERSION -m "Version $$NEW_VERSION" \
133 | 
134 | 
135 | .PHONY: release
136 | release: clean release-git release-pip
137 | 
138 | .PHONY: release-git
139 | release-git:
140 | 	git push
141 | 	git push origin --tags
142 | 
143 | .PHONY: release-pip
144 | release-pip:
145 | 	flit publish
146 | 
147 | 
148 | .PHONY: dist
149 | dist: clean
150 | 	$(PYTHON) setup.py sdist
151 | 	$(PYTHON) setup.py bdist_wheel
152 | 	ls -l dist
153 | 
154 | .PHONY: install-deps
155 | install-deps:
156 | 	flit install --only-deps
157 | 
158 | .PHONY: install-symlink
159 | install-symlink:
160 | 	flit install --symlink
161 | 
162 | .PHONY: uninstall
163 | uninstall:
164 | 	$(PIP) uninstall -y $(PROJECT_NAME)
165 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | ############
  2 | PySchematron
  3 | ############
  4 | This is a library package for Schematron validation in Python.
  5 | 
  6 | Schematron is a schema language used to validate XML documents.
  7 | A Schematron schema is defined as an XML containing various assertions to validate a target XML document.
  8 | If the XML you wish to validate passes all the Schematron assertions,
  9 | your XML is considered valid according to the Schematron schema.
 10 | Complete validation results are offered using the Schematron Validation Report Language,
 11 | a loose definition of an XML based validation report.
 12 | 
 13 | There are various versions of Schematron available.
 14 | This library only supports the latest version of Schematron,
 15 | `ISO/IEC 19757-3:2020 <https://www.iso.org/standard/74515.html>`_, with a few limitations (see below).
 16 | 
 17 | Currently, this library only supports a pure Python mode of Schematron validation.
 18 | In this pure Python mode we load the Schematron into an internal representation and apply that to an XML.
 19 | The advantage of such direct evaluation is that it offers superior performance compared to an XSLT
 20 | transformation based evaluation.
 21 | The disadvantage is that it only supports XPath expressions and does not support XSLT functions.
 22 | 
 23 | In the future we hope to expand this library with XSLT transformation based processing.
 24 | Unfortunately XSLT transformations require an XSLT processor,
 25 | which is currently not available in Python for XSLT >= 2.0.
 26 | 
 27 | A few similar packages to this software in other languages are
 28 | `node-schematron <https://github.com/wvbe/node-schematron#readme>`_ in Javascript, and
 29 | `ph-schematron <http://phax.github.io/ph-schematron/>`_ in Java.
 30 | 
 31 | For all XPath expressions this package uses the
 32 | `elementpath <https://github.com/sissaschool/elementpath>`_ library supporting XPath 1.0, 2.0, 3.0 and 3.1 selectors.
 33 | 
 34 | Please note that, as of this writing, this package only supports Python 3.12.
 35 | Older Python versions are not supported due to missing functionality (Python syntax primarily).
 36 | Newer versions will be supported in due time.
 37 | 
 38 | **********
 39 | Python API
 40 | **********
 41 | To use the Python API, install the project like any other Python project, e.g. using ``pip install pyschematron``.
 42 | 
 43 | After that you can use:
 44 | 
 45 | .. code:: python
 46 | 
 47 |     from pyschematron import validate_document
 48 | 
 49 |     result = validate_document(<xml_document.xml>, <schematron_schema.sch>)
 50 | 
 51 |     svrl = result.get_svrl()
 52 |     is_valid = result.is_valid()
 53 | 
 54 | 
 55 | To process multiple documents with the same Schematron schema, you can use:
 56 | 
 57 | .. code:: python
 58 | 
 59 |     from pyschematron import validate_documents
 60 | 
 61 |     documents = [...]
 62 |     schema = <schema.sch>
 63 | 
 64 |     results = validate_documents(documents, schema)
 65 | 
 66 | 
 67 | For more examples, or examples on how to use different parts of the API, please see the ``demo_*`` files in the
 68 | ``scripts`` directory.
 69 | 
 70 | 
 71 | **********************
 72 | Command Line Interface
 73 | **********************
 74 | To use the command line interface, first install the application using pip: ``pip install pyschematron``.
 75 | Afterwards, you can use the command ``pyschematron`` to validate your documents.
 76 | Use ``pyschematron --help`` to see the command line options.
 77 | 
 78 | 
 79 | *************
 80 | Functionality
 81 | *************
 82 | This library offers a basic implementation of Schematron using a pure Python "direct mode" evaluation method.
 83 | 
 84 | Direct mode evaluation
 85 | ======================
 86 | The direct mode evaluation allows for basic validity checks using all XPath functionality of Schematron.
 87 | 
 88 | The direct mode evaluation follows this procedure to validate a document:
 89 | 
 90 | #. Read in the Schematron from either a document or a string.
 91 |    In this phase the document is loaded into an AST (abstract syntax tree).
 92 |    All ``<include />`` are resolved and inlined into the AST.
 93 |    All ``<extends />`` are loaded but not fully resolved at this stage.
 94 | #. Recreate the AST without abstract patterns and rules.
 95 |    In this phase we process the AST to create a concrete set of patterns and rules.
 96 |    All ``<extends />`` are resolved, abstract patterns are instantiated,
 97 |    and redundant abstract rules and patterns are removed.
 98 | #. Phase selection: we limit the AST to only the patterns and phases that belong to the selected phase.
 99 | #. Query binding: we determine the query binding language to use.
100 |    This library only supports ``xslt``, ``xslt2``, ``xslt3``, ``xpath``, ``xpath2``, ``xpath3``, and ``xpath31``,
101 |    where all ``xslt`` variations are limited to XPath expressions only.
102 | #. Apply the bound schema to an XML document to validate.
103 | 
104 | 
105 | Custom functions
106 | ----------------
107 | With the current direct mode evaluation method, custom XSLT functions in your Schematron (``<xsl:function>``) are not supported.
108 | Custom Python functions are supported, however. View ``demo_custom_functions.py`` in the ``scripts`` directory for examples.
109 | 
110 | 
111 | Compliance
112 | ----------
113 | The direct mode evaluation supports most of the `ISO/IEC 19757-3:2020 <https://www.iso.org/standard/74515.html>`_ standard, with a few exceptions.
114 | All Schematron specific elements are supported, except for XSLT elements.
115 | 
116 | In terms of attributes, the ``@documents`` attribute of the ``<assert />`` tag is not supported.
117 | Furthermore, ``@icon``, ``@see``, ``@fpi``, ``@flag``, and ``@role`` are loaded but not used.
118 | 
119 | Note that the ISO Schematron applies rules to:
120 | 
121 | - Elements (*)
122 | - Attributes (@*)
123 | - Root node (/)
124 | - Comments (comment())
125 | - Processing instructions (processing-instruction())
126 | 
127 | But it does not apply rules to text nodes.
128 | 
129 | If there are any problems, please open a GitHub issue.
130 | 
131 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_example/schema.sch:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <schema xmlns="http://purl.oclc.org/dsdl/schematron"
  3 |         schemaVersion="iso"
  4 |         defaultPhase="check-weights"
  5 |         queryBinding="xslt3"
  6 |         xml:lang="en"
  7 |         fpi="-//PYSCHEMATRON//DTD XML 1.0//EN">
  8 | 
  9 |     <title>Cargo checking</title>
 10 |     <ns prefix="c" uri="http://www.amazing-cargo.com/xml/data/2023"/>
 11 |     <p>This checks the cargo manifest on weight, size, and vehicles on number of wheels.
 12 |         By default it checks on weight only.</p>
 13 |     <p class="some-class" id="some-paragraph-id" icon="smiley" xml:lang="en">A second paragraph to test if this is processed correctly.</p>
 14 | 
 15 |     <!-- Showing dynamic variables. -->
 16 |     <let name="max-weight" value="xs:integer(/c:cargo/c:vehicles[1]/c:apple[1]/@weight)"/> <!-- kg -->
 17 |     <let name="max-volume" value="xs:integer(200) * $max-weight"/> <!-- m3 -->
 18 | 
 19 | 
 20 |     <!-- Showcasing pattern with external extends -->
 21 |     <pattern id="pa_check-weights">
 22 |         <title>Weight check</title>
 23 |         <let name="test-variable-in-pattern" value="42"/>
 24 |         <let name="second-test-variable-in-pattern"><html xmlns="http://www.w3.org/1999/xhtml">info</html></let>
 25 |         <rule context="c:*[@type='vehicle']">
 26 |             <extends href="check_weights.sch"/>
 27 |         </rule>
 28 |         <rule context="c:*[@type='fruit']">
 29 |             <extends href="check_weights.sch"/>
 30 |         </rule>
 31 |         <rule context="c:*">
 32 |             <!-- Blank rule to check if we only match nodes once. -->
 33 |         </rule>
 34 |     </pattern>
 35 | 
 36 | 
 37 |     <!-- Showcasing pattern with abstract rules -->
 38 |     <pattern id="pa_check-volumes">
 39 |         <title>Volume check</title>
 40 |         <rule context="c:*[@type='vehicle']">
 41 |             <extends rule="ru_abstract-volume-check"/>
 42 |         </rule>
 43 |         <rule context="c:*[@type='fruit']">
 44 |             <extends rule="ru_abstract-volume-check"/>
 45 |         </rule>
 46 |     </pattern>
 47 | 
 48 |     <!-- Showing the use of subject in a rule, and check -->
 49 |     <pattern id="pa_check-banana">
 50 |         <p>Just a check on the banana's.</p>
 51 |         <rule subject="/c:cargo[1]" context="c:banana">
 52 |             <report test="@type = 'fruit'">Banana is a fruit.</report>
 53 |             <report subject="." test="@weight = 1">And the weight of the banana shall be 1.</report>
 54 |         </rule>
 55 |     </pattern>
 56 | 
 57 |     <pattern>
 58 |         <rule abstract="true" id="ru_abstract-volume-check">
 59 |             <assert test="xs:integer(@volume) le $max-volume" properties="pr_maxVolume pr_volume">
 60 |                 Volume not correct (<value-of select="@volume"/> vs <value-of select="$max-volume"/> at <name/>).
 61 |                 Testing name with path: <name path="//c:cargo"/>
 62 |             </assert>
 63 |             <report test="xs:integer(@volume) gt $max-volume">
 64 |                 We report an item with a volume greater than allowed.
 65 |             </report>
 66 |         </rule>
 67 |     </pattern>
 68 | 
 69 |     <!-- Showcasing that we support checks on processing instructions. -->
 70 |     <pattern id="pa_processing-instructions-test">
 71 |         <rule id="ru_processing-instructions-test" context="processing-instruction('xml-model')">
 72 |             <assert test="contains(.,'foobar')">
 73 |                 XML model processing instruction does not include foobar.
 74 |             </assert>
 75 |         </rule>
 76 |     </pattern>
 77 | 
 78 |     <!-- Showcasing that we support checks on comments. -->
 79 |     <pattern id="pa_comments-test">
 80 |         <rule id="ru_comments-test" context="comment()">
 81 |             <assert test="starts-with(., ' Comment: ')">
 82 |                 This comment does not start with "Comment: ".
 83 |             </assert>
 84 |         </rule>
 85 |     </pattern>
 86 | 
 87 |     <!-- Showcasing that we support checks on attributes. -->
 88 |     <pattern id="pa_attribute-test">
 89 |         <rule id="ru_attribute-test" context="@id">
 90 |             <assert test="starts-with(., 'id_')">
 91 |                 The id attribute does not starts with "id_".
 92 |             </assert>
 93 |         </rule>
 94 |     </pattern>
 95 | 
 96 |     <!-- Show that we can check the root and check the namespace -->
 97 |     <pattern id="pa_root-test">
 98 |         <rule id="ru_root-test" context="c:cargo">
 99 |             <assert test="@id">
100 |                 The root node does not have an ID.
101 |             </assert>
102 |             <assert test="namespace-uri-for-prefix('test', .) = 'http://www.test.com'">
103 |                 The namespace for "test" should be "http://www.test.com".
104 |             </assert>
105 |         </rule>
106 |     </pattern>
107 | 
108 |     <!-- Showcasing abstract patterns -->
109 |     <pattern is-a="pa_check-category" id="pa_check-category-vehicles">
110 |         <p>Check for all the vehicles if they are in the right category.</p>
111 |         <param name="pv_items" value="c:*[@type='vehicle']"/>
112 |         <param name="pv_category" value="c:vehicles"/>
113 |     </pattern>
114 | 
115 |     <pattern is-a="pa_check-category" id="pa_check-category-fruits">
116 |         <p>Check for all the fruits if they are in the right category.</p>
117 |         <param name="pv_items" value="c:*[@type='fruit']"/>
118 |         <param name="pv_category" value="c:fruits"/>
119 |     </pattern>
120 | 
121 |     <pattern abstract="true" id="pa_check-category">
122 |         <p>Check if items are in the right category ($pv_category).</p>
123 |         <rule context="$pv_items">
124 |             <assert test="parent::$pv_category">
125 |                    The item <name/> is in the wrong category ($pv_category).
126 |                    Extra data <value-of select="count(parent::$pv_category)"/>
127 |             </assert>
128 |             <extends href="abstract_extends.sch"/>
129 |         </rule>
130 |     </pattern>
131 | 
132 |     <!-- Showcasing phases -->
133 |     <phase id="check-weights">
134 |         <p>Only check the cargo items for weight.</p>
135 |         <active pattern="pa_check-weights">Check for weights</active>
136 |         <let name="demonstration-of-let-in-phase" value="xs:integer(0)"/>
137 |     </phase>
138 | 
139 |     <phase id="check-volumes">
140 |         <p>Only check the cargo items for volume.</p>
141 |         <active pattern="pa_check-volumes"/>
142 |     </phase>
143 | 
144 |     <phase id="check-categories">
145 |         <p>Only check the cargo for the right category.</p>
146 |         <active pattern="pa_check-category-vehicles"/>
147 |         <active pattern="pa_check-category-fruits"/>
148 |     </phase>
149 | 
150 | 
151 |     <!-- Showcasing properties -->
152 |     <properties>
153 |         <property id="pr_maxWeight" scheme="kg"><value-of select="$max-weight"/></property>
154 |         <property id="pr_maxVolume" scheme="m3"><value-of select="$max-volume"/></property>
155 |         <property id="pr_weight" scheme="kg"><value-of select="@weight"/></property>
156 |         <property id="pr_volume" scheme="m3"><value-of select="@volume"/></property>
157 |     </properties>
158 | 
159 | 
160 |     <!-- Showcasing include -->
161 |     <include href="diagnostics.sch"/>
162 | </schema>
163 | 


--------------------------------------------------------------------------------
/scripts/demo_custom_functions.py:
--------------------------------------------------------------------------------
  1 | """This script shows how to use custom Python functions inside your Schematron schemas.
  2 | 
  3 | The general idea is that you either overwrite an existing query binding language, or define a new query binding
  4 | with your custom functions loaded. Your custom functions will then be attached to the query parser defined for that
  5 | specific query language.
  6 | 
  7 | As an example, suppose you have a small custom function named `custom-func()`, and you want to use it in your
  8 | Schematron Schema. Your Schema is defined using `queryBinding="xpath31"` and you wish to extend this with your
  9 | custom function. For clarity, you want to call your new query binding language "xpath31-custom". In your Schematron
 10 | schema you then use `queryBinding="xpath31-custom"`, and in your queries you can use the `custom-func()`. For
 11 | PySchematron to know about this function, you must define it and add it to the library. This module shows how.
 12 | 
 13 | There are three ways of interacting with the PySchematron direct-mode validator. The most simple is by using the
 14 | functional interface defined in the main module. Second, you can use a generalized API which might be extended in the
 15 | future with an XSLT methodology. Finally, you can use the full-blown direct-mode classes and methods. The latter is the
 16 | most complicated but gives the most control. Any of these, though, enables adding custom functions.
 17 | """
 18 | 
 19 | __author__ = 'Robbert Harms'
 20 | __date__ = '2024-04-03'
 21 | __maintainer__ = 'Robbert Harms'
 22 | __email__ = 'robbert@xkls.nl'
 23 | __licence__ = 'LGPL v3'
 24 | 
 25 | from pathlib import Path
 26 | 
 27 | from elementpath import ElementNode
 28 | from lxml import etree
 29 | from lxml.etree import _ElementTree
 30 | 
 31 | from pyschematron import DirectModeSchematronValidatorFactory, validate_document
 32 | from pyschematron.direct_mode.schematron.parsers.xml.parser import SchemaParser
 33 | from pyschematron.direct_mode.xml_validation.queries.factories import ExtendableQueryProcessorFactory
 34 | from pyschematron.direct_mode.xml_validation.queries.xpath import (XPathQueryProcessor, XPath31QueryParser,
 35 |                                                                    SimpleCustomXPathFunction)
 36 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder
 37 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator
 38 | from pyschematron.utils import load_xml_document
 39 | 
 40 | 
 41 | def get_example_schema() -> _ElementTree:
 42 |     """Get the example Schema for the examples.
 43 | 
 44 |     In this Schema, we defined a new query binding language `queryBinding="xpath31-custom"` which we will
 45 |     also need to add in PySchematron. Note also the use of `custom-func()`.
 46 | 
 47 |     Returns:
 48 |         The loaded Schema.
 49 |     """
 50 |     schematron = '''
 51 |     <schema xmlns="http://purl.oclc.org/dsdl/schematron"
 52 |             schemaVersion="iso"
 53 |             queryBinding="xpath31-custom"
 54 |             xml:lang="en"
 55 |             fpi="-//PYSCHEMATRON//DTD XML 1.0//EN">
 56 | 
 57 |         <ns prefix="c" uri="http://www.amazing-cargo.com/xml/data/2023"/>
 58 | 
 59 |         <pattern id="pa_check-banana">
 60 |             <p>Just a check on the banana's.</p>
 61 |             <rule subject="/c:cargo[1]" context="c:banana">
 62 |                 <report test="@type = 'fruit'">Banana is a fruit <value-of select="custom-func(., 10)"/></report>
 63 |             </rule>
 64 |         </pattern>
 65 |     </schema>
 66 |     '''
 67 |     return load_xml_document(schematron)
 68 | 
 69 | 
 70 | def get_example_xml_document() -> _ElementTree:
 71 |     """Get the example XML document we wish to validate.
 72 | 
 73 |     This returns the XML document from the tests fixtures.
 74 | 
 75 |     Returns:
 76 |         The XML document we wish to validate.
 77 |     """
 78 |     return load_xml_document(Path('../tests/fixtures/full_example/cargo.xml'))
 79 | 
 80 | 
 81 | def custom_func(el: ElementNode, number: int) -> int:
 82 |     """An example of a custom function.
 83 | 
 84 |     It can have any number of inputs and outputs. This example takes an elementpath element as input and an integer.
 85 |     It returns the XPath node position times the provided number.
 86 |     """
 87 |     return el.position * number
 88 | 
 89 | 
 90 | def demo_functional_interface(xml_document: _ElementTree, schematron_xml: _ElementTree):
 91 |     """Showing how to add custom XPath functions using the functional interface.
 92 | 
 93 |     This uses the functional interface, the most simple method of interacting with PySchematron.
 94 | 
 95 |     Args:
 96 |         xml_document: the document we wish to validate
 97 |         schematron_xml: the Schematron Schema
 98 |     """
 99 |     custom_functions = {
100 |         'query_binding': 'xpath31-custom',
101 |         'base_query_binding': 'xpath31',
102 |         'custom_query_functions': [SimpleCustomXPathFunction(custom_func, 'custom-func')]
103 |     }
104 | 
105 |     result = validate_document(xml_document, schematron_xml, custom_functions=custom_functions)
106 | 
107 |     svrl = result.get_svrl()
108 |     print(etree.tostring(svrl, pretty_print=True).decode('utf-8'))
109 |     print(result.is_valid())
110 | 
111 | 
112 | def demo_generic_api(xml_document: _ElementTree, schematron_xml: _ElementTree):
113 |     """Showing how to add custom XPath functions using the general API.
114 | 
115 |     This uses the generic API which in the future might be extended using the XSLT method.
116 | 
117 |     Args:
118 |         xml_document: the document we wish to validate
119 |         schematron_xml: the Schematron Schema
120 |     """
121 |     validator_factory = DirectModeSchematronValidatorFactory(schematron_xml=schematron_xml)
122 | 
123 |     validator_factory.add_custom_functions('xpath31-custom',
124 |                                            [SimpleCustomXPathFunction(custom_func, 'custom-func')], 'xpath31')
125 | 
126 |     validator = validator_factory.build()
127 |     validation_result = validator.validate(xml_document)
128 | 
129 |     svrl = validation_result.get_svrl()
130 |     print(etree.tostring(svrl, pretty_print=True).decode('utf-8'))
131 |     print(validation_result.is_valid())
132 | 
133 | 
134 | def demo_full_api(xml_document: _ElementTree, schematron_xml: _ElementTree):
135 |     """Showing how to add custom XPath functions using the full direct-mode API.
136 | 
137 |     This is the most complex method, but shows how the direct-mode method operates.
138 | 
139 |     Args:
140 |         xml_document: the document we wish to validate
141 |         schematron_xml: the Schematron Schema
142 | 
143 |     """
144 |     custom_xpath_function = SimpleCustomXPathFunction(custom_func, 'custom-func')
145 | 
146 |     custom_parser = XPath31QueryParser()
147 |     custom_parser = custom_parser.with_custom_function(custom_xpath_function)
148 | 
149 |     custom_query_processor = XPathQueryProcessor(custom_parser)
150 | 
151 |     custom_processor_factory = ExtendableQueryProcessorFactory()
152 |     custom_processor_factory.set_query_processor('xpath31-custom', custom_query_processor)
153 | 
154 |     schema = SchemaParser().parse(schematron_xml.getroot())
155 | 
156 |     validator = SimpleSchematronXMLValidator(schema, query_processor_factory=custom_processor_factory)
157 |     validation_results = validator.validate_xml(xml_document)
158 | 
159 |     svrl = DefaultSVRLReportBuilder().create_svrl_xml(validation_results)
160 | 
161 |     print(etree.tostring(svrl, pretty_print=True).decode('utf-8'))
162 |     print(validation_results.is_valid())
163 | 
164 | 
# Run the three demos one after the other, each with freshly loaded example inputs.
for run_demo in (demo_functional_interface, demo_generic_api, demo_full_api):
    run_demo(get_example_xml_document(), get_example_schema())
168 | 
169 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/api.py:
--------------------------------------------------------------------------------
  1 | """Implementation of the common API defined in the pyschematron package."""
  2 | 
  3 | __author__ = 'Robbert Harms'
  4 | __date__ = '2024-04-01'
  5 | __maintainer__ = 'Robbert Harms'
  6 | __email__ = 'robbert@xkls.nl'
  7 | __licence__ = 'LGPL v3'
  8 | 
  9 | from pathlib import Path
 10 | 
 11 | from lxml.etree import _ElementTree
 12 | 
 13 | from pyschematron.api import SchematronValidatorFactory, SchematronValidator, ValidationResult
 14 | from pyschematron.direct_mode.schematron.ast import Schema
 15 | from pyschematron.direct_mode.schematron.ast_visitors import ResolveExtendsVisitor, ResolveAbstractPatternsVisitor, \
 16 |     PhaseSelectionVisitor
 17 | from pyschematron.direct_mode.schematron.parsers.xml.parser import SchemaParser, ParsingContext
 18 | from pyschematron.direct_mode.xml_validation.queries.base import CustomQueryFunction
 19 | from pyschematron.direct_mode.xml_validation.queries.factories import ExtendableQueryProcessorFactory, \
 20 |     QueryProcessorFactory
 21 | from pyschematron.direct_mode.xml_validation.results.svrl_builder import DefaultSVRLReportBuilder
 22 | from pyschematron.direct_mode.xml_validation.results.validation_results import XMLDocumentValidationResult
 23 | from pyschematron.direct_mode.xml_validation.validators import SimpleSchematronXMLValidator
 24 | from pyschematron.utils import load_xml_document
 25 | 
 26 | 
 27 | class DirectModeSchematronValidatorFactory(SchematronValidatorFactory):
 28 | 
 29 |     def __init__(self,
 30 |                  schematron_xml: Path | _ElementTree | None = None,
 31 |                  phase: str | None = None,
 32 |                  schematron_base_path: Path | None = None):
 33 |         """Validator factory for direct mode Schematron validation.
 34 | 
 35 |         Args:
 36 |             schematron_xml: the Schematron Schema we want to evaluate. Can also be set using the class methods.
 37 |             phase: the phase we would like to evaluate. Can also be set using the class methods. If set to None
 38 |                 we use the Schema's default phase.
 39 |             schematron_base_path: explicitly set the Schematron base path, this is used in Schematron file inclusions.
 40 |         """
 41 |         self._schematron_xml = schematron_xml
 42 |         self._phase = phase
 43 |         self._schematron_base_path = schematron_base_path
 44 |         self._custom_query_functions: dict[str, list[CustomQueryFunction]] = {}
 45 |         self._custom_query_bases: dict[str, str] = {}
 46 | 
 47 |     def set_schema(self, schematron_xml: Path | _ElementTree):
 48 |         self._schematron_xml = schematron_xml
 49 | 
 50 |     def set_phase(self, phase: str | None):
 51 |         self._phase = phase
 52 | 
 53 |     def set_base_path(self, schematron_base_path: Path):
 54 |         self._schematron_base_path = schematron_base_path
 55 | 
 56 |     def add_custom_functions(self,
 57 |                              query_binding: str,
 58 |                              custom_query_functions: list[CustomQueryFunction],
 59 |                              base_query_binding: str | None = None):
 60 |         if query_binding in self._custom_query_functions:
 61 |             self._custom_query_functions[query_binding] += custom_query_functions
 62 |         else:
 63 |             self._custom_query_functions[query_binding] = custom_query_functions
 64 | 
 65 |         if base_query_binding:
 66 |             self._custom_query_bases[query_binding] = base_query_binding
 67 | 
 68 |     def build(self) -> SchematronValidator:
 69 |         if isinstance(self._schematron_xml, Path):
 70 |             schematron = load_xml_document(self._schematron_xml)
 71 |         else:
 72 |             schematron = self._schematron_xml
 73 | 
 74 |         schematron_base_path = self._get_schematron_base_path()
 75 | 
 76 |         schematron_parser = SchemaParser()
 77 |         parsing_context = ParsingContext(base_path=schematron_base_path)
 78 |         schema = schematron_parser.parse(schematron.getroot(), parsing_context)
 79 | 
 80 |         query_processor_factory = self._get_query_processor_factory()
 81 | 
 82 |         return DirectModeSchematronValidator(schema, self._phase, schematron_base_path, query_processor_factory)
 83 | 
 84 |     def _get_query_processor_factory(self) -> QueryProcessorFactory | None:
 85 |         """Get the query processor factory we would like to use for the validation.
 86 | 
 87 |         Returns:
 88 |             The query processor to use, or None if defaults can be used.
 89 |         """
 90 |         if not len(self._custom_query_functions):
 91 |             return None
 92 | 
 93 |         custom_processor_factory = ExtendableQueryProcessorFactory()
 94 | 
 95 |         for query_binding, custom_functions in self._custom_query_functions.items():
 96 |             if query_binding in self._custom_query_bases:
 97 |                 processor = custom_processor_factory.get_query_processor(self._custom_query_bases[query_binding])
 98 |             else:
 99 |                 if custom_processor_factory.has_query_processor(query_binding):
100 |                     processor = custom_processor_factory.get_query_processor(query_binding)
101 |                 else:
102 |                     raise ValueError(f'No query binding base provided for adding '
103 |                                      f'custom functions to query binding "{query_binding}"')
104 | 
105 |             for custom_function in custom_functions:
106 |                 processor = processor.with_custom_function(custom_function)
107 | 
108 |             custom_processor_factory.set_query_processor(query_binding, processor)
109 |         return custom_processor_factory
110 | 
111 |     def _get_schematron_base_path(self) -> Path | None:
112 |         """Get the base path we use for the Schematron parsing.
113 | 
114 |         If set explicitly, we return that. Else we try to infer it from the provided Schematron schema.
115 | 
116 |         Returns:
117 |             The path we would like to use in the Schematron parsing.
118 |         """
119 |         if self._schematron_base_path is not None:
120 |             return self._schematron_base_path
121 | 
122 |         if isinstance(self._schematron_xml, Path):
123 |             return self._schematron_xml.parent
124 | 
125 |         if hasattr(self._schematron_xml, 'docinfo'):
126 |             docinfo = self._schematron_xml.docinfo
127 |             if hasattr(docinfo, 'URL') and docinfo.URL:
128 |                 return Path(docinfo.URL).parent
129 | 
130 |         return None
131 | 
132 | 
class DirectModeSchematronValidator(SchematronValidator):

    def __init__(self, schema: Schema,
                 phase: str | None,
                 base_path: Path | None,
                 query_processor_factory: QueryProcessorFactory | None = None):
        """Direct mode implementation of the validator API.

        The provided schema is passed through the extends, abstract pattern and phase selection
        visitors (in that order) and the result is handed to the XML validator.

        Args:
            schema: the Schema we would like to use in the validation
            phase: the phase we would like to use
            base_path: the base path of the Schema, used for loading external Schema parts.
            query_processor_factory: optionally, specify the query processor factory to use.
        """
        self._schema = schema
        self._phase = phase
        self._base_path = base_path

        extends_resolved = ResolveExtendsVisitor(schema).apply(schema)
        patterns_resolved = ResolveAbstractPatternsVisitor(extends_resolved).apply(extends_resolved)
        phase_selected = PhaseSelectionVisitor(patterns_resolved, phase).apply(patterns_resolved)

        self._validator = SimpleSchematronXMLValidator(
            phase_selected, phase, base_path, query_processor_factory=query_processor_factory)

    def validate(self, xml_data: Path | _ElementTree) -> ValidationResult:
        """Validate an XML document, provided either as a path to load or as a loaded XML tree."""
        xml_document = load_xml_document(xml_data) if isinstance(xml_data, Path) else xml_data
        return DirectModeValidationResult(self._validator.validate_xml(xml_document))
166 | 
167 | 
class DirectModeValidationResult(ValidationResult):

    def __init__(self, validation_results: XMLDocumentValidationResult):
        """Validation results obtained from the direct mode evaluation.

        The SVRL report and the validity flag are computed once, at construction.

        Args:
            validation_results: the validation results from the direct mode evaluator.
        """
        report_builder = DefaultSVRLReportBuilder()

        self._validation_results = validation_results
        self._svrl_report = report_builder.create_svrl_xml(validation_results)
        self._is_valid = validation_results.is_valid()

    def get_svrl(self) -> _ElementTree:
        """Get the SVRL report built from the validation results."""
        return self._svrl_report

    def is_valid(self) -> bool:
        """Get the validity flag as reported by the validation results."""
        return self._is_valid
185 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/svrl/ast.py:
--------------------------------------------------------------------------------
  1 | """The abstract syntax tree for representing an SVRL report.
  2 | 
  3 | This is primarily used to write out an SVRL report after Schematron validation of an XML.
  4 | """
  5 | from __future__ import annotations
  6 | 
  7 | __author__ = 'Robbert Harms'
  8 | __date__ = '2024-03-11'
  9 | __maintainer__ = 'Robbert Harms'
 10 | __email__ = 'robbert@xkls.nl'
 11 | __licence__ = 'LGPL v3'
 12 | 
 13 | from dataclasses import dataclass
 14 | from typing import Literal
 15 | 
 16 | from lxml.etree import _Element
 17 | 
 18 | from pyschematron.direct_mode.lib.ast import GenericASTNode
 19 | 
 20 | 
@dataclass(slots=True, frozen=True)
class SVRLNode(GenericASTNode):
    """Base class for the Schematron Validation Report Language (SVRL) nodes.

    Common ancestor of the SVRL node dataclasses, it declares no fields of its own.
    """
 24 | 
 25 | 
@dataclass(slots=True, frozen=True)
class SchematronOutput(SVRLNode):
    """Representation of the `<schematron-output>` SVRL node.

    Args:
        texts: zero or more text nodes containing some text about the schema and/or validation.
        ns_prefix_in_attribute_values: namespace and prefix declarations.
        validation_events: the Schematron validation events, in order of processing.
        metadata: optional, non-standard metadata node for this report (see `MetaData`).
        phase: the Schematron phase this SVRL is a result of.
        schema_version: copy of the Schematron's schemaVersion attribute.
        title: some title for this validation report.
    """
    texts: tuple[Text, ...] = tuple()
    ns_prefix_in_attribute_values: tuple[NSPrefixInAttributeValues, ...] = tuple()
    validation_events: tuple[ValidationEvent, ...] = tuple()
    metadata: MetaData | None = None
    phase: str | None = None
    schema_version: str | None = None
    title: str | None = None
 45 | 
 46 | 
@dataclass(slots=True, frozen=True)
class MetaData(SVRLNode):
    """Metadata for this SVRL report.

    This node is not defined by the standard. We use it to add metadata about PySchematron.

    Args:
        xml_elements: the XML elements contained in this metadata node.
        namespaces: the namespaces to be used in the attributes of this metadata node,
            see the nested `Namespace` class.
    """
    xml_elements: tuple[_Element, ...] = tuple()
    namespaces: tuple[Namespace, ...] = tuple()

    @dataclass(slots=True, frozen=True)
    class MetaDataNode:
        """Base class for the nodes nested in the metadata node."""

    @dataclass(slots=True, frozen=True)
    class Namespace(MetaDataNode):
        """Representation of a namespace attribute in the metadata node.

        Args:
            prefix: the prefix of the namespace
            uri: the namespace's URI
        """
        prefix: str
        uri: str
 74 | 
 75 | 
@dataclass(slots=True, frozen=True)
class NSPrefixInAttributeValues(SVRLNode):
    """Namespace declaration, representing the SVRL `<ns-prefix-in-attribute-values>` node.

    Args:
        prefix: the prefix to use for this namespace
        uri: the namespace's URI
    """
    prefix: str
    uri: str
 86 | 
 87 | 
@dataclass(slots=True, frozen=True)
class ValidationEvent(SVRLNode):
    """Base class for the validation events.

    An SVRL report is a flat listing of the patterns, rules and assertions / reports visited during validation.
    To represent these in a class hierarchy, we group them together as validation events.
    """
 95 | 
 96 | 
@dataclass(slots=True, frozen=True)
class ActivePattern(ValidationEvent):
    """Representation of the `<active-pattern>` SVRL node.

    Args:
        documents: URIs of datatype `xs:anyURI`, pointing to the documents processed.
        id: the identifier of this pattern, typically a copy of the Schematron pattern id.
        name: some name for this pattern, up to the implementation.
        role: some role indicator for this pattern, up to the implementation.
    """
    documents: tuple[str, ...] | None = None
    id: str | None = None
    name: str | None = None
    role: str | None = None
111 | 
112 | 
@dataclass(slots=True, frozen=True)
class FiredRule(ValidationEvent):
    """Representation of the `<fired-rule>` SVRL node.

    Args:
        context: a copy of the context of the Schematron rule element.
        document: reference to the document for which this rule was defined.
        flag: a flag that was set to true when this rule fired, typically a copy of the flag of the Schematron rule.
        id: the identifier of this rule, typically a copy of the Schematron rule id.
        name: some name for this rule, up to the implementation.
        role: the role for this rule, typically a copy of the role of the rule element.
    """
    context: SchematronQuery
    document: str | None = None
    flag: str | None = None
    id: str | None = None
    name: str | None = None
    role: str | None = None
131 | 
132 | 
@dataclass(slots=True, frozen=True)
class SuppressedRule(ValidationEvent):
    """Representation of the `<suppressed-rule>` SVRL node.

    This node type is not part of the official standard, but some packages add it, and so do we.

    Args:
        context: a copy of the context of the Schematron rule element.
        id: the identifier of this rule, typically a copy of the Schematron rule id.
    """
    context: SchematronQuery
    id: str | None = None
145 | 
146 | 
@dataclass(slots=True, frozen=True)
class CheckResult(ValidationEvent):
    """Base class for the `<failed-assert>` and `<successful-report>` SVRL nodes.

    Args:
        text: result description of this check.
        location: the location of this check result as an XPath expression.
        test: the test expression for this check, copied from the Schematron check node.
        diagnostic_references: the diagnostics referenced by this check.
        property_references: the properties referenced by this check.
        subject_location: the location referenced by the subject of either the check or the parent rule.
        flag: a flag that was set to true when this check fired, typically a copy of the Schematron flag attribute.
        id: the identifier of this check, typically a copy of the id of the Schematron check.
        role: the role for this check, typically a copy of the role of the Schematron check element.
    """
    text: Text
    location: XPathExpression
    test: SchematronQuery
    diagnostic_references: tuple[DiagnosticReference, ...] = tuple()
    property_references: tuple[PropertyReference, ...] = tuple()
    subject_location: XPathExpression | None = None
    flag: str | None = None
    id: str | None = None
    role: str | None = None
171 | 
172 | 
@dataclass(slots=True, frozen=True)
class FailedAssert(CheckResult):
    """Representation of the `<failed-assert>` SVRL node.

    Adds no fields to those of the `CheckResult` base class.
    """
176 | 
177 | 
@dataclass(slots=True, frozen=True)
class SuccessfulReport(CheckResult):
    """Representation of the `<successful-report>` SVRL node.

    Adds no fields to those of the `CheckResult` base class.
    """
181 | 
182 | 
@dataclass(slots=True, frozen=True)
class DiagnosticReference(SVRLNode):
    """Representation of the `<diagnostic-reference>` node.

    These nodes reference a diagnostic connected to an assert/report node.

    Args:
        text: the resulting text of the diagnostic.
        diagnostic: identifier of this diagnostic, copied from the diagnostic element's id attribute.
    """
    text: Text
    diagnostic: str
195 | 
196 | 
@dataclass(slots=True, frozen=True)
class PropertyReference(SVRLNode):
    """Representation of the `<property-reference>` node.

    These nodes reference a property connected to an assert/report node.

    Args:
        text: the resulting text of the property.
        property: identifier of this property.
        role: the role attribute for this property, copied from the property's role attribute.
        scheme: the scheme attribute for this property, copied from the property's scheme attribute.
    """
    text: Text
    property: str
    role: str | None = None
    scheme: str | None = None
213 | 
214 | 
@dataclass(slots=True, frozen=True)
class XPathExpression(SVRLNode):
    """Representation of an XPath expression used in the SVRL nodes.

    Args:
        expression: the XPath expression, stored as a string.
    """
    expression: str
219 | 
220 | 
@dataclass(slots=True, frozen=True)
class SchematronQuery(SVRLNode):
    """Representation of a Schematron Query, as used in the SVRL nodes.

    Args:
        query: the query, stored as a string.
    """
    query: str
225 | 
226 | 
@dataclass(slots=True, frozen=True)
class Text(SVRLNode):
    """Representation of a `<text>` tag.

    The attributes `class` and `id` are not part of the SVRL specification. We add them nonetheless, since
    they can be forwarded from Schematron nodes.

    Args:
        content: the text content of this text element, all loaded as one string.
        fpi: formal public identifier, may be copied from the relevant Schematron FPI attribute.
        icon: the icon attribute.
        see: a URI pointing to some external information of this element.
        class_: some class attribute, with a trailing underscore since `class` is a Python keyword.
        id: unique identifier.
        xml_lang: the default natural language for this node.
        xml_space: defines how whitespace must be handled for this element.
    """
    content: str
    fpi: str | None = None
    icon: str | None = None
    see: str | None = None
    class_: str | None = None
    id: str | None = None
    xml_lang: str | None = None
    xml_space: Literal['default', 'preserve'] | None = None
252 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/queries/base.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | __author__ = 'Robbert Harms'
  4 | __date__ = '2023-03-24'
  5 | __maintainer__ = 'Robbert Harms'
  6 | __email__ = 'robbert@xkls.nl'
  7 | __licence__ = 'GPL v3'
  8 | 
  9 | from abc import ABCMeta, abstractmethod
 10 | from typing import Any, Self, override, Callable
 11 | from elementpath.tree_builders import RootArgType
 12 | from elementpath.xpath_context import ItemArgType
 13 | 
 14 | 
class QueryProcessor(metaclass=ABCMeta):
    """Interface class for the query processing classes.

    Successful query processing requires a query parser and an evaluation context which are matched to each other.
    A query processor bundles the two, ensuring a matching parser and evaluation context.

    The `with_*` methods do not modify the processor, they return an updated copy.
    """

    @abstractmethod
    def get_query_parser(self) -> QueryParser:
        """Get the query parser for parsing the queries in an AST.

        Returns:
            A query parser to parse the queries in the Schematron.
        """

    @abstractmethod
    def get_evaluation_context(self) -> EvaluationContext:
        """Get the evaluation context for the parsed queries.

        Returns:
            An evaluation context to evaluate the parsed queries.
        """

    @abstractmethod
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        """Create a copy of this query processor with updated namespaces.

        Args:
            namespaces: a dictionary mapping namespace prefixes to URIs.

        Returns:
            An updated query processor.
        """

    @abstractmethod
    def with_custom_function(self, custom_function: CustomQueryFunction) -> Self:
        """Create a copy of this query processor with support for the provided custom function.

        Args:
            custom_function: the custom function to add to the parser.

        Returns:
            An updated query processor.
        """
 59 | 
 60 | 
 61 | class SimpleQueryProcessor(QueryProcessor):
 62 | 
 63 |     def __init__(self, query_parser: QueryParser, evaluation_context: EvaluationContext):
 64 |         """Simple query processor prepared with a query parser and evaluation context.
 65 | 
 66 |         Defined to be immutable.
 67 | 
 68 |         Args:
 69 |             query_parser: the query parser this instance specialize in
 70 |             evaluation_context: the evaluation context this instance specializes in
 71 |         """
 72 |         self._query_parser = query_parser
 73 |         self._evaluation_context = evaluation_context
 74 | 
 75 |     @override
 76 |     def get_query_parser(self) -> QueryParser:
 77 |         return self._query_parser
 78 | 
 79 |     @override
 80 |     def get_evaluation_context(self) -> EvaluationContext:
 81 |         return self._evaluation_context
 82 | 
 83 |     @override
 84 |     def with_namespaces(self, namespaces: dict[str, str]) -> Self:
 85 |         return type(self)(self._query_parser.with_namespaces(namespaces),
 86 |                           self._evaluation_context.with_namespaces(namespaces))
 87 | 
 88 |     @override
 89 |     def with_custom_function(self, custom_function: CustomQueryFunction) -> Self:
 90 |         return type(self)(self._query_parser.with_custom_function(custom_function),
 91 |                           self._evaluation_context)
 92 | 
 93 | 
class QueryParser(metaclass=ABCMeta):
    """Representation of a parser for Schematron queries.

    The `with_*` methods do not modify the parser, they return an updated copy.
    """

    @abstractmethod
    def parse(self, source: str) -> Query:
        """Parse an expression in the implemented query language.

        Args:
            source: the source code of the expression, in the language supported by this parser.

        Returns:
            A parsed expression in the language supported by this parser.
        """

    @abstractmethod
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        """Create a copy of this query parser with updated namespaces.

        Args:
            namespaces: a dictionary mapping namespace prefixes to URIs.

        Returns:
            An updated query parser.
        """

    @abstractmethod
    def with_custom_function(self, custom_function: CustomQueryFunction) -> Self:
        """Create a copy of this query parser with an additional custom query function.

        Args:
            custom_function: the custom function to add to the parser.

        Returns:
            An updated query parser.
        """
129 | 
130 | 
class CachingQueryParser(QueryParser):

    def __init__(self, query_parser: QueryParser):
        """A wrapper around a query parser enabling caching of compiled queries.

        This keeps a mapping of source strings to Queries and checks this first before compiling a query.

        The parsers returned by `with_namespaces` and `with_custom_function` start with an empty cache.

        Args:
            query_parser: the query parser we use for actual parsing
        """
        self._query_parser = query_parser
        self._query_cache: dict[str, Query] = {}

    @override
    def parse(self, source: str) -> Query:
        # Only parse on a cache miss. Passing the parse call as the default of `dict.setdefault` would
        # evaluate it on every call, also when the source is already cached.
        if source not in self._query_cache:
            self._query_cache[source] = self._query_parser.parse(source)
        return self._query_cache[source]

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        return type(self)(self._query_parser.with_namespaces(namespaces))

    @override
    def with_custom_function(self, custom_function: CustomQueryFunction) -> Self:
        return type(self)(self._query_parser.with_custom_function(custom_function))
155 | 
156 | 
class CustomQueryFunction(metaclass=ABCMeta):
    """Interface for custom functions which can be added to a query parser.

    A custom function is described by a Python callback, a function name and an optional XML prefix.
    """

    @property
    @abstractmethod
    def callback(self) -> Callable[..., Any]:
        """Get the callback of this custom function.

        Returns:
            The (Python) callback function.
        """

    @property
    @abstractmethod
    def name(self) -> str:
        """The name of this function.

        Returns:
            The name of the callable function.
        """

    @property
    @abstractmethod
    def prefix(self) -> str | None:
        """The XML prefix of this function.

        Returns:
            The XML prefix of this function, may be None.
        """
185 | 
186 | 
class SimpleCustomQueryFunction(CustomQueryFunction):

    def __init__(self, callback: Callable[..., Any], name: str, prefix: str | None = None):
        """Plain value holder implementation of a custom query function.

        Args:
            callback: the function to call when evaluating the parsed expression
            name: the function name for use inside the query language.
            prefix: the function's name prefix, if not provided it is set to the XPath default
                function namespace (`fn:`). This means, it may overwrite library functions.
        """
        self._callback, self._name, self._prefix = callback, name, prefix

    @property
    def callback(self) -> Callable[..., Any]:
        # The Python callable, exactly as provided at construction.
        return self._callback

    @property
    def name(self) -> str:
        # The function name, exactly as provided at construction.
        return self._name

    @property
    def prefix(self) -> str | None:
        # The prefix as provided at construction, None if it was not provided.
        return self._prefix
213 | 
214 | 
class EvaluationContext(metaclass=ABCMeta):
    """Representation of the context required when evaluating a Query.

    Each context should be immutable. Every change constructs a new evaluation context.
    """

    @abstractmethod
    def with_xml_root(self, xml_root: RootArgType) -> Self:
        """Create a new evaluation context with the XML root node we can use for dynamic queries.

        For queries like: `xs:integer(42)` no XML root node is needed.
        For dynamic queries like: `xs:integer(/data/@nmr_items)`, a root node is needed.

        Args:
            xml_root: the root node usable for dynamic query evaluations

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_context_item(self, xml_item: ItemArgType) -> Self:
        """Create a new evaluation context with the provided xml item (node, comment, attribute) as query base.

        This is needed for assert and report queries, which assume the context of the rule node.

        Args:
            xml_item: the XML item we use as context node for parser evaluation.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        """Create a new evaluation context with the namespaces used during evaluation.

        Args:
            namespaces: a dictionary mapping namespace prefixes to URIs.
                This is used when namespace information is not available within document and element nodes.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def with_variables(self, variables: dict[str, Any], overwrite: bool = False) -> Self:
        """Create a new evaluation context with the variables used during evaluation.

        Args:
            variables: a dictionary mapping variable names (QNames) to variables. This expects the
                variables to be a parsed and evaluated value.
            overwrite: if set to True, we will overwrite any stored variables. If set to False, we update
                the dictionary of variables.

        Returns:
            A new evaluation context
        """

    @abstractmethod
    def get_xml_root(self) -> RootArgType | None:
        """Get the XML root node currently set in this context.

        Returns:
            The current root node, or None if it is not set.
        """

    @abstractmethod
    def get_context_item(self) -> ItemArgType | None:
        """Get the XML node serving as the query base.

        Returns:
            The XML item we use as context node for parser evaluation, or None if it is not set yet.
        """
289 | 
290 | 
class Query(metaclass=ABCMeta):
    """Representation of an executable Schematron query.

    To specialize for a new language, one must implement a specialized Context, Parser and Query.
    """

    @abstractmethod
    def evaluate(self, context: EvaluationContext | None = None) -> Any:
        """Evaluate this query.

        Args:
            context: optional context to be used during evaluation.
                The exact context and its usage are implementation defined.

        Returns:
            The result of running this query.
        """
308 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/queries/xpath.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | """The XPath query bindings.
  4 | 
  5 | This uses the adapter pattern around the elementpath library.
  6 | 
  7 | The elementpath library is used for the parsing, context and evaluation of all XPath related queries.
  8 | """
  9 | 
 10 | __author__ = 'Robbert Harms'
 11 | __date__ = '2023-03-24'
 12 | __maintainer__ = 'Robbert Harms'
 13 | __email__ = 'robbert@xkls.nl'
 14 | __licence__ = 'GPL v3'
 15 | 
 16 | from typing import Any, Type, Self, override
 17 | from abc import ABCMeta
 18 | 
 19 | from elementpath import XPathToken, XPath1Parser, XPath2Parser, XPathContext
 20 | from elementpath.xpath3 import XPath3Parser
 21 | from elementpath.xpath31 import XPath31Parser
 22 | from elementpath.xpath_context import ItemArgType
 23 | from elementpath.tree_builders import RootArgType
 24 | 
 25 | from pyschematron.direct_mode.xml_validation.queries.base import QueryParser, Query, EvaluationContext, \
 26 |     SimpleQueryProcessor, CustomQueryFunction, SimpleCustomQueryFunction
 27 | from pyschematron.direct_mode.xml_validation.queries.exceptions import MissingRootNodeError
 28 | 
 29 | 
 30 | class XPathQueryProcessor(SimpleQueryProcessor):
 31 | 
 32 |     def __init__(self, query_parser: XPathQueryParser, evaluation_context: EvaluationContext = None):
 33 |         """Query processor for XPath queries.
 34 | 
 35 |         This is simply a wrapper around the simple query processor, with as default evaluation context the XPath
 36 |         evaluation context.
 37 | 
 38 |         Args:
 39 |             query_parser: the (XPath) query processor to use
 40 |             evaluation_context: the evaluation context, defaults to the XPath evaluation context
 41 |         """
 42 |         super().__init__(query_parser, evaluation_context or XPathEvaluationContext())
 43 | 
 44 | 
class XPathQueryParser(QueryParser, metaclass=ABCMeta):
    """Marker base class indicating a query parser which is specialized for XPath queries.

    This class adds no behaviour of its own on top of :class:`QueryParser`.
    """
 47 | 
 48 | 
 49 | class ElementPathXPathQueryParser(XPathQueryParser, metaclass=ABCMeta):
 50 | 
 51 |     def __init__(self,
 52 |                  parser_type: Type[XPath1Parser | XPath2Parser | XPath3Parser | XPath31Parser],
 53 |                  namespaces: dict[str, str] | None = None,
 54 |                  custom_functions: list[CustomXPathFunction] | None = None):
 55 |         """Base class for XPath parsers wrapping the `elementpath` library.
 56 | 
 57 |         Args:
 58 |             parser_type: the type of XPath parser from the elementpath library we are wrapping
 59 |             namespaces: namespaces to use during parsing
 60 |             custom_functions: the list of custom functions to load
 61 |         """
 62 |         self._parser_type = parser_type
 63 |         self._namespaces = namespaces or {}
 64 |         self._custom_functions = custom_functions or []
 65 |         self._parser = self._get_elementpath_parser()
 66 | 
 67 |     @override
 68 |     def parse(self, source: str) -> Query:
 69 |         xpath_token = self._parser.parse(source)
 70 |         return XPathQuery(xpath_token)
 71 | 
 72 |     def _get_elementpath_parser(self) -> XPath1Parser | XPath2Parser | XPath3Parser | XPath31Parser:
 73 |         """Get an elementpath parser using the defined namespaces and custom functions.
 74 | 
 75 |         Returns:
 76 |             An elementpath parser instance to use during parsing.
 77 |         """
 78 |         parser = self._parser_type(namespaces=self._namespaces)
 79 |         for custom_function in self._custom_functions:
 80 |             parser.external_function(custom_function.callback, custom_function.name, custom_function.prefix)
 81 |         return parser
 82 | 
 83 | 
class CustomXPathFunction(CustomQueryFunction, metaclass=ABCMeta):
    """Marker base class for custom query functions meant for use in XPath query parsers.

    This class adds no behaviour of its own on top of :class:`CustomQueryFunction`.
    """
 86 | 
 87 | 
class SimpleCustomXPathFunction(CustomXPathFunction, SimpleCustomQueryFunction):
    """Simple definition of an XPath custom function.

    Combines the XPath marker type with the implementation of :class:`SimpleCustomQueryFunction`.
    """
 90 | 
 91 | 
 92 | class XPathEvaluationContext(EvaluationContext):
 93 | 
 94 |     def __init__(self,
 95 |                  root: RootArgType | None = None,
 96 |                  namespaces: dict[str, str] | None = None,
 97 |                  item: ItemArgType | None = None,
 98 |                  variables: dict[str, Any] | None = None):
 99 |         super().__init__()
100 |         self._context_variables = {
101 |             'root': root,
102 |             'namespaces': namespaces or {},
103 |             'item': item,
104 |             'variables': variables or {}
105 |         }
106 | 
107 |         self._xpath_context = None
108 |         if root is not None:
109 |             self._xpath_context = XPathContext(**self._context_variables)
110 | 
111 |     def get_xpath_context(self) -> XPathContext | None:
112 |         """Get the XPath context we can use for evaluation of a query.
113 | 
114 |         If no root node is set yet, we return None. Else, we return an XPathContext from the elementpath library.
115 | 
116 |         Returns:
117 |              The XPath context if a root node is set, else None.
118 | 
119 |         Raises:
120 |             MissingRootNodeError: if the XPath node could not be
121 |         """
122 |         if self._xpath_context is None:
123 |             raise MissingRootNodeError('Missing root node in XPath context, please set a root node first.')
124 |         return self._xpath_context
125 | 
126 |     @override
127 |     def with_context_item(self, xml_item: ItemArgType) -> Self:
128 |         if xml_item is self._context_variables['item']:
129 |             return self
130 |         return self._get_updated({'item': xml_item})
131 | 
132 |     @override
133 |     def with_xml_root(self, xml_root: RootArgType) -> Self:
134 |         if xml_root is self._context_variables['root']:
135 |             return self
136 |         return self._get_updated({'root': xml_root})
137 | 
138 |     @override
139 |     def with_namespaces(self, namespaces: dict[str, str]) -> Self:
140 |         return self._get_updated({'namespaces': namespaces})
141 | 
142 |     @override
143 |     def with_variables(self, variables: dict[str, Any], overwrite: bool = False) -> Self:
144 |         if overwrite:
145 |             return self._get_updated({'variables': variables})
146 |         else:
147 |             return self._get_updated({'variables': self._context_variables['variables'] | variables})
148 | 
149 |     @override
150 |     def get_xml_root(self) -> RootArgType | None:
151 |         return self._context_variables['root']
152 | 
153 |     @override
154 |     def get_context_item(self) -> ItemArgType | None:
155 |         return self._context_variables['item']
156 | 
157 |     def _get_updated(self, updates: dict[str, Any]) -> Self:
158 |         kwargs = self._context_variables.copy()
159 |         kwargs.update(updates)
160 |         return type(self)(**kwargs)
161 | 
162 | 
class XPathQuery(Query):

    def __init__(self, xpath_token: XPathToken):
        """Executable XPath query, backed by a parsed token of the elementpath library.

        Args:
            xpath_token: the parsed XPath expression
        """
        self._xpath_token = xpath_token

    @override
    def evaluate(self, context: XPathEvaluationContext | None = None) -> Any:
        # Without an evaluation context, the token is evaluated without an elementpath context.
        elementpath_context = context.get_xpath_context() if context else None
        return self._xpath_token.evaluate(elementpath_context)
182 | 
183 | 
class XPath1QueryParser(ElementPathXPathQueryParser):

    def __init__(self, namespaces: dict[str, str] | None = None):
        """XPath 1.0 query parser, backed by the `XPath1Parser` of the `elementpath` library.

        Args:
             namespaces: the namespaces made available while parsing.
        """
        super().__init__(XPath1Parser, namespaces=namespaces)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged into the current ones, overriding on equal prefixes.
        merged_namespaces = self._namespaces | namespaces
        return type(self)(merged_namespaces)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        raise ValueError('Custom functions are not supported for XPath1 parsers.')
203 | 
204 | 
class XPath2QueryParser(ElementPathXPathQueryParser):

    def __init__(self,
                 namespaces: dict[str, str] | None = None,
                 custom_functions: list[CustomXPathFunction] | None = None):
        """XPath 2.0 query parser, backed by the `XPath2Parser` of the `elementpath` library.

        Args:
             namespaces: the namespaces made available while parsing.
             custom_functions: the custom functions to register in the parser
        """
        super().__init__(XPath2Parser, namespaces=namespaces, custom_functions=custom_functions)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged into the current ones, overriding on equal prefixes.
        merged_namespaces = self._namespaces | namespaces
        return type(self)(merged_namespaces, custom_functions=self._custom_functions)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        # Build a new list, the list of functions of this parser is left untouched.
        extended_functions = self._custom_functions + [custom_function]
        return type(self)(namespaces=self._namespaces, custom_functions=extended_functions)
229 | 
230 | 
class XPath3QueryParser(ElementPathXPathQueryParser):

    def __init__(self,
                 namespaces: dict[str, str] | None = None,
                 custom_functions: list[CustomXPathFunction] | None = None):
        """XPath 3.0 query parser, backed by the `XPath3Parser` of the `elementpath` library.

        Args:
             namespaces: the namespaces made available while parsing.
             custom_functions: the custom functions to register in the parser
        """
        super().__init__(XPath3Parser, namespaces=namespaces, custom_functions=custom_functions)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged into the current ones, overriding on equal prefixes.
        merged_namespaces = self._namespaces | namespaces
        return type(self)(merged_namespaces, custom_functions=self._custom_functions)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        # Build a new list, the list of functions of this parser is left untouched.
        extended_functions = self._custom_functions + [custom_function]
        return type(self)(namespaces=self._namespaces, custom_functions=extended_functions)
255 | 
256 | 
class XPath31QueryParser(ElementPathXPathQueryParser):

    def __init__(self,
                 namespaces: dict[str, str] | None = None,
                 custom_functions: list[CustomXPathFunction] | None = None):
        """XPath 3.1 query parser, backed by the `XPath31Parser` of the `elementpath` library.

        Args:
             namespaces: the namespaces made available while parsing.
             custom_functions: the custom functions to register in the parser
        """
        super().__init__(XPath31Parser, namespaces=namespaces, custom_functions=custom_functions)

    @override
    def with_namespaces(self, namespaces: dict[str, str]) -> Self:
        # The provided namespaces are merged into the current ones, overriding on equal prefixes.
        merged_namespaces = self._namespaces | namespaces
        return type(self)(merged_namespaces, custom_functions=self._custom_functions)

    @override
    def with_custom_function(self, custom_function: CustomXPathFunction) -> Self:
        # Build a new list, the list of functions of this parser is left untouched.
        extended_functions = self._custom_functions + [custom_function]
        return type(self)(namespaces=self._namespaces, custom_functions=extended_functions)
281 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/ast_yaml.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | __author__ = 'Robbert Harms'
  4 | __date__ = '2023-02-21'
  5 | __maintainer__ = 'Robbert Harms'
  6 | __email__ = 'robbert@xkls.nl'
  7 | 
  8 | import dataclasses
  9 | import inspect
 10 | from abc import ABCMeta, abstractmethod
 11 | from io import StringIO
 12 | from pathlib import PosixPath, Path
 13 | from typing import Callable, Any, Mapping, Iterable
 14 | 
 15 | from ruyaml import YAML, BaseRepresenter, BaseConstructor, Node
 16 | 
 17 | import pyschematron.direct_mode.schematron.ast
 18 | from pyschematron.direct_mode.schematron.ast import SchematronASTNode
 19 | 
 20 | 
class ASTYamlConverter(metaclass=ABCMeta):
    """Interface for converting Schematron AST nodes to YAML and back."""

    @abstractmethod
    def load(self, stream: Path | Any) -> SchematronASTNode:
        """Load a provided stream into a Schematron AST node.

        Args:
            stream: the stream with YAML data to load, either a path to a file, or some sort of (binary) string.

        Returns:
            The loaded Schematron AST node.
        """

    @abstractmethod
    def dump(self, data: SchematronASTNode, stream: Path | Any | None = None) -> str | None:
        """Dump the provided data into YAML format.

        Args:
            data: the Schematron AST node to dump
            stream: the stream to dump the data to, either a file path, or some sort of string buffer.
                This is optional, if not provided we return a string.

        Returns:
            If no stream was provided, the YAML output as a string. Else the output is written to the
            provided stream and None is returned.
        """
 46 | 
 47 | 
 48 | class RuyamlASTYamlConverter(ASTYamlConverter):
 49 | 
 50 |     def __init__(self):
 51 |         """Basic AST to YAML (and back) converter.
 52 | 
 53 |         This works together with the ruyaml library to dump and load AST nodes to and from yaml.
 54 |         """
 55 |         self._codec = _ASTYamlCodec()
 56 | 
 57 |     def load(self, stream: Path | Any) -> SchematronASTNode:
 58 |         return self._codec.load(stream)
 59 | 
 60 |     def dump(self, data: SchematronASTNode, stream: Path | Any | None = None) -> str | None:
 61 |         if stream is None:
 62 |             with StringIO() as dumped:
 63 |                 self._codec.dump(data, dumped)
 64 |                 return dumped.getvalue()
 65 |         else:
 66 |             self._codec.dump(data, stream)
 67 | 
 68 | 
 69 | class _ASTYamlCodec(YAML):
 70 | 
 71 |     def __init__(self, *args, **kwargs):
 72 |         """Specialized RuYAML loader and dumper for :class:`SchematronASTNode` instances.
 73 | 
 74 |         For loading the YAML, the ruyaml library uses shared mutable lists to construct the class hierarchy.
 75 |         Since we are creating immutable AST nodes, we need a builder pattern to construct the final classes.
 76 |         During processing of the YAML, the :class:`YamlRepresenter` create :class:`SchematronASTNodeBuilder` nodes.
 77 |         As a final step, the build method of these builders is called to construct the final AST nodes.
 78 |         """
 79 |         super().__init__(*args, typ='safe', **kwargs)
 80 |         self._add_ast_representers()
 81 | 
 82 |     def load(self, stream: Path | Any) -> Any:
 83 |         loaded_objects = super().load(stream)
 84 |         if isinstance(loaded_objects, SchematronASTNodeBuilder):
 85 |             return loaded_objects.build()
 86 |         return loaded_objects
 87 | 
 88 |     def _add_ast_representers(self):
 89 |         """Add YAML representers (loading and dumping) for all relevant types in `pyschematron.ast`.
 90 | 
 91 |         This modifies the current instance by adding representer objects.
 92 |         """
 93 |         representers = self._get_ast_node_representers()
 94 |         representers.append(PathRepresenter())
 95 | 
 96 |         for representer in representers:
 97 |             self.representer.add_representer(representer.element_class, representer.get_dumping_function())
 98 |             self.constructor.add_constructor(representer.yaml_tag, representer.get_loading_function())
 99 | 
100 |     def _get_ast_node_representers(self) -> list["YamlRepresenter"]:
101 |         """Get representers for all the :class:`SchematronASTNode` nodes.
102 | 
103 |         Returns:
104 |             A list of YAML representers for the AST nodes.
105 |         """
106 |         ast_nodes = self._list_representable_ast_nodes()
107 |         return [self._get_representer(ast_node) for ast_node in ast_nodes]
108 | 
109 |     def _list_representable_ast_nodes(self) -> list[type[SchematronASTNode]]:
110 |         """List the representable AST nodes.
111 | 
112 |         Returns:
113 |             Get a list of AST nodes we can YAML represent.
114 |         """
115 |         def filter_function(el):
116 |             return inspect.isclass(el) and issubclass(el, SchematronASTNode) and el is not SchematronASTNode
117 |         return [el[1] for el in inspect.getmembers(pyschematron.direct_mode.schematron.ast, filter_function)]
118 | 
119 |     def _get_representer(self, ast_node: type[SchematronASTNode]) -> "YamlRepresenter":
120 |         """Get a representor for the indicated node type.
121 | 
122 |         Args:
123 |             ast_node: the type of AST node for which we want a representer.
124 | 
125 |         Returns:
126 |             A representer for this specific node type.
127 |         """
128 |         return GenericASTNodeYamlRepresenter(ast_node)
129 | 
130 | 
class YamlRepresenter(metaclass=ABCMeta):
    """Specialized class for YAML representing elements and contents of :class:`SchematronASTNode`.

    This works in conjunction with the ruyaml library.
    """

    @property
    @abstractmethod
    def element_class(self) -> Any:
        """Get the type of element this representer represents."""

    @property
    @abstractmethod
    def yaml_tag(self) -> str:
        """Get the YAML tag for the class representation."""

    @abstractmethod
    def get_dumping_function(self) -> Callable[[BaseRepresenter, Any], Node]:
        """Get the dumping function `to_yaml` we can use to dump the item to YAML.

        Returns:
            A function which, given a representer and an element, creates the YAML node for that element.
        """

    @abstractmethod
    def get_loading_function(self) -> Callable[[BaseConstructor, Node], Any]:
        """Get the loading function we can use to load a YAML node into the represented element.

        Returns:
            A function which, given a constructor and a YAML node, loads the YAML representation.
        """
162 | 
163 | 
class PathRepresenter(YamlRepresenter):
    """YAML representer for filesystem paths, which are dumped as a `!Path` tagged scalar."""

    @property
    def element_class(self) -> Any:
        # `Path()` instantiates the concrete path class of the running platform (PosixPath or WindowsPath).
        # A hard-coded PosixPath would not match the paths created on Windows and can not be
        # instantiated there, which would break both the dumping and the loading.
        return type(Path())

    @property
    def yaml_tag(self) -> str:
        return '!Path'

    def get_dumping_function(self) -> Callable[[BaseRepresenter, Any], Node]:
        """Get the function dumping a path as a scalar node holding the string form of the path.

        Returns:
            A function which, given a representer and a path, creates the tagged scalar node.
        """
        def to_yaml(representer: BaseRepresenter, node: Any) -> Node:
            return representer.represent_scalar(self.yaml_tag, str(node))
        return to_yaml

    def get_loading_function(self) -> Callable[[BaseConstructor, Node], Any]:
        """Get the function loading a tagged scalar node back into a path object.

        Returns:
            A function which, given a constructor and a scalar node, creates the path from the node value.
        """
        def from_yaml(constructor: BaseConstructor, node: Node) -> Any:
            return self.element_class(node.value)
        return from_yaml
183 | 
184 | 
class ASTNodeYamlRepresenter(YamlRepresenter):
    """Base class for the AST node representers, forwarding the dumping and loading to template methods."""

    def get_dumping_function(self) -> Callable[[BaseRepresenter, Any], Node]:
        # Forward to the template method implemented by the subclasses.
        return lambda representer, node: self._to_yaml(representer, node)

    def get_loading_function(self) -> Callable[[BaseConstructor, Node], Any]:
        # Forward to the template method implemented by the subclasses.
        return lambda constructor, node: self._from_yaml(constructor, node)

    @abstractmethod
    def _to_yaml(self, representer: BaseRepresenter, node: SchematronASTNode) -> Node:
        """Dump a specific Schematron node to YAML.

        This is the function doing the actual work for the function returned by `get_dumping_function`.

        Returns:
            The ruyaml Node used to dump to YAML.
        """

    @abstractmethod
    def _from_yaml(self, constructor: BaseConstructor, node: Node) -> SchematronASTNodeBuilder:
        """Load a ruyaml YAML Node into a builder for a Schematron AST Node.

        This is the function doing the actual work for the function returned by `get_loading_function`.

        Returns:
            A builder for the Schematron node.
        """
214 | 
215 | 
class GenericASTNodeYamlRepresenter(ASTNodeYamlRepresenter):

    def __init__(self, node_type: type[SchematronASTNode]):
        """Generic representer for a dataclass based :class:`SchematronASTNode` type.

        Nodes are dumped as a mapping of their init fields. When loading a YAML, this class returns
        :class:`SchematronASTNodeBuilder` objects instead of the final nodes.

        Args:
            node_type: the type of node we are representing
        """
        self._node_type = node_type

    @property
    def element_class(self) -> type[SchematronASTNode]:
        return self._node_type

    @property
    def yaml_tag(self) -> str:
        return '!' + self._node_type.__name__

    def _to_yaml(self, representer: BaseRepresenter, node: SchematronASTNode) -> Node:
        """Dump the init fields of the node as a tagged mapping, leaving out all the falsy values."""
        node_data = {}
        for field in dataclasses.fields(self.element_class):
            if not field.init:
                continue
            value = getattr(node, field.name)
            if value:
                node_data[field.name] = value

        return representer.represent_mapping(self.yaml_tag, node_data)

    def _from_yaml(self, constructor: BaseConstructor, node: Node) -> SchematronASTNodeBuilder:
        """Load the mapping of the YAML node and wrap it in a builder for the represented node type."""
        init_values = constructor.construct_mapping(node)
        return DictionaryNodeBuilder(self._node_type, init_values)
251 | 
252 | 
class SchematronASTNodeBuilder(metaclass=ABCMeta):
    """Builder pattern for delayed construction of Schematron nodes."""

    @abstractmethod
    def build(self) -> SchematronASTNode:
        """Build a Schematron node based on the information in this builder.

        Returns:
            The constructed Schematron AST node.
        """
263 | 
264 | 
class DictionaryNodeBuilder(SchematronASTNodeBuilder):

    def __init__(self, node_type: type[SchematronASTNode], init_values: dict):
        """Construct a Schematron AST node using a dictionary containing the init values.

        During the build phase, this builder will transform the provided init values according to these rules:
        1. if a value is a builder, build it
        2. if a value is a string or a bytes-like value, keep it as is
        3. if a value is a mapping, transform each of its values
        4. if a value is any other iterable (like a list), turn it into a tuple of transformed elements

        Args:
            node_type: the type of node we will build
            init_values: the init values to pass to the constructor.
        """
        self._node_type = node_type
        self._init_values = init_values

    def build(self) -> SchematronASTNode:
        """Build the node, by passing the transformed init values as keyword arguments to the node type.

        Returns:
            The constructed Schematron AST node.
        """
        def _expand_value(value):
            if isinstance(value, SchematronASTNodeBuilder):
                return value.build()
            if isinstance(value, (str, bytes, bytearray)):
                # Although iterable, these are atomic values which must not be split into their elements.
                return value
            if isinstance(value, Mapping):
                return {k: _expand_value(v) for k, v in value.items()}
            if isinstance(value, Iterable):
                return tuple(_expand_value(el) for el in value)
            return value

        final_inits = {key: _expand_value(value) for key, value in self._init_values.items()}
        return self._node_type(**final_inits)
299 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/results/validation_results.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | __author__ = 'Robbert Harms'
  4 | __date__ = '2023-05-06'
  5 | __maintainer__ = 'Robbert Harms'
  6 | __email__ = 'robbert@xkls.nl'
  7 | __licence__ = 'GPL v3'
  8 | 
  9 | from abc import ABCMeta, abstractmethod
 10 | from dataclasses import dataclass
 11 | from pathlib import Path
 12 | from typing import Literal
 13 | 
 14 | from lxml.etree import _ElementTree
 15 | 
 16 | from pyschematron.direct_mode.schematron.ast import ConcreteRule, Assert, Report, ConcretePattern, Schema
 17 | from pyschematron.direct_mode.xml_validation.queries.base import EvaluationContext
 18 | from pyschematron.direct_mode.xml_validation.results.xml_nodes import XMLNode
 19 | 
 20 | 
@dataclass(slots=True, frozen=True)
class ValidationResult:
    """Base type for all the validation result classes."""
 24 | 
 25 | 
 26 | @dataclass(slots=True, frozen=True)
 27 | class XMLDocumentValidationResult(ValidationResult):
 28 |     """Result class for the full evaluation of the entire XML document.
 29 | 
 30 |     This encapsulates the processing of all patterns over all nodes.
 31 | 
 32 |     Args:
 33 |         xml_information: the knowledge of the XML
 34 |         schema_information: information about the applied Schema
 35 |         node_results: the results over all nodes
 36 |     """
 37 |     xml_information: XMLInformation
 38 |     schema_information: SchemaInformation
 39 |     node_results: tuple[FullNodeResult, ...]
 40 | 
 41 |     def is_valid(self) -> bool:
 42 |         """Return True if the XML document was considered valid, False otherwise.
 43 | 
 44 |         According to the specifications, a successful report is considered a failure. As such, this method considers
 45 |         an XML document to be valid if none of the assertions and none of the reports were raised.
 46 | 
 47 |         Returns:
 48 |             True if the document passed the Schematron validation, False otherwise.
 49 |         """
 50 |         for node_result in self.node_results:
 51 |             if not node_result.is_valid():
 52 |                 return False
 53 |         return True
 54 | 
 55 | 
@dataclass(slots=True, frozen=True)
class XMLInformation(ValidationResult):
    """Container for the knowledge of the XML being validated.

    Args:
        xml_document: the XML document provided as input
    """
    xml_document: _ElementTree
 66 | 
 67 | 
 68 | @dataclass(slots=True, frozen=True)
 69 | class SchemaInformation(ValidationResult):
 70 |     """Container for the information of the Schematron used during validation.
 71 | 
 72 |     Args:
 73 |         schema: the Schema AST node used during evaluation
 74 |         phase: the phase used in evaluation
 75 |         schematron_base_path: the base path from which we loaded the Schematron file, provided for context.
 76 |     """
 77 |     schema: Schema
 78 |     phase: str | Literal['#ALL', '#DEFAULT'] | None = None,
 79 |     schematron_base_path: Path | None = None
 80 | 
 81 | 
@dataclass(slots=True, frozen=True)
class BaseXMLNodeResult(ValidationResult):
    """Base class for the result of processing a specific XML node.

    Args:
        xml_node: the node on which we are reporting the processing result.
        evaluation_context: the context in which the node was processed.
            This should be the context as it was before specialization for the processed element.
            For example, for a processed pattern, this should be the "outside" evaluation context,
            without the parameters inside the pattern.
    """
    xml_node: XMLNode
    evaluation_context: EvaluationContext
 94 | 
 95 | 
 96 | @dataclass(slots=True, frozen=True)
 97 | class FullNodeResult(BaseXMLNodeResult):
 98 |     """Result class for the full evaluation of an XML node.
 99 | 
100 |     This encapsulates the processing of all the patterns over the indicated XML node.
101 | 
102 |     Args:
103 |         pattern_results: the results of all the patterns
104 |     """
105 |     pattern_results: tuple[PatternResult, ...]
106 | 
107 |     def is_valid(self) -> bool:
108 |         """Return True if all patterns yielded a valid results, False otherwise.
109 | 
110 |         Returns:
111 |             True if the document passed the Schematron validation, False otherwise.
112 |         """
113 |         for pattern_result in self.pattern_results:
114 |             if not pattern_result.is_valid():
115 |                 return False
116 |         return True
117 | 
118 | 
@dataclass(slots=True, frozen=True)
class PatternResult(BaseXMLNodeResult):
    """Result of evaluating a single pattern on a node.

    Args:
        pattern: a reference to the evaluated pattern
        rule_results: the rule results, one for each rule in the pattern.
    """
    pattern: ConcretePattern
    rule_results: tuple[RuleResult, ...]

    def has_fired_rule(self) -> bool:
        """Check if any of the rules in this pattern fired for the node.

        Returns:
            True if at least one of the rule results is a fired rule, False otherwise
        """
        for rule_result in self.rule_results:
            if rule_result.is_fired():
                return True
        return False

    def is_valid(self) -> bool:
        """Check if all the fired rules yielded a valid result.

        Only the fired rule results are inspected, all other rule results do not influence the validity.

        Returns:
            True if every fired rule result is valid, False otherwise.
        """
        return all(rule_result.is_valid()
                   for rule_result in self.rule_results
                   if isinstance(rule_result, FiredRuleResult))
149 | 
150 | 
@dataclass(slots=True, frozen=True)
class RuleResult(BaseXMLNodeResult, metaclass=ABCMeta):
    """Base class for skipped, fired, and suppressed rules.

    Since we process all rules, we need a way to indicate if a rule was skipped, fired, or suppressed.
    This base class provides the common type for the different rule results.

    Args:
        rule: the rule which was processed
    """
    rule: ConcreteRule

    @abstractmethod
    def is_skipped(self) -> bool:
        """Check if this rule was skipped or not.

        Returns:
            True if the rule was skipped, False otherwise
        """

    @abstractmethod
    def is_fired(self) -> bool:
        """Check if this rule was fired or not.

        Returns:
            True if the rule was fired, False otherwise
        """

    @abstractmethod
    def is_suppressed(self) -> bool:
        """Check if this rule was suppressed or not.

        Returns:
            True if the rule was suppressed, False otherwise
        """
186 | 
187 | 
@dataclass(slots=True, frozen=True)
class SkippedRuleResult(RuleResult):
    """Result of a rule which was skipped because its context did not match the node."""

    def is_skipped(self) -> bool:
        """A skipped rule result always reports itself as skipped."""
        return True

    def is_fired(self) -> bool:
        """A skipped rule result never counts as fired."""
        return False

    def is_suppressed(self) -> bool:
        """A skipped rule result never counts as suppressed."""
        return False
200 | 
201 | 
@dataclass(slots=True, frozen=True)
class SuppressedRuleResult(RuleResult):
    """Result of a rule which was shadowed by a preceding rule."""

    @classmethod
    def from_fired_rule_result(cls, fired_rule_result: FiredRuleResult):
        """Build a suppressed rule result out of the result of a fired rule.

        Convenience constructor which takes over the XML node, the evaluation context and the rule of the
        fired result. The check results and subject node of the fired result are not carried over.

        Args:
            fired_rule_result: the fired result we would like to transform

        Returns:
            A new suppressed rule result for the same node, context and rule.
        """
        xml_node = fired_rule_result.xml_node
        evaluation_context = fired_rule_result.evaluation_context
        return cls(xml_node, evaluation_context, fired_rule_result.rule)

    def is_skipped(self) -> bool:
        """A suppressed rule result never counts as skipped."""
        return False

    def is_fired(self) -> bool:
        """A suppressed rule result never counts as fired."""
        return False

    def is_suppressed(self) -> bool:
        """A suppressed rule result always reports itself as suppressed."""
        return True
225 | 
226 | 
@dataclass(slots=True, frozen=True)
class FiredRuleResult(RuleResult):
    """Result of running the asserts and reports of a rule on an XML node.

    Args:
        check_results: the results of the individual checks of the rule
        subject_node: the node referenced by the subject attribute of the Schematron rule.
    """
    check_results: list[CheckResult]
    subject_node: XMLNode | None

    def is_skipped(self) -> bool:
        """A fired rule result never counts as skipped."""
        return False

    def is_fired(self) -> bool:
        """A fired rule result always reports itself as fired."""
        return True

    def is_suppressed(self) -> bool:
        """A fired rule result never counts as suppressed."""
        return False

    def is_valid(self) -> bool:
        """Tell whether none of the checks of this rule has a truthy check result.

        A truthy `check_result` on a check marks a failed assert or a successful report, either of which
        makes the rule invalid.

        Returns:
            True if no check has a truthy check result, False otherwise.
        """
        return not any(check.check_result for check in self.check_results)
257 | 
258 | 
@dataclass(slots=True, frozen=True)
class CheckResult(BaseXMLNodeResult):
    """Result of running a single Schematron assert or report on an XML node.

    The stored test result is the raw outcome of the test expression and says nothing about the kind of check.
    An assert is reported when its test is false, a report is reported when its test is true. The derived
    outcome, which takes the kind of check into account, is available through the `check_result` property.

    Args:
        check: the check which was run
        test_result: the result of the test in the check.
        text: the text result from the rich text content.
        subject_node: the node referenced by the subject attribute of the Schematron check.
        property_results: the results of the properties belonging to this check, None if not provided.
        diagnostic_results: the results of the diagnostics belonging to this check, None if not provided.
    """
    check: Assert | Report
    test_result: bool
    text: str
    subject_node: XMLNode | None
    property_results: tuple[PropertyResult, ...] | None = None
    diagnostic_results: tuple[DiagnosticResult, ...] | None = None

    @property
    def check_result(self) -> bool:
        """Get the derived outcome of the check.

        Schematron knows two kinds of checks:

            <sch:assert> outputs a message if an XPath test evaluates to false.
            <sch:report> outputs a message if an XPath test evaluates to true.

        The return value combines the kind of check with the stored test result:

        +--------+-------------+--------------+
        |  Check | Test result | Return value |
        +========+=============+==============+
        | Assert | true        | false        |
        | Assert | false       | true         |
        | Report | true        | true         |
        | Report | false       | false        |
        +--------+-------------+--------------+

        Returns:
            True for a failed assert or a successful report, meaning there is something to output.
            False for a successful assert or a failed report.
        """
        return (not self.test_result) if isinstance(self.check, Assert) else self.test_result
312 | 
313 | 
@dataclass(slots=True, frozen=True)
class PropertyResult(ValidationResult):
    """Outcome of evaluating a property which a check refers to.

    Args:
        text: the text which resulted from evaluating the property
        property_id: the identifier of the property
        role: the role attribute for this property, copied from the properties' role attribute, None if absent
        scheme: the scheme attribute for this property, copied from the properties' scheme attribute,
            None if absent
    """
    text: str
    property_id: str
    role: str | None = None
    scheme: str | None = None
328 | 
329 | 
@dataclass(slots=True, frozen=True)
class DiagnosticResult(ValidationResult):
    """Outcome of evaluating a diagnostic which a check refers to.

    Args:
        text: the text which resulted from evaluating the diagnostic
        diagnostic_id: the identifier of the diagnostic
        xml_lang: the xml language attribute for this diagnostic, None if absent
        xml_space: the xml_space attribute from the diagnostic, either 'default' or 'preserve', None if absent
    """
    text: str
    diagnostic_id: str
    xml_lang: str | None = None
    xml_space: Literal['default', 'preserve'] | None = None
344 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/svrl/xml_writer.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Robbert Harms'
  2 | __date__ = '2024-03-17'
  3 | __maintainer__ = 'Robbert Harms'
  4 | __email__ = 'robbert@xkls.nl'
  5 | __licence__ = 'LGPL v3'
  6 | 
  7 | from abc import ABCMeta, abstractmethod
  8 | from typing import override
  9 | 
 10 | from lxml.etree import Element, _Element
 11 | from pyschematron.direct_mode.svrl.ast import SchematronOutput, SVRLNode, NSPrefixInAttributeValues, Text, \
 12 |     ActivePattern, MetaData, FiredRule, FailedAssert, SuppressedRule, SuccessfulReport, PropertyReference, \
 13 |     DiagnosticReference
 14 | from pyschematron.direct_mode.svrl.svrl_visitors import SVRLASTVisitor
 15 | 
 16 | 
 17 | class SVRLWriter(metaclass=ABCMeta):
 18 |     """Type class for SVRL XML writers."""
 19 | 
 20 |     @abstractmethod
 21 |     def create_xml(self, schematron_output: SchematronOutput) -> _Element:
 22 |         """Transform a Schematron output SVRL root node, to an XML element.
 23 | 
 24 |         Args:
 25 |             schematron_output: the root node of an SVRL AST.
 26 | 
 27 |         Returns:
 28 |             An XML representation of the provided SVRL output node.
 29 |         """
 30 | 
 31 | 
 32 | class LxmlSVRLWriter(SVRLWriter):
 33 |     """SVRL writer using the Lxml library."""
 34 | 
 35 |     @override
 36 |     def create_xml(self, schematron_output: SchematronOutput) -> _Element:
 37 |         nsmap = {
 38 |             'svrl': 'http://purl.oclc.org/dsdl/svrl',
 39 |             'sch': 'http://purl.oclc.org/dsdl/schematron',
 40 |             'xs': 'http://www.w3.org/2001/XMLSchema'
 41 |         }
 42 |         writer_visitor = _SVRLWriterVisitor(nsmap)
 43 |         return writer_visitor.visit(schematron_output)
 44 | 
 45 | 
 46 | class _SVRLWriterVisitor(SVRLASTVisitor):
 47 | 
 48 |     def __init__(self, nsmap: dict[str, str]):
 49 |         """SVRL XML writer using the visitor pattern.
 50 | 
 51 |         Args:
 52 |             nsmap: the default namespaces to apply in the SVRL, may be overwritten by the Schematron.
 53 |         """
 54 |         self._nsmap = nsmap
 55 | 
 56 |     @override
 57 |     def visit(self, svrl_node: SVRLNode) -> _Element | None:
 58 |         match svrl_node:
 59 |             case SchematronOutput():
 60 |                 return self._process_schematron_output(svrl_node)
 61 |             case NSPrefixInAttributeValues():
 62 |                 return self._process_ns_prefix_node(svrl_node)
 63 |             case MetaData():
 64 |                 return self._process_metadata(svrl_node)
 65 |             case ActivePattern():
 66 |                 return self._process_active_pattern_node(svrl_node)
 67 |             case FiredRule():
 68 |                 return self._process_fired_rule_node(svrl_node)
 69 |             case SuppressedRule():
 70 |                 return self._process_suppressed_rule_node(svrl_node)
 71 |             case FailedAssert():
 72 |                 return self._process_failed_assert(svrl_node)
 73 |             case SuccessfulReport():
 74 |                 return self._process_successful_report(svrl_node)
 75 |             case Text():
 76 |                 return self._process_text_node(svrl_node)
 77 |             case PropertyReference():
 78 |                 return self._process_property_reference(svrl_node)
 79 |             case DiagnosticReference():
 80 |                 return self._process_diagnostic_reference(svrl_node)
 81 |         return None
 82 | 
 83 |     def _process_schematron_output(self, schematron_output: SchematronOutput) -> _Element:
 84 |         """Process the SchematronOutput node, the root of the SVRL.
 85 | 
 86 |         Args:
 87 |             schematron_output: the SVRL root node
 88 | 
 89 |         Returns:
 90 |             An element representing the SVRL report.
 91 |         """
 92 |         for ns_prefix in schematron_output.ns_prefix_in_attribute_values:
 93 |             self._nsmap[ns_prefix.prefix] = ns_prefix.uri
 94 | 
 95 |         node_attributes = {
 96 |             'phase': schematron_output.phase,
 97 |             'schemaVersion': schematron_output.schema_version,
 98 |             'title': schematron_output.title
 99 |         }
100 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
101 | 
102 |         root = Element(f'{{{self._nsmap["svrl"]}}}schematron-output', attrib=final_attributes, nsmap=self._nsmap)
103 | 
104 |         for text_node in schematron_output.texts:
105 |             root.append(self.visit(text_node))
106 | 
107 |         for ns_prefix in schematron_output.ns_prefix_in_attribute_values:
108 |             root.append(self.visit(ns_prefix))
109 | 
110 |         if schematron_output.metadata:
111 |             root.append(self.visit(schematron_output.metadata))
112 | 
113 |         for validation_event in schematron_output.validation_events:
114 |             el = self.visit(validation_event)
115 |             if el is not None:
116 |                 root.append(el)
117 | 
118 |         return root
119 | 
120 |     def _process_ns_prefix_node(self, ns_prefix: NSPrefixInAttributeValues) -> _Element:
121 |         """Process the namespace prefix node.
122 | 
123 |         Args:
124 |             ns_prefix: the prefix node to render into an XML element
125 | 
126 |         Returns:
127 |             The created XML element
128 |         """
129 |         return Element(f'{{{self._nsmap["svrl"]}}}ns-prefix-in-attribute-values',
130 |                        attrib={'prefix': ns_prefix.prefix, 'uri': ns_prefix.uri},
131 |                        nsmap=self._nsmap)
132 | 
133 |     def _process_metadata(self, metadata: MetaData) -> _Element:
134 |         """Process the metadata node.
135 | 
136 |         Args:
137 |             metadata: the metadata node
138 | 
139 |         Returns:
140 |             The created XML node
141 |         """
142 |         additional_namespaces = {}
143 |         for namespace in metadata.namespaces:
144 |             additional_namespaces[namespace.prefix] = namespace.uri
145 | 
146 |         metadata_root = Element(f'{{{self._nsmap["svrl"]}}}metadata', nsmap=self._nsmap | additional_namespaces)
147 |         for element in metadata.xml_elements:
148 |             metadata_root.append(element)
149 | 
150 |         return metadata_root
151 | 
152 |     def _process_active_pattern_node(self, active_pattern: ActivePattern) -> _Element:
153 |         """Process an active pattern node.
154 | 
155 |         Args:
156 |             active_pattern: The active pattern node
157 | 
158 |         Returns:
159 |             The created XML element
160 |         """
161 |         documents = None
162 |         if active_pattern.documents:
163 |             documents = ' '.join([f'file:{doc}' for doc in active_pattern.documents])
164 | 
165 |         node_attributes = {
166 |             'documents': documents,
167 |             'id': active_pattern.id,
168 |             'name': active_pattern.name,
169 |         }
170 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
171 | 
172 |         return Element(f'{{{self._nsmap["svrl"]}}}active-pattern', attrib=final_attributes, nsmap=self._nsmap)
173 | 
174 |     def _process_fired_rule_node(self, fired_rule: FiredRule) -> _Element:
175 |         """Process a fired rule node.
176 | 
177 |         Args:
178 |             fired_rule: information on the fired rule
179 | 
180 |         Returns:
181 |             The created XML element
182 |         """
183 |         document = None
184 |         if fired_rule.document:
185 |             document = f'file:{fired_rule.document}'
186 | 
187 |         node_attributes = {
188 |             'context': fired_rule.context.query,
189 |             'document': document,
190 |             'flag': fired_rule.flag,
191 |             'id': fired_rule.id,
192 |             'name': fired_rule.name,
193 |             'role': fired_rule.role
194 |         }
195 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
196 | 
197 |         return Element(f'{{{self._nsmap["svrl"]}}}fired-rule', attrib=final_attributes, nsmap=self._nsmap)
198 | 
199 |     def _process_suppressed_rule_node(self, suppressed_rule: SuppressedRule) -> _Element:
200 |         """Process a suppressed rule node
201 | 
202 |         Args:
203 |             suppressed_rule: information on the suppressed rule
204 | 
205 |         Returns:
206 |             The created XML element
207 |         """
208 |         node_attributes = {
209 |             'context': suppressed_rule.context.query,
210 |             'id': suppressed_rule.id,
211 |         }
212 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
213 | 
214 |         return Element(f'{{{self._nsmap["svrl"]}}}suppressed-rule', attrib=final_attributes, nsmap=self._nsmap)
215 | 
216 |     def _process_failed_assert(self, failed_assert: FailedAssert) -> _Element:
217 |         """Process a failed assert node.
218 | 
219 |         Args:
220 |             failed_assert: the failed assert information
221 | 
222 |         Returns:
223 |             The created XML element
224 |         """
225 |         return self._process_check_result(failed_assert)
226 | 
227 |     def _process_successful_report(self, successful_report: SuccessfulReport) -> _Element:
228 |         """Process a successful report node.
229 | 
230 |         Args:
231 |             successful_report: the successful report information
232 | 
233 |         Returns:
234 |             The created XML element
235 |         """
236 |         return self._process_check_result(successful_report)
237 | 
238 |     def _process_check_result(self, check_result: FailedAssert | SuccessfulReport):
239 |         """Process a check result.
240 | 
241 |         Depending on the type of input we return either a `successful-report` or a `failed-assert` element.
242 | 
243 |         Args:
244 |             check_result: the check result.
245 | 
246 |         Returns:
247 |             The created XML element
248 |         """
249 |         node_attributes = {
250 |             'flag': check_result.flag,
251 |             'id': check_result.id,
252 |             'location': check_result.location.expression,
253 |             'role': check_result.role,
254 |             'test': check_result.test.query
255 |         }
256 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
257 | 
258 |         if subject_location := check_result.subject_location:
259 |             final_attributes['location'] = subject_location.expression
260 | 
261 |         node_name = 'successful-report'
262 |         if isinstance(check_result, FailedAssert):
263 |             node_name = 'failed-assert'
264 | 
265 |         report_element = Element(f'{{{self._nsmap["svrl"]}}}{node_name}',
266 |                                  attrib=final_attributes, nsmap=self._nsmap)
267 | 
268 |         if check_result.diagnostic_references:
269 |             for diagnostic_reference in check_result.diagnostic_references:
270 |                 report_element.append(self.visit(diagnostic_reference))
271 | 
272 |         if check_result.property_references:
273 |             for property_reference in check_result.property_references:
274 |                 report_element.append(self.visit(property_reference))
275 | 
276 |         if check_result.text.content:
277 |             report_element.append(self.visit(check_result.text))
278 | 
279 |         return report_element
280 | 
281 |     def _process_text_node(self, text: Text) -> _Element:
282 |         """Process a text node.
283 | 
284 |         Args:
285 |             text: the text node to convert into an element.
286 | 
287 |         Returns:
288 |             The created XML element
289 |         """
290 |         node_attributes = {
291 |             'fpi': text.fpi,
292 |             'icon': text.icon,
293 |             'see': text.see,
294 |             'class': text.class_,
295 |             'id': text.id,
296 |             '{http://www.w3.org/XML/1998/namespace}lang': text.xml_lang,
297 |             '{http://www.w3.org/XML/1998/namespace}space': text.xml_space,
298 |         }
299 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
300 | 
301 |         text_element = Element(f'{{{self._nsmap["svrl"]}}}text', attrib=final_attributes, nsmap=self._nsmap)
302 |         text_element.text = text.content
303 |         return text_element
304 | 
305 |     def _process_property_reference(self, property_reference: PropertyReference) -> _Element:
306 |         """Process a property reference node.
307 | 
308 |         Args:
309 |             property_reference: the property reference to convert into a node.
310 | 
311 |         Returns:
312 |             The created XML element
313 |         """
314 |         node_attributes = {
315 |             'property': property_reference.property,
316 |             'role': property_reference.role,
317 |             'scheme': property_reference.scheme
318 |         }
319 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
320 | 
321 |         property_reference_element = Element(f'{{{self._nsmap["svrl"]}}}property-reference',
322 |                                              attrib=final_attributes, nsmap=self._nsmap)
323 | 
324 |         if property_reference.text:
325 |             property_reference_element.append(self.visit(property_reference.text))
326 | 
327 |         return property_reference_element
328 | 
329 |     def _process_diagnostic_reference(self, diagnostic_reference: DiagnosticReference) -> _Element:
330 |         """Process a diagnostic reference node.
331 | 
332 |         Args:
333 |             diagnostic_reference: the diagnostic reference to convert into a node.
334 | 
335 |         Returns:
336 |             The created XML element
337 |         """
338 |         node_attributes = {
339 |             'diagnostic': diagnostic_reference.diagnostic,
340 |         }
341 |         final_attributes = {k: v for k, v in node_attributes.items() if v is not None}
342 | 
343 |         diagnostic_reference_element = Element(f'{{{self._nsmap["svrl"]}}}diagnostic-reference',
344 |                                                attrib=final_attributes, nsmap=self._nsmap)
345 | 
346 |         if diagnostic_reference.text:
347 |             diagnostic_reference_element.append(self.visit(diagnostic_reference.text))
348 | 
349 |         return diagnostic_reference_element
350 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/xml_validation/results/svrl_builder.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Robbert Harms'
  2 | __date__ = '2023-04-24'
  3 | __maintainer__ = 'Robbert Harms'
  4 | __email__ = 'robbert@xkls.nl'
  5 | __licence__ = 'GPL v3'
  6 | 
  7 | from abc import ABCMeta, abstractmethod
  8 | 
  9 | from lxml.etree import Element, SubElement, ElementTree, _ElementTree
 10 | 
 11 | from datetime import datetime
 12 | 
 13 | from pyschematron import __version__
 14 | from pyschematron.direct_mode.schematron.ast import Assert, Namespace, ConcretePattern
 15 | from pyschematron.direct_mode.svrl.ast import ActivePattern, FiredRule, SchematronQuery, FailedAssert, \
 16 |     XPathExpression, Text, SchematronOutput, SuccessfulReport, SuppressedRule, NSPrefixInAttributeValues, MetaData, \
 17 |     CheckResult, ValidationEvent, PropertyReference, DiagnosticReference
 18 | 
 19 | from pyschematron.direct_mode.svrl.xml_writer import LxmlSVRLWriter
 20 | from pyschematron.direct_mode.xml_validation.results.validation_results import (XMLDocumentValidationResult, RuleResult,
 21 |                                                                                 FiredRuleResult, SuppressedRuleResult,
 22 |                                                                                 PropertyResult, DiagnosticResult)
 23 | 
 24 | 
 25 | class SVRLReportBuilder(metaclass=ABCMeta):
 26 | 
 27 |     @abstractmethod
 28 |     def create_svrl_xml(self, validation_result: XMLDocumentValidationResult) -> _ElementTree:
 29 |         """Create a Schematron Validation Reporting Language (SVRL) document of the validation results.
 30 | 
 31 |         This transforms the validation results into an XML document in the SVRL namespace.
 32 | 
 33 |         Args:
 34 |             validation_result: the result of validating the document using the validator of this package.
 35 | 
 36 |         Returns:
 37 |             An XML document in the SVRL namespace.
 38 |         """
 39 | 
 40 | 
 41 | class DefaultSVRLReportBuilder(SVRLReportBuilder):
 42 |     """Converts the validation results into an SVRL AST.
 43 | 
 44 |     In the SVRL report we want to enumerate the processing results in the format:
 45 |         <active-pattern/>
 46 |         ...
 47 |         <rule results>
 48 |         ...
 49 |         <active-pattern/>
 50 |         ...
 51 | 
 52 |     The results of using the validator provides results for every XML node, Schematron pattern and Schematron rule
 53 |     checked. This is too comprehensive for the SVRL and as such this class simplifies the results.
 54 |     """
 55 | 
 56 |     def create_svrl_xml(self, validation_result: XMLDocumentValidationResult) -> _ElementTree:
 57 |         title = None
 58 |         if title_node := validation_result.schema_information.schema.title:
 59 |             title = title_node.content
 60 | 
 61 |         svrl = SchematronOutput(
 62 |             self._get_text_nodes(validation_result),
 63 |             self._get_ns_prefix_nodes(validation_result),
 64 |             tuple(self.get_validation_events(validation_result)),
 65 |             metadata=self._get_metadata(validation_result),
 66 |             phase=validation_result.schema_information.phase,
 67 |             schema_version=validation_result.schema_information.schema.schema_version,
 68 |             title=title)
 69 | 
 70 |         writer = LxmlSVRLWriter()
 71 |         return ElementTree(writer.create_xml(svrl))
 72 | 
 73 |     def get_validation_events(self, validation_result: XMLDocumentValidationResult) -> list[ValidationEvent]:
 74 |         """Extract a list of SVRL validation events from the validation results.
 75 | 
 76 |         Args:
 77 |             validation_result: The validation results from the validator
 78 | 
 79 |         Returns:
 80 |             A list of SVRL validation events (active pattern, fired rule, failed assert,
 81 |             successful report, suppressed rule).
 82 |         """
 83 |         root_document = None
 84 |         if xml_document := validation_result.xml_information.xml_document:
 85 |             if docinfo := xml_document.docinfo:
 86 |                 root_document = docinfo.URL
 87 | 
 88 |         processed_patterns = self._get_rule_results_by_pattern(validation_result)
 89 |         validation_events = []
 90 |         for pattern, rules_processed in processed_patterns.items():
 91 |             name = pattern.title.content if pattern.title else None
 92 |             validation_events.append(ActivePattern(id=pattern.id, name=name, documents=(root_document,)))
 93 | 
 94 |             for rule_processed in rules_processed:
 95 |                 if isinstance(rule_processed, FiredRuleResult):
 96 |                     validation_events.append(FiredRule(SchematronQuery(rule_processed.rule.context.query),
 97 |                                                        id=rule_processed.rule.id))
 98 |                     validation_events += self._get_svrl_check_results(rule_processed)
 99 |                 elif isinstance(rule_processed, SuppressedRuleResult):
100 |                     validation_events.append(SuppressedRule(SchematronQuery(rule_processed.rule.context.query)))
101 |         return validation_events
102 | 
103 |     def _get_svrl_check_results(self, fired_rule: FiredRuleResult) -> list[CheckResult]:
104 |         """Transform the validation checks in the fired rule to a list of SVRL AST check results.
105 | 
106 |         Args:
107 |             fired_rule: the rule which was fired, from this we will return the checks which did not succeed.
108 | 
109 |         Returns:
110 |             A list of failed asserts or successful reports.
111 |         """
112 |         subject_location = None
113 |         if fired_rule.subject_node:
114 |             subject_location = XPathExpression(fired_rule.subject_node.xpath_location)
115 | 
116 |         events = []
117 |         for check_result in fired_rule.check_results:
118 |             if check_result.check_result:
119 |                 if check_result.subject_node:
120 |                     subject_location = XPathExpression(check_result.subject_node.xpath_location)
121 | 
122 |                 property_references = self._get_property_references(check_result.property_results)
123 |                 diagnostic_references = self._get_diagnostic_reference(check_result.diagnostic_results)
124 | 
125 |                 if isinstance(check_result.check, Assert):
126 |                     event = FailedAssert(Text(check_result.text),
127 |                                          XPathExpression(check_result.xml_node.xpath_location),
128 |                                          SchematronQuery(check_result.check.test.query),
129 |                                          diagnostic_references=tuple(diagnostic_references),
130 |                                          property_references=tuple(property_references),
131 |                                          subject_location=subject_location)
132 |                 else:
133 |                     event = SuccessfulReport(Text(check_result.text),
134 |                                              XPathExpression(check_result.xml_node.xpath_location),
135 |                                              SchematronQuery(check_result.check.test.query),
136 |                                              diagnostic_references=tuple(diagnostic_references),
137 |                                              property_references=tuple(property_references),
138 |                                              subject_location=subject_location)
139 |                 events.append(event)
140 |         return events
141 | 
142 |     @staticmethod
143 |     def _get_property_references(property_results: list[PropertyResult] | None) -> list[PropertyReference]:
144 |         """Convert the property results to SVRL property reference nodes.
145 | 
146 |         Args:
147 |             property_results: the list of property results, may be None, in which case we return an empty list.
148 | 
149 |         Returns:
150 |             The list of property references.
151 |         """
152 |         property_references = []
153 |         if property_results:
154 |             for property_result in property_results:
155 |                 property_references.append(PropertyReference(
156 |                     Text(property_result.text), property_result.property_id,
157 |                     property_result.role, property_result.scheme))
158 |         return property_references
159 | 
160 |     @staticmethod
161 |     def _get_diagnostic_reference(diagnostic_results: list[DiagnosticResult] | None) -> list[DiagnosticReference]:
162 |         """Convert the diagnostic results to SVRL diagnostic reference nodes.
163 | 
164 |         Args:
165 |             diagnostic_results: the list of diagnostic results, may be None, in which case we return an empty list.
166 | 
167 |         Returns:
168 |             The list of diagnostic references.
169 |         """
170 |         diagnostic_references = []
171 |         if diagnostic_results:
172 |             for diagnostic_result in diagnostic_results:
173 |                 text = Text(diagnostic_result.text,
174 |                             xml_lang=diagnostic_result.xml_lang,
175 |                             xml_space=diagnostic_result.xml_space)
176 |                 diagnostic_references.append(DiagnosticReference(text, diagnostic=diagnostic_result.diagnostic_id))
177 |         return diagnostic_references
178 | 
179 |     @staticmethod
180 |     def _get_rule_results_by_pattern(
181 |             validation_result: XMLDocumentValidationResult) -> dict[ConcretePattern, list[RuleResult]]:
182 |         """Reduce the validation results into a dictionary indexed by pattern.
183 | 
184 |         The validation results consist of data for each XML node, Schematron pattern and Schematron rule. This function
185 |         iterates over the validation results and groups them by Schematron pattern.
186 | 
187 |         Args:
188 |             validation_result: the validation results
189 | 
190 |         Returns:
191 |             A dictionary which as indices the patterns which had at least one fired rule. As values are the
192 |             rule results taken from the validation results.
193 |         """
194 |         processed_patterns = {}
195 |         for node_result in validation_result.node_results:
196 |             for pattern_result in node_result.pattern_results:
197 |                 rules_processed = processed_patterns.setdefault(pattern_result.pattern, [])
198 | 
199 |                 if pattern_result.has_fired_rule():
200 |                     for rule_result in pattern_result.rule_results:
201 |                         rules_processed.append(rule_result)
202 |         return processed_patterns
203 | 
204 |     @staticmethod
205 |     def _get_text_nodes(validation_result: XMLDocumentValidationResult) -> tuple[Text, ...]:
206 |         """Get the listing of text nodes we will add to the SVRL output.
207 | 
208 |         For the text nodes we will use all the paragraph nodes present in the Schematron document.
209 | 
210 |         Args:
211 |             validation_result: all the validation information
212 | 
213 |         Returns:
214 |             The text nodes, created from the paragraphs of the Schematron document.
215 |         """
216 |         texts = []
217 |         for paragraph in validation_result.schema_information.schema.paragraphs:
218 |             texts.append(Text(paragraph.content, icon=paragraph.icon,
219 |                               xml_lang=paragraph.xml_lang, id=paragraph.id,
220 |                               class_=paragraph.class_))
221 |         return tuple(texts)
222 | 
223 |     @staticmethod
224 |     def _get_ns_prefix_nodes(validation_result: XMLDocumentValidationResult) -> tuple[NSPrefixInAttributeValues, ...]:
225 |         """Get the listing of the ns prefix nodes.
226 | 
227 |         Args:
228 |             validation_result: all the validation information
229 | 
230 |         Returns:
231 |             The NS prefix nodes.
232 |         """
233 |         ns_prefix_in_attribute_values = []
234 |         for schematron_namespace in validation_result.schema_information.schema.namespaces:
235 |             ns_prefix_in_attribute_values.append(NSPrefixInAttributeValues(schematron_namespace.prefix,
236 |                                                                            schematron_namespace.uri))
237 |         return tuple(ns_prefix_in_attribute_values)
238 | 
239 |     @staticmethod
240 |     def _get_metadata(validation_result: XMLDocumentValidationResult) -> MetaData:
241 |         """Get the metadata node to add to the SVRL.
242 | 
243 |         Args:
244 |             validation_result: all the validation information
245 | 
246 |         Returns:
247 |             The metadata node.
248 |         """
249 |         namespaces = (Namespace('dct', 'http://purl.org/dc/terms/'),
250 |                       Namespace('skos', 'http://www.w3.org/2004/02/skos/core#'),
251 |                       Namespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
252 |                       Namespace('pysch', 'https://github.com/robbert-harms/pyschematron'))
253 | 
254 |         nsmap = {ns.prefix: ns.uri for ns in namespaces}
255 |         create_time = datetime.now().astimezone()
256 | 
257 |         def creator_element():
258 |             creator = Element(f'{{{nsmap["dct"]}}}creator', nsmap=nsmap)
259 |             agent = SubElement(creator, f'{{{nsmap["dct"]}}}agent')
260 |             pref_label = SubElement(agent, f'{{{nsmap["skos"]}}}prefLabel')
261 |             pref_label.text = f'PySchematron {__version__}'
262 |             return creator
263 | 
264 |         def created_element():
265 |             created = Element(f'{{{nsmap["dct"]}}}created', nsmap=nsmap)
266 |             created.text = create_time.isoformat()
267 |             return created
268 | 
269 |         def source_element():
270 |             source = Element(f'{{{nsmap["dct"]}}}source', nsmap=nsmap)
271 |             description = SubElement(source, f'{{{nsmap["rdf"]}}}Description')
272 |             creator = SubElement(description, f'{{{nsmap["dct"]}}}creator')
273 |             agent = SubElement(creator, f'{{{nsmap["dct"]}}}Agent')
274 |             pref_label = SubElement(agent, f'{{{nsmap["skos"]}}}prefLabel')
275 |             pref_label.text = f'PySchematron {__version__}'
276 |             description.append(created_element())
277 |             return source
278 | 
279 |         return MetaData((creator_element(), created_element(), source_element()), namespaces)
280 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/parsers/xml/builders.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Robbert Harms'
  2 | __date__ = '2023-02-18'
  3 | __maintainer__ = 'Robbert Harms'
  4 | __email__ = 'robbert@xkls.nl'
  5 | 
  6 | from abc import ABCMeta, abstractmethod
  7 | from typing import override
  8 | 
  9 | from pyschematron.direct_mode.schematron.ast import (SchematronASTNode, Check, Variable, Paragraph, Extends,
 10 |                                                      ConcreteRule, ExternalRule, AbstractRule, Query, Rule,
 11 |                                                      ConcretePattern, Pattern, Namespace, Schema, Title,
 12 |                                                      AbstractPattern, InstancePattern, PatternParameter, Phase,
 13 |                                                      ActivePhase, Diagnostics, Properties, XPathExpression)
 14 | from pyschematron.direct_mode.schematron.parsers.xml.utils import parse_attributes
 15 | 
 16 | 
 17 | class SchematronASTNodeBuilder(metaclass=ABCMeta):
 18 |     """Builder pattern for delayed construction of Schematron nodes."""
 19 | 
 20 |     @abstractmethod
 21 |     def build(self) -> SchematronASTNode:
 22 |         """Build a Schematron node based on the information in this builder.
 23 | 
 24 |         Returns:
 25 |             The constructed schematron element.
 26 |         """
 27 | 
 28 | 
 29 | class RuleBuilder(SchematronASTNodeBuilder, metaclass=ABCMeta):
 30 | 
 31 |     def __init__(self):
 32 |         """Construct a Rule node out of the parts provided.
 33 | 
 34 |         Can not be used directly, one needs to use one of the specialized subclasses.
 35 |         """
 36 |         self.checks: list[Check] = []
 37 |         self.variables: list[Variable] = []
 38 |         self.paragraphs: list[Paragraph] = []
 39 |         self.extends: list[Extends] = []
 40 |         self.attributes = {}
 41 | 
 42 |     def add_checks(self, nodes: list[Check]):
 43 |         """Add a list of Check nodes (Report or Assert).
 44 | 
 45 |         Args:
 46 |             nodes: the nodes to add to the list of checks.
 47 |         """
 48 |         self.checks.extend(nodes)
 49 | 
 50 |     def add_variables(self, nodes: list[Variable]):
 51 |         """Add a list of Variable nodes.
 52 | 
 53 |         Args:
 54 |             nodes: the nodes to add to the list of variables.
 55 |         """
 56 |         self.variables.extend(nodes)
 57 | 
 58 |     def add_paragraphs(self, nodes: list[Paragraph]):
 59 |         """Add a list of Paragraph nodes.
 60 | 
 61 |         Args:
 62 |             nodes: the nodes to add to the list of paragraphs.
 63 |         """
 64 |         self.paragraphs.extend(nodes)
 65 | 
 66 |     def add_extends(self, nodes: list[Extends]):
 67 |         """Add a list of extends nodes.
 68 | 
 69 |         Args:
 70 |             nodes: the nodes to add to the list of extends
 71 |         """
 72 |         self.extends.extend(nodes)
 73 | 
 74 |     def add_attributes(self, element_attributes: dict[str, str]):
 75 |         """Add all the attributes of the XML Rule element in one go.
 76 | 
 77 |         Args:
 78 |             element_attributes: dictionary of attributes taken from the XML node
 79 |         """
 80 |         allowed_attributes = ['context', 'subject', 'flag', 'fpi', 'icon', 'id', 'role', 'see',
 81 |                               '{http://www.w3.org/XML/1998/namespace}lang',
 82 |                               '{http://www.w3.org/XML/1998/namespace}space']
 83 | 
 84 |         attribute_handlers = {
 85 |             'context': lambda k, v: {k: Query(v)},
 86 |             'subject': lambda k, v: {k: XPathExpression(v)},
 87 |             '{http://www.w3.org/XML/1998/namespace}lang': lambda k, v: {'xml_lang': v},
 88 |             '{http://www.w3.org/XML/1998/namespace}space': lambda k, v: {'xml_space': v}
 89 |         }
 90 | 
 91 |         attributes = parse_attributes(element_attributes, allowed_attributes, attribute_handlers)
 92 |         self.attributes.update(attributes)
 93 | 
 94 | 
 95 | class ConcreteRuleBuilder(RuleBuilder):
 96 | 
 97 |     @override
 98 |     def build(self) -> ConcreteRule:
 99 |         if 'context' not in self.attributes:
100 |             raise ValueError('A concrete rule must have a context.')
101 | 
102 |         return ConcreteRule(checks=tuple(self.checks), variables=tuple(self.variables),
103 |                             paragraphs=tuple(self.paragraphs), extends=tuple(self.extends), **self.attributes)
104 | 
105 | 
class AbstractRuleBuilder(RuleBuilder):
    """Builder for abstract rules, these are the rules with an id and without a context."""

    @override
    def build(self) -> AbstractRule:
        """Construct the abstract rule from the collected parts.

        Returns:
            The abstract rule, with the collected node lists stored as tuples.

        Raises:
            ValueError: if a `context` attribute was added, or if the `id` attribute is missing.
        """
        attributes = self.attributes
        if 'context' in attributes:
            raise ValueError('An abstract rule can not have a context.')
        if 'id' not in attributes:
            raise ValueError('An abstract rule must have an id.')

        checks = tuple(self.checks)
        variables = tuple(self.variables)
        paragraphs = tuple(self.paragraphs)
        extends = tuple(self.extends)
        return AbstractRule(checks=checks, variables=variables, paragraphs=paragraphs, extends=extends,
                            **attributes)
118 | 
119 | 
class ExternalRuleBuilder(RuleBuilder):
    """Builder for external rules, these are rules without a context."""

    @override
    def build(self) -> ExternalRule:
        """Construct the external rule from the collected parts.

        Returns:
            The external rule, with the collected node lists stored as tuples.

        Raises:
            ValueError: if a `context` attribute was added to this builder.
        """
        if 'context' in self.attributes:
            raise ValueError('An external rule can not have a context.')

        checks = tuple(self.checks)
        variables = tuple(self.variables)
        paragraphs = tuple(self.paragraphs)
        extends = tuple(self.extends)
        return ExternalRule(checks=checks, variables=variables, paragraphs=paragraphs, extends=extends,
                            **self.attributes)
129 | 
130 | 
class PatternBuilder(SchematronASTNodeBuilder, metaclass=ABCMeta):
    """Shared collection logic for the builders of the different kinds of Pattern nodes."""

    def __init__(self):
        """Collect the parts needed to construct a Pattern node.

        This class does not implement `build`, one needs to use one of the specialized subclasses.
        """
        self.rules: list[Rule] = []
        self.variables: list[Variable] = []
        self.title: Title | None = None
        self.paragraphs: list[Paragraph] = []
        self.pattern_parameters: list[PatternParameter] = []
        self.attributes = {}

    def add_rules(self, nodes: list[Rule]):
        """Append Rule nodes to the collected rules.

        Args:
            nodes: the rule nodes to append, in order.
        """
        self.rules += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the collected variables.

        Args:
            nodes: the variable nodes to append, in order.
        """
        self.variables += nodes

    def set_title(self, node: Title | None):
        """Set the title node, replacing any title set before.

        Args:
            node: the title node, or None for no title.
        """
        self.title = node

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the collected paragraphs.

        Args:
            nodes: the paragraph nodes to append, in order.
        """
        self.paragraphs += nodes

    def add_parameters(self, nodes: list[PatternParameter]):
        """Append PatternParameter nodes to the collected parameters.

        Args:
            nodes: the parameter nodes to append, in order.
        """
        self.pattern_parameters += nodes

    def add_attributes(self, element_attributes: dict[str, str]):
        """Store the attributes of the XML Pattern element.

        The `documents` attribute is wrapped in a `Query` and the `is-a` attribute is stored as
        `abstract_id_ref`. The `xml:lang` and `xml:space` attributes are stored as `xml_lang` and `xml_space`.
        The handling of the other allowed attributes is left to `parse_attributes` (presumably stored as is).

        Attributes added in an earlier call are overwritten by attributes with the same name.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        handlers = {
            'documents': lambda k, v: {k: Query(v)},
            'is-a': lambda k, v: {'abstract_id_ref': v},
            '{http://www.w3.org/XML/1998/namespace}lang': lambda k, v: {'xml_lang': v},
            '{http://www.w3.org/XML/1998/namespace}space': lambda k, v: {'xml_space': v}
        }
        allowed = ['documents', 'fpi', 'icon', 'id', 'see', 'is-a',
                   '{http://www.w3.org/XML/1998/namespace}lang',
                   '{http://www.w3.org/XML/1998/namespace}space']

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))
204 | 
205 | 
class ConcretePatternBuilder(PatternBuilder):
    """Builder for concrete patterns."""

    @override
    def build(self) -> ConcretePattern:
        """Construct the concrete pattern from the collected parts.

        The collected pattern parameters are not used for this kind of pattern.

        Returns:
            The concrete pattern, with the collected node lists stored as tuples.
        """
        rules = tuple(self.rules)
        variables = tuple(self.variables)
        paragraphs = tuple(self.paragraphs)
        return ConcretePattern(rules=rules, variables=variables, paragraphs=paragraphs, title=self.title,
                               **self.attributes)
212 | 
213 | 
class AbstractPatternBuilder(PatternBuilder):
    """Builder for abstract patterns."""

    @override
    def build(self) -> AbstractPattern:
        """Construct the abstract pattern from the collected parts.

        The collected pattern parameters are not used for this kind of pattern.

        Returns:
            The abstract pattern, with the collected node lists stored as tuples.
        """
        rules = tuple(self.rules)
        variables = tuple(self.variables)
        paragraphs = tuple(self.paragraphs)
        return AbstractPattern(rules=rules, variables=variables, paragraphs=paragraphs, title=self.title,
                               **self.attributes)
220 | 
221 | 
class InstancePatternBuilder(PatternBuilder):
    """Builder for instance patterns, i.e. patterns instantiating an abstract pattern."""

    @override
    def build(self) -> InstancePattern:
        """Construct the instance pattern from the collected parameters and attributes.

        Only the pattern parameters and the attributes are used. Rules, variables, paragraphs and a title
        collected on this builder are not passed on to the pattern.

        Returns:
            The instance pattern, with the collected parameters stored as a tuple.
        """
        params = tuple(self.pattern_parameters)
        return InstancePattern(params=params, **self.attributes)
227 | 
228 | 
class PhaseBuilder(SchematronASTNodeBuilder):
    """Builder for Phase nodes."""

    def __init__(self):
        """Collect the parts needed to construct a Phase node."""
        self.active: list[ActivePhase] = []
        self.variables: list[Variable] = []
        self.paragraphs: list[Paragraph] = []
        self.attributes = {}

    @override
    def build(self) -> Phase:
        """Construct the phase from the collected parts.

        Returns:
            The phase, with the collected node lists stored as tuples.
        """
        active = tuple(self.active)
        variables = tuple(self.variables)
        paragraphs = tuple(self.paragraphs)
        return Phase(active=active, variables=variables, paragraphs=paragraphs, **self.attributes)

    def add_active(self, nodes: list[ActivePhase]):
        """Append ActivePhase nodes to the collected active phases.

        Args:
            nodes: the active phase nodes to append, in order.
        """
        self.active += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the collected variables.

        Args:
            nodes: the variable nodes to append, in order.
        """
        self.variables += nodes

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the collected paragraphs.

        Args:
            nodes: the paragraph nodes to append, in order.
        """
        self.paragraphs += nodes

    def add_attributes(self, element_attributes: dict[str, str]):
        """Store the attributes of the XML Phase element.

        The `xml:lang` and `xml:space` attributes are stored as `xml_lang` and `xml_space`. The handling of
        the other allowed attributes is left to `parse_attributes` (presumably stored as is).

        Attributes added in an earlier call are overwritten by attributes with the same name.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        handlers = {
            '{http://www.w3.org/XML/1998/namespace}lang': lambda k, v: {'xml_lang': v},
            '{http://www.w3.org/XML/1998/namespace}space': lambda k, v: {'xml_space': v}
        }
        allowed = ['fpi', 'icon', 'id', 'see',
                   '{http://www.w3.org/XML/1998/namespace}lang',
                   '{http://www.w3.org/XML/1998/namespace}space']

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))
284 | 
285 | 
class SchemaBuilder(SchematronASTNodeBuilder):
    """Builder for the Schema node, the root of the Schematron AST."""

    def __init__(self):
        """Collect the parts needed to construct a Schema node."""
        self.patterns: list[Pattern] = []
        self.namespaces: list[Namespace] = []
        self.diagnostics: list[Diagnostics] = []
        self.properties: list[Properties] = []
        self.title: Title | None = None
        self.variables: list[Variable] = []
        self.paragraphs: list[Paragraph] = []
        self.phases: list[Phase] = []
        self.attributes = {}

    @override
    def build(self) -> Schema:
        """Construct the schema from the collected parts.

        Returns:
            The schema, with the collected node lists stored as tuples.
        """
        return Schema(
            patterns=tuple(self.patterns),
            namespaces=tuple(self.namespaces),
            phases=tuple(self.phases),
            paragraphs=tuple(self.paragraphs),
            variables=tuple(self.variables),
            diagnostics=tuple(self.diagnostics),
            properties=tuple(self.properties),
            title=self.title,
            **self.attributes)

    def add_patterns(self, nodes: list[Pattern]):
        """Append Pattern nodes to the collected patterns.

        Args:
            nodes: the pattern nodes to append, in order.
        """
        self.patterns += nodes

    def add_namespaces(self, nodes: list[Namespace]):
        """Append Namespace nodes to the collected namespaces.

        Args:
            nodes: the namespace nodes to append, in order.
        """
        self.namespaces += nodes

    def add_phases(self, nodes: list[Phase]):
        """Append Phase nodes to the collected phases.

        Args:
            nodes: the phase nodes to append, in order.
        """
        self.phases += nodes

    def add_diagnostics(self, nodes: list[Diagnostics]):
        """Append Diagnostics nodes to the collected diagnostics.

        Args:
            nodes: the diagnostics nodes to append, in order.
        """
        self.diagnostics += nodes

    def add_properties(self, nodes: list[Properties]):
        """Append Properties nodes to the collected properties.

        Args:
            nodes: the properties nodes to append, in order.
        """
        self.properties += nodes

    def add_variables(self, nodes: list[Variable]):
        """Append Variable nodes to the collected variables.

        Args:
            nodes: the variable nodes to append, in order.
        """
        self.variables += nodes

    def add_paragraphs(self, nodes: list[Paragraph]):
        """Append Paragraph nodes to the collected paragraphs.

        Args:
            nodes: the paragraph nodes to append, in order.
        """
        self.paragraphs += nodes

    def set_title(self, node: Title | None):
        """Set the title node, replacing any title set before.

        Args:
            node: the title node, or None for no title.
        """
        self.title = node

    def add_attributes(self, element_attributes: dict[str, str]):
        """Store the attributes of the XML Schema element.

        The camel case attributes `defaultPhase`, `queryBinding` and `schemaVersion` are stored as
        `default_phase`, `query_binding` and `schema_version`. The `xml:lang` and `xml:space` attributes are
        stored as `xml_lang` and `xml_space`. The handling of the other allowed attributes is left to
        `parse_attributes` (presumably stored as is).

        Attributes added in an earlier call are overwritten by attributes with the same name.

        Args:
            element_attributes: dictionary of attributes taken from the XML node
        """
        handlers = {
            'defaultPhase': lambda k, v: {'default_phase': v},
            'queryBinding': lambda k, v: {'query_binding': v},
            'schemaVersion': lambda k, v: {'schema_version': v},
            '{http://www.w3.org/XML/1998/namespace}lang': lambda k, v: {'xml_lang': v},
            '{http://www.w3.org/XML/1998/namespace}space': lambda k, v: {'xml_space': v}
        }
        allowed = ['defaultPhase', 'fpi', 'icon', 'id',
                   'queryBinding', 'schemaVersion', 'see',
                   '{http://www.w3.org/XML/1998/namespace}lang',
                   '{http://www.w3.org/XML/1998/namespace}space']

        self.attributes.update(parse_attributes(element_attributes, allowed, handlers))
392 | 


--------------------------------------------------------------------------------
/pyschematron/direct_mode/schematron/ast_visitors.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'Robbert Harms'
  2 | __date__ = '2023-03-06'
  3 | __maintainer__ = 'Robbert Harms'
  4 | __email__ = 'robbert@xkls.nl'
  5 | 
  6 | from typing import Any, Mapping, Iterable, Literal, override
  7 | 
  8 | from abc import ABCMeta
  9 | 
 10 | from pyschematron.direct_mode.lib.ast import GenericASTVisitor
 11 | from pyschematron.direct_mode.schematron.ast import SchematronASTNode, Schema, ConcretePattern, Rule, ExtendsExternal, \
 12 |     ExternalRule, ConcreteRule, ExtendsById, AbstractRule, AbstractPattern, InstancePattern, Pattern, Phase
 13 | from pyschematron.direct_mode.schematron.utils import macro_expand
 14 | 
 15 | 
class SchematronASTVisitor(GenericASTVisitor[SchematronASTNode], metaclass=ABCMeta):
    """Visitor pattern for the Schematron AST nodes.

    This specializes the generic AST visitor to nodes of type `SchematronASTNode`. It adds no behaviour of
    its own, it serves as the common base class of the Schematron visitors in this module.
    """
 18 | 
 19 | 
 20 | class FindIdVisitor(SchematronASTVisitor):
 21 | 
 22 |     def __init__(self, id_ref: str):
 23 |         """A visitor which finds a node with the given ID.
 24 | 
 25 |         Args:
 26 |             id_ref: the id we would like to find in the visited nodes
 27 |         """
 28 |         super().__init__()
 29 |         self._id_ref = id_ref
 30 | 
 31 |     @override
 32 |     def visit(self, ast_node: SchematronASTNode) -> Any:
 33 |         if hasattr(ast_node, 'id') and getattr(ast_node, 'id') == self._id_ref:
 34 |             return ast_node
 35 | 
 36 |         for child in ast_node.get_children():
 37 |             if found_node := self.visit(child):
 38 |                 return found_node
 39 | 
 40 | 
 41 | class GetIDMappingVisitor(SchematronASTVisitor):
 42 | 
 43 |     def __init__(self):
 44 |         """A visitor which maps all nodes with an id to their id."""
 45 |         super().__init__()
 46 |         self._result = {}
 47 | 
 48 |     @override
 49 |     def visit(self, ast_node: SchematronASTNode) -> Any:
 50 |         self._result |= self._visit(ast_node)
 51 |         return self._result
 52 | 
 53 |     def _visit(self, ast_node: SchematronASTNode) -> dict[str, SchematronASTNode]:
 54 |         for child in ast_node.get_children():
 55 |             child.accept_visitor(self)
 56 | 
 57 |         if hasattr(ast_node, 'id'):
 58 |             if (node_id := getattr(ast_node, 'id')) is not None:
 59 |                 return {node_id: ast_node}
 60 |         return {}
 61 | 
 62 | 
 63 | class GetNodesOfTypeVisitor(SchematronASTVisitor):
 64 | 
 65 |     def __init__(self, types: type[SchematronASTNode] | tuple[type[SchematronASTNode], ...]):
 66 |         """A visitor which checks each node for their type against the type(s) provided
 67 | 
 68 |         Args:
 69 |             types: a single type or a tuple of types we check each node against.
 70 |         """
 71 |         super().__init__()
 72 |         self._types = types
 73 |         self._result = []
 74 | 
 75 |     @override
 76 |     def visit(self, ast_node: SchematronASTNode) -> Any:
 77 |         for child in ast_node.get_children():
 78 |             child.accept_visitor(self)
 79 | 
 80 |         if isinstance(ast_node, self._types):
 81 |             self._result.append(ast_node)
 82 | 
 83 |         return self._result
 84 | 
 85 | 
 86 | class ResolveExtendsVisitor(SchematronASTVisitor):
 87 | 
 88 |     def __init__(self, schema: Schema):
 89 |         """Simplify an AST Schema by inlining all the extends in the rules.
 90 | 
 91 |         This visitor inlines the variables and checks of each of the extended rules.
 92 |         `AbstractRule` and `ExternalRule` items are deleted after inlining.
 93 | 
 94 |         Args:
 95 |             schema: the full Schema as input to lookup all the rules by ID.
 96 |         """
 97 |         super().__init__()
 98 |         self._schema = schema
 99 | 
100 |     @override
101 |     def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode:
102 |         match ast_node:
103 |             case Schema():
104 |                 return self._process_schema(ast_node)
105 |             case ConcretePattern() | AbstractPattern():
106 |                 return self._process_pattern(ast_node)
107 |             case Rule():
108 |                 return self._process_rule(ast_node)
109 |             case ExtendsExternal():
110 |                 return self._process_extends_external(ast_node)
111 |             case ExtendsById():
112 |                 return self._process_extends_by_id(ast_node)
113 |             case _:
114 |                 return ast_node
115 | 
116 |     def _process_schema(self, schema: Schema) -> Schema:
117 |         """Process a Schema by processing all the patterns.
118 | 
119 |         Args:
120 |             schema: the schema to process
121 | 
122 |         Returns:
123 |             A processed schema
124 |         """
125 |         patterns = []
126 |         for pattern in schema.patterns:
127 |             patterns.append(ResolveExtendsVisitor(self._schema).apply(pattern))
128 |         return schema.with_updated(patterns=tuple(patterns))
129 | 
130 |     def _process_pattern(self, pattern: ConcretePattern | AbstractPattern) -> ConcretePattern | AbstractPattern:
131 |         """Process a pattern by processing all the rules.
132 | 
133 |         Args:
134 |             pattern: the pattern to process
135 | 
136 |         Returns:
137 |             the processed pattern
138 |         """
139 |         rules = []
140 |         for rule in pattern.rules:
141 |             processed_rule = ResolveExtendsVisitor(self._schema).apply(rule)
142 |             if isinstance(processed_rule, ConcreteRule):
143 |                 rules.append(processed_rule)
144 |         return pattern.with_updated(rules=tuple(rules))
145 | 
146 |     def _process_rule(self, rule: Rule) -> Rule:
147 |         """Process a rule by inlining all the extends.
148 | 
149 |         Args:
150 |             rule: the rule we wish to process
151 | 
152 |         Returns:
153 |             A new rule with all the extends loaded and added to the checks.
154 |         """
155 |         extra_checks = []
156 |         extra_variables = []
157 |         for extends in rule.extends:
158 |             extended_rule = ResolveExtendsVisitor(self._schema).apply(extends)
159 |             extra_checks.extend(extended_rule.checks)
160 |             extra_variables.extend(extended_rule.variables)
161 | 
162 |         checks = tuple(extra_checks) + rule.checks
163 |         variables = tuple(extra_variables) + rule.variables
164 |         return rule.with_updated(checks=checks, variables=variables, extends=tuple())
165 | 
166 |     def _process_extends_by_id(self, extends: ExtendsById) -> AbstractRule:
167 |         """Process an extends which points to an abstract rule.
168 | 
169 |         Args:
170 |             extends: the extends node we are processing
171 | 
172 |         Returns:
173 |             The abstract rule this extends points to.
174 |         """
175 |         abstract_rule = FindIdVisitor(extends.id_ref).apply(self._schema)
176 |         if abstract_rule is None:
177 |             raise ValueError(f'Can\'t find the abstract rule with id "{extends.id_ref}"')
178 |         return ResolveExtendsVisitor(self._schema).apply(abstract_rule)
179 | 
180 |     def _process_extends_external(self, extends: ExtendsExternal) -> ExternalRule:
181 |         """Process an external extend by returning the loaded rule.
182 | 
183 |         Args:
184 |             extends: the extends node we are processing
185 | 
186 |         Returns:
187 |             The loaded external rule
188 |         """
189 |         return ResolveExtendsVisitor(self._schema).apply(extends.rule)
190 | 
191 | 
class ResolveAbstractPatternsVisitor(SchematronASTVisitor):
    """Replace the instance-of patterns of a Schematron AST by concrete patterns."""

    def __init__(self, schema: Schema):
        """Simplify an AST Schema by expanding all the instance-of patterns.

        This visitor substitutes the abstract patterns with each of the instance-of patterns.
        All abstract patterns are deleted from the AST after replacement.

        Args:
            schema: the full Schema as input to lookup all the rules by ID.
        """
        super().__init__()
        self._schema = schema

    @override
    def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode:
        """Process a node according to its type.

        Args:
            ast_node: the node to process

        Returns:
            The processed schema or instance pattern. All other nodes are returned unchanged.
        """
        if isinstance(ast_node, Schema):
            return self._process_schema(ast_node)
        if isinstance(ast_node, InstancePattern):
            return self._process_instance_pattern(ast_node)
        return ast_node

    def _process_schema(self, schema: Schema) -> Schema:
        """Process a Schema by processing all the patterns.

        Only the patterns which are a `ConcretePattern` after processing are kept, all other patterns
        are removed from the schema.

        Args:
            schema: the schema to process

        Returns:
            A copy of the schema holding only concrete patterns.
        """
        processed_patterns = (ResolveAbstractPatternsVisitor(self._schema).apply(pattern)
                              for pattern in schema.patterns)
        concrete_patterns = tuple(pattern for pattern in processed_patterns
                                  if isinstance(pattern, ConcretePattern))
        return schema.with_updated(patterns=concrete_patterns)

    def _process_instance_pattern(self, instance_pattern: InstancePattern) -> ConcretePattern:
        """Process an instance-of pattern by expanding it with an abstract pattern.

        The abstract pattern is looked up by id in the full schema. Each parameter of the instance
        pattern is used as a macro, replacing `$name` by the value of the parameter. The expanded
        pattern gets the id of the instance pattern.

        Args:
            instance_pattern: the instance-of pattern to process

        Returns:
            the processed pattern as a concrete pattern

        Raises:
            ValueError: if the schema has no node with the referenced id.
        """
        template = FindIdVisitor(instance_pattern.abstract_id_ref).apply(self._schema)
        if template is None:
            raise ValueError(f'Can\'t find the abstract pattern with id "{instance_pattern.abstract_id_ref}"')

        expansions = {}
        for param in instance_pattern.params:
            expansions[f'${param.name}'] = param.value

        expanded_pattern = MacroExpandVisitor(expansions).apply(template)
        return expanded_pattern.with_updated(id=instance_pattern.id)
251 | 
252 | 
253 | class MacroExpandVisitor(SchematronASTVisitor):
254 | 
255 |     def __init__(self, macro_expansions: dict[str, str]):
256 |         """Macro expand an abstract pattern.
257 | 
258 |         If the input is an AbstractPattern we return a ConcretePattern.
259 |         In all other cases we return a node of the same type but with macro expanded elements.
260 | 
261 |         Args:
262 |             A mapping of macro expansions to apply.
263 |         """
264 |         super().__init__()
265 |         self._macro_expansions = macro_expansions
266 | 
267 |     @override
268 |     def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode:
269 |         if isinstance(ast_node, AbstractPattern):
270 |             expanded_pattern = self._visit_generic_node(ast_node)
271 |             return ConcretePattern(**expanded_pattern.get_init_values())
272 | 
273 |         return self._visit_generic_node(ast_node)
274 | 
275 |     def _visit_generic_node[T: SchematronASTNode](self, ast_node: T) -> T:
276 |         """Visit a generic node and do macro expansion.
277 | 
278 |         Args:
279 |             ast_node: the node we are visiting and expanding
280 | 
281 |         Returns:
282 |             A node of the same type but with macro expanded items.
283 |         """
284 |         sub_visitor = MacroExpandVisitor(self._macro_expansions)
285 |         init_values = ast_node.get_init_values()
286 | 
287 |         def _expand_value(value):
288 |             if isinstance(value, str):
289 |                 return macro_expand(value, self._macro_expansions)
290 |             elif isinstance(value, SchematronASTNode):
291 |                 return sub_visitor.apply(value)
292 |             elif isinstance(value, Mapping):
293 |                 return {k: _expand_value(v) for k, v in value.items()}
294 |             elif isinstance(value, Iterable):
295 |                 return tuple(_expand_value(el) for el in value)
296 |             else:
297 |                 return value
298 | 
299 |         updated_items = {}
300 |         for key, value in init_values.items():
301 |             updated_items[key] = _expand_value(value)
302 | 
303 |         return ast_node.with_updated(**updated_items)
304 | 
305 | 
306 | class PhaseSelectionVisitor(SchematronASTVisitor):
307 | 
308 |     def __init__(self, schema: Schema, phase: str | Literal['#ALL', '#DEFAULT'] | None = None):
309 |         """Reduce an AST to only those patterns and phases referenced by a specific phase.
310 | 
311 |         This visitor only works on concrete Schema AST trees, we assume all abstract rules and patterns to be resolved.
312 | 
313 |         The output limits the `patterns` in the AST to only those selected by the phase.
314 |         It will also limit the `phases` to the active phase, or to an empty list if no phase was specified.
315 | 
316 |         Args:
317 |             schema: the full Schema as input to lookup all the rules by ID.
318 |             phase: the phase we want to select, can be an IDREF of a phase node, the literal `#ALL` for all patterns,
319 |                 or `#DEFAULT` for the `defaultPhase` attribute of the Schematron. The default value is `#DEFAULT`,
320 |                 it is overwritten by the attribute `defaultPhase`, which again can be overwritten by the phase
321 |                 here specified.
322 |         """
323 |         super().__init__()
324 |         self._schema = schema
325 |         self._phase = phase
326 |         self._phase_node = self._get_phase_node(schema, phase)
327 | 
328 |         self._active_pattern_ids = None
329 |         if self._phase_node:
330 |             self._active_pattern_ids = [active_phase.pattern_id for active_phase in self._phase_node.active]
331 | 
332 |     @override
333 |     def visit(self, ast_node: SchematronASTNode) -> SchematronASTNode | bool:
334 |         match ast_node:
335 |             case Schema():
336 |                 return self._process_schema(ast_node)
337 |             case Pattern():
338 |                 return self._process_pattern(ast_node)
339 |             case Phase():
340 |                 return self._process_phase(ast_node)
341 |             case _:
342 |                 return ast_node
343 | 
344 |     def _process_schema(self, schema: Schema) -> Schema:
345 |         """Process a Schema by reducing the patterns and phases to the specified set.
346 | 
347 |         Args:
348 |             schema: the schema to process
349 | 
350 |         Returns:
351 |             A processed schema
352 |         """
353 |         patterns = tuple(pattern for pattern in schema.patterns if self.apply(pattern))
354 |         phases = tuple(phase for phase in schema.phases if self.apply(phase))
355 |         return schema.with_updated(patterns=patterns, phases=phases)
356 | 
357 |     def _process_pattern(self, pattern: Pattern) -> bool:
358 |         """Process a pattern by verifying if it is in the current phase.
359 | 
360 |         Args:
361 |             pattern: the pattern to process
362 | 
363 |         Returns:
364 |             A boolean indicating if this pattern is in the current phase or not.
365 |         """
366 |         if not isinstance(pattern, ConcretePattern):
367 |             raise ValueError('This visitor can only deal with concrete patterns.')
368 | 
369 |         return self._active_pattern_ids is None or pattern.id in self._active_pattern_ids
370 | 
371 |     def _process_phase(self, phase: Phase) -> bool:
372 |         """Process a phase node by verifying if it is in the current phase.
373 | 
374 |         Args:
375 |             phase: the phase to process
376 | 
377 |         Returns:
378 |             A boolean indicating if this phase is in the current phase or not
379 |         """
380 |         return self._phase_node is None or phase.id == self._phase_node.id
381 | 
382 |     def _get_phase_node(self, schema: Schema, phase: str | Literal['#ALL', '#DEFAULT'] | None = None) -> Phase | None:
383 |         """Get the phase node associated with the elected phase, or None if None found.
384 | 
385 |         Args:
386 |             schema: the schema we want to search
387 |             phase: the chosen phase.
388 | 
389 |         Returns:
390 |             The AST phase node, or None if not applicable / not found.
391 |         """
392 |         if phase is None:
393 |             phase = '#DEFAULT'
394 | 
395 |         if phase == '#ALL':
396 |             return None
397 | 
398 |         if phase == '#DEFAULT':
399 |             phase = schema.default_phase
400 | 
401 |         if isinstance(phase, str):
402 |             phase_node = FindIdVisitor(phase).apply(self._schema)
403 | 
404 |             if phase_node is None:
405 |                 raise ValueError(f'Can not find the phase "{phase}".')
406 |             return phase_node
407 | 
408 |         return None
409 | 


--------------------------------------------------------------------------------
/docs/iso-schematron.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <!--
  3 |     Copyright © ISO/IEC 2015
  4 |     The following permission notice and disclaimer shall be included in all
  5 |     copies of this XML schema ("the Schema"), and derivations of the Schema:
  6 |     Permission is hereby granted, free of charge in perpetuity, to any
  7 |     person obtaining a copy of the Schema, to use, copy, modify, merge and
  8 |     distribute free of charge, copies of the Schema for the purposes of
  9 |     developing, implementing, installing and using software based on the
 10 |     Schema, and to permit persons to whom the Schema is furnished to do so,
 11 |     subject to the following conditions:
 12 |     THE SCHEMA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 13 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 14 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 15 |     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 16 |     OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 17 |     ARISING FROM, OUT OF OR IN CONNECTION WITH THE SCHEMA OR THE USE OR
 18 |     OTHER DEALINGS IN THE SCHEMA.
 19 |     In addition, any modified copy of the Schema shall include the following
 20 |     notice:
 21 |     "THIS SCHEMA HAS BEEN MODIFIED FROM THE SCHEMA DEFINED IN ISO/IEC 19757-3,
 22 |     AND SHOULD NOT BE INTERPRETED AS COMPLYING WITH THAT STANDARD".
 23 | -->
 24 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://purl.oclc.org/dsdl/schematron" xmlns:sch="http://purl.oclc.org/dsdl/schematron">
 25 |     <xs:import namespace="http://www.w3.org/XML/1998/namespace"/>
 26 |     <!-- Element declarations -->
 27 |     <xs:element name="schema">
 28 |         <xs:complexType>
 29 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
 30 |                 <xs:group ref="sch:foreign"/>
 31 |                 <xs:element ref="sch:include"/>
 32 |                 <xs:choice>
 33 |                     <xs:element ref="sch:title"/>
 34 |                     <xs:element ref="sch:ns"/>
 35 |                     <xs:element ref="sch:p"/>
 36 |                     <xs:element ref="sch:let"/>
 37 |                     <xs:element ref="sch:phase"/>
 38 |                     <xs:element ref="sch:pattern"/>
 39 |                     <xs:element ref="sch:diagnostics"/>
 40 |                     <xs:element ref="sch:properties"/>
 41 |                 </xs:choice>
 42 |             </xs:choice>
 43 |             <xs:attribute name="id" type="xs:ID"/>
 44 |             <xs:attributeGroup ref="sch:rich"/>
 45 |             <xs:attribute name="schemaVersion" type="sch:non-empty-string"/>
 46 |             <xs:attribute name="defaultPhase" type="xs:IDREF"/>
 47 |             <xs:attribute name="queryBinding" type="sch:non-empty-string"/>
 48 |             <xs:attributeGroup ref="sch:foreign"/>
 49 |         </xs:complexType>
 50 |     </xs:element>
 51 |     <xs:element name="active">
 52 |         <xs:complexType mixed="true">
 53 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
 54 |                 <xs:group ref="sch:foreign"/>
 55 |                 <xs:choice>
 56 |                     <xs:element ref="sch:dir"/>
 57 |                     <xs:element ref="sch:emph"/>
 58 |                     <xs:element ref="sch:span"/>
 59 |                 </xs:choice>
 60 |             </xs:choice>
 61 |             <xs:attribute name="pattern" use="required" type="xs:IDREF"/>
 62 |             <xs:attributeGroup ref="sch:foreign"/>
 63 |         </xs:complexType>
 64 |     </xs:element>
 65 |     <xs:element name="assert">
 66 |         <xs:complexType mixed="true">
 67 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
 68 |                 <xs:group ref="sch:foreign"/>
 69 |                 <xs:choice>
 70 |                     <xs:element ref="sch:name"/>
 71 |                     <xs:element ref="sch:value-of"/>
 72 |                     <xs:element ref="sch:emph"/>
 73 |                     <xs:element ref="sch:dir"/>
 74 |                     <xs:element ref="sch:span"/>
 75 |                 </xs:choice>
 76 |             </xs:choice>
 77 |             <xs:attribute name="test" use="required" type="sch:exprValue"/>
 78 |             <xs:attribute name="flag" type="sch:flagValue"/>
 79 |             <xs:attribute name="id" type="xs:ID"/>
 80 |             <xs:attribute name="diagnostics" type="xs:IDREFS"/>
 81 |             <xs:attribute name="properties" type="xs:IDREFS"/>
 82 |             <xs:attributeGroup ref="sch:rich"/>
 83 |             <xs:attributeGroup ref="sch:linkable"/>
 84 |             <xs:attributeGroup ref="sch:foreign"/>
 85 |         </xs:complexType>
 86 |     </xs:element>
 87 |     <xs:element name="diagnostic">
 88 |         <xs:complexType mixed="true">
 89 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
 90 |                 <xs:group ref="sch:foreign"/>
 91 |                 <xs:choice>
 92 |                     <xs:element ref="sch:value-of"/>
 93 |                     <xs:element ref="sch:emph"/>
 94 |                     <xs:element ref="sch:dir"/>
 95 |                     <xs:element ref="sch:span"/>
 96 |                 </xs:choice>
 97 |             </xs:choice>
 98 |             <xs:attribute name="id" use="required" type="xs:ID"/>
 99 |             <xs:attributeGroup ref="sch:rich"/>
100 |             <xs:attributeGroup ref="sch:foreign"/>
101 |         </xs:complexType>
102 |     </xs:element>
103 |     <xs:element name="diagnostics">
104 |         <xs:complexType>
105 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
106 |                 <xs:group ref="sch:foreign"/>
107 |                 <xs:element ref="sch:include"/>
108 |                 <xs:element ref="sch:diagnostic"/>
109 |             </xs:choice>
110 |             <xs:attributeGroup ref="sch:foreign"/>
111 |         </xs:complexType>
112 |     </xs:element>
113 |     <xs:element name="dir">
114 |         <xs:complexType mixed="true">
115 |             <xs:group ref="sch:foreign"/>
116 |             <xs:attribute name="value">
117 |                 <xs:simpleType>
118 |                     <xs:restriction base="xs:token">
119 |                         <xs:enumeration value="ltr"/>
120 |                         <xs:enumeration value="rtl"/>
121 |                     </xs:restriction>
122 |                 </xs:simpleType>
123 |             </xs:attribute>
124 |             <xs:attributeGroup ref="sch:foreign"/>
125 |         </xs:complexType>
126 |     </xs:element>
127 |     <xs:element name="emph">
128 |         <xs:complexType mixed="true"/>
129 |     </xs:element>
130 |     <xs:element name="extends">
131 |         <xs:complexType>
132 |             <xs:attribute name="rule" type="xs:IDREF"/>
133 |             <xs:attribute name="href" type="sch:uriValue"/>
134 |             <xs:attributeGroup ref="sch:foreign-empty"/>
135 |         </xs:complexType>
136 |     </xs:element>
137 |     <xs:element name="let">
138 |         <xs:complexType>
139 |             <xs:group minOccurs="0" maxOccurs="unbounded" ref="sch:foreign-element"/>
140 |             <xs:attribute name="name" use="required" type="sch:nameValue"/>
141 |             <xs:attribute name="value" type="xs:string"/>
142 |         </xs:complexType>
143 |     </xs:element>
144 |     <xs:element name="name">
145 |         <xs:complexType>
146 |             <xs:attribute name="path" type="sch:pathValue"/>
147 |             <xs:attributeGroup ref="sch:foreign-empty"/>
148 |         </xs:complexType>
149 |     </xs:element>
150 |     <xs:element name="ns">
151 |         <xs:complexType>
152 |             <xs:attribute name="uri" use="required" type="sch:uriValue"/>
153 |             <xs:attribute name="prefix" use="required" type="sch:nameValue"/>
154 |             <xs:attributeGroup ref="sch:foreign-empty"/>
155 |         </xs:complexType>
156 |     </xs:element>
157 |     <xs:element name="p">
158 |         <xs:complexType mixed="true">
159 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
160 |                 <xs:group ref="sch:foreign"/>
161 |                 <xs:choice>
162 |                     <xs:element ref="sch:dir"/>
163 |                     <xs:element ref="sch:emph"/>
164 |                     <xs:element ref="sch:span"/>
165 |                 </xs:choice>
166 |             </xs:choice>
167 |             <xs:attribute name="id" type="xs:ID"/>
168 |             <xs:attribute name="class" type="sch:classValue"/>
169 |             <xs:attribute name="icon" type="sch:uriValue"/>
170 |             <xs:attributeGroup ref="sch:foreign"/>
171 |         </xs:complexType>
172 |     </xs:element>
173 |     <xs:element name="param">
174 |         <xs:complexType>
175 |             <xs:attribute name="name" use="required" type="sch:nameValue"/>
176 |             <xs:attribute name="value" use="required" type="sch:non-empty-string"/>
177 |         </xs:complexType>
178 |     </xs:element>
179 |     <xs:element name="pattern">
180 |         <xs:complexType>
181 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
182 |                 <xs:group ref="sch:foreign"/>
183 |                 <xs:element ref="sch:include"/>
184 |                 <xs:choice>
185 |                     <xs:element ref="sch:title"/>
186 |                     <xs:element ref="sch:p"/>
187 |                     <xs:element ref="sch:param"/>
188 |                     <xs:element ref="sch:let"/>
189 |                     <xs:element ref="sch:rule"/>
190 |                 </xs:choice>
191 |             </xs:choice>
192 |             <xs:attribute name="documents" type="sch:pathValue"/>
193 |             <xs:attributeGroup ref="sch:rich"/>
194 |             <xs:attributeGroup ref="sch:foreign"/>
195 |             <xs:attribute name="abstract">
196 |                 <xs:simpleType>
197 |                     <xs:restriction base="xs:token">
198 |                         <xs:enumeration value="true"/>
199 |                         <xs:enumeration value="false"/>
200 |                         <xs:enumeration value="false"/>
201 |                     </xs:restriction>
202 |                 </xs:simpleType>
203 |             </xs:attribute>
204 |             <xs:attribute name="id" type="xs:ID"/>
205 |             <xs:attribute name="is-a" type="xs:IDREF"/>
206 |         </xs:complexType>
207 |     </xs:element>
208 |     <xs:element name="phase">
209 |         <xs:complexType>
210 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
211 |                 <xs:group ref="sch:foreign"/>
212 |                 <xs:element ref="sch:include"/>
213 |                 <xs:choice>
214 |                     <xs:element ref="sch:p"/>
215 |                     <xs:element ref="sch:let"/>
216 |                     <xs:element ref="sch:active"/>
217 |                 </xs:choice>
218 |             </xs:choice>
219 |             <xs:attribute name="id" use="required" type="xs:ID"/>
220 |             <xs:attributeGroup ref="sch:rich"/>
221 |             <xs:attributeGroup ref="sch:foreign"/>
222 |         </xs:complexType>
223 |     </xs:element>
224 |     <xs:element name="properties">
225 |         <xs:complexType>
226 |             <xs:sequence>
227 |                 <xs:element minOccurs="0" maxOccurs="unbounded" ref="sch:property"/>
228 |             </xs:sequence>
229 |         </xs:complexType>
230 |     </xs:element>
231 |     <xs:element name="property">
232 |         <xs:complexType mixed="true">
233 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
234 |                 <xs:group ref="sch:foreign"/>
235 |                 <xs:choice>
236 |                     <xs:element ref="sch:name"/>
237 |                     <xs:element ref="sch:value-of"/>
238 |                     <xs:element ref="sch:emph"/>
239 |                     <xs:element ref="sch:dir"/>
240 |                     <xs:element ref="sch:span"/>
241 |                 </xs:choice>
242 |             </xs:choice>
243 |             <xs:attribute name="id" use="required" type="xs:ID"/>
244 |             <xs:attribute name="role" type="sch:roleValue"/>
245 |             <xs:attribute name="scheme"/>
246 |             <xs:attributeGroup ref="sch:foreign"/>
247 |         </xs:complexType>
248 |     </xs:element>
249 |     <xs:element name="report">
250 |         <xs:complexType mixed="true">
251 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
252 |                 <xs:group ref="sch:foreign"/>
253 |                 <xs:choice>
254 |                     <xs:element ref="sch:name"/>
255 |                     <xs:element ref="sch:value-of"/>
256 |                     <xs:element ref="sch:emph"/>
257 |                     <xs:element ref="sch:dir"/>
258 |                     <xs:element ref="sch:span"/>
259 |                 </xs:choice>
260 |             </xs:choice>
261 |             <xs:attribute name="test" use="required" type="sch:exprValue"/>
262 |             <xs:attribute name="flag" type="sch:flagValue"/>
263 |             <xs:attribute name="id" type="xs:ID"/>
264 |             <xs:attribute name="diagnostics" type="xs:IDREFS"/>
265 |             <xs:attribute name="properties" type="xs:IDREFS"/>
266 |             <xs:attributeGroup ref="sch:rich"/>
267 |             <xs:attributeGroup ref="sch:linkable"/>
268 |             <xs:attributeGroup ref="sch:foreign"/>
269 |         </xs:complexType>
270 |     </xs:element>
271 |     <xs:element name="rule">
272 |         <xs:complexType>
273 |             <xs:choice minOccurs="0" maxOccurs="unbounded">
274 |                 <xs:group ref="sch:foreign"/>
275 |                 <xs:element ref="sch:include"/>
276 |                 <xs:choice>
277 |                     <xs:element ref="sch:let"/>
278 |                     <xs:element ref="sch:assert"/>
279 |                     <xs:element ref="sch:report"/>
280 |                     <xs:element ref="sch:extends"/>
281 |                 </xs:choice>
282 |             </xs:choice>
283 |             <xs:attribute name="flag" type="sch:flagValue"/>
284 |             <xs:attributeGroup ref="sch:rich"/>
285 |             <xs:attributeGroup ref="sch:linkable"/>
286 |             <xs:attributeGroup ref="sch:foreign"/>
287 |             <xs:attribute name="abstract">
288 |                 <xs:simpleType>
289 |                     <xs:restriction base="xs:token">
290 |                         <xs:enumeration value="true"/>
291 |                         <xs:enumeration value="false"/>
292 |                     </xs:restriction>
293 |                 </xs:simpleType>
294 |             </xs:attribute>
295 |             <xs:attribute name="id" type="xs:ID"/>
296 |             <xs:attribute name="context" type="sch:pathValue"/>
297 |         </xs:complexType>
298 |     </xs:element>
299 |     <xs:element name="span">
300 |         <xs:complexType mixed="true">
301 |             <xs:group ref="sch:foreign"/>
302 |             <xs:attribute name="class" use="required" type="sch:classValue"/>
303 |             <xs:attributeGroup ref="sch:foreign"/>
304 |         </xs:complexType>
305 |     </xs:element>
306 |     <xs:element name="title">
307 |         <xs:complexType mixed="true">
308 |             <xs:sequence>
309 |                 <xs:element minOccurs="0" maxOccurs="unbounded" ref="sch:dir"/>
310 |             </xs:sequence>
311 |         </xs:complexType>
312 |     </xs:element>
313 |     <xs:element name="value-of">
314 |         <xs:complexType>
315 |             <xs:attribute name="select" use="required" type="sch:pathValue"/>
316 |             <xs:attributeGroup ref="sch:foreign-empty"/>
317 |         </xs:complexType>
318 |     </xs:element>
319 |     <!-- common declarations -->
320 |     <xs:element name="include">
321 |         <xs:complexType>
322 |             <xs:attribute name="href" use="required" type="sch:uriValue"/>
323 |             <xs:attributeGroup ref="sch:foreign-empty"/>
324 |         </xs:complexType>
325 |     </xs:element>
326 |     <xs:attributeGroup name="rich">
327 |         <xs:attribute name="icon" type="sch:uriValue"/>
328 |         <xs:attribute name="see" type="sch:uriValue"/>
329 |         <xs:attribute name="fpi" type="sch:fpiValue"/>
330 |         <xs:attribute ref="xml:lang"/>
331 |         <xs:attribute ref="xml:space"/>
332 |     </xs:attributeGroup>
333 |     <xs:attributeGroup name="linkable">
334 |         <xs:attribute name="role" type="sch:roleValue"/>
335 |         <xs:attribute name="subject" type="sch:pathValue"/>
336 |     </xs:attributeGroup>
337 |     <xs:group name="foreign">
338 |         <xs:sequence>
339 |             <xs:group minOccurs="0" maxOccurs="unbounded" ref="sch:foreign-element"/>
340 |         </xs:sequence>
341 |     </xs:group>
342 |     <xs:attributeGroup name="foreign">
343 |         <xs:attributeGroup ref="sch:foreign-attributes"/>
344 |     </xs:attributeGroup>
345 |     <xs:attributeGroup name="foreign-empty">
346 |         <xs:attributeGroup ref="sch:foreign-attributes"/>
347 |     </xs:attributeGroup>
348 |     <xs:attributeGroup name="foreign-attributes">
349 |         <xs:anyAttribute namespace="##local" processContents="skip"/>
350 |     </xs:attributeGroup>
351 |     <xs:group name="foreign-element">
352 |         <xs:choice>
353 |             <xs:any namespace="##other" processContents="skip"/>
354 |             <xs:any namespace="##local" processContents="skip"/>
355 |         </xs:choice>
356 |     </xs:group>
357 |     <!-- Data types -->
358 |     <xs:simpleType name="uriValue">
359 |         <xs:restriction base="xs:anyURI"/>
360 |     </xs:simpleType>
361 |     <xs:simpleType name="pathValue">
362 |         <xs:restriction base="xs:string"/>
363 |     </xs:simpleType>
364 |     <xs:simpleType name="exprValue">
365 |         <xs:restriction base="xs:string"/>
366 |     </xs:simpleType>
367 |     <xs:simpleType name="fpiValue">
368 |         <xs:restriction base="xs:string"/>
369 |     </xs:simpleType>
370 |     <xs:simpleType name="langValue">
371 |         <xs:restriction base="xs:language"/>
372 |     </xs:simpleType>
373 |     <xs:simpleType name="roleValue">
374 |         <xs:restriction base="xs:string"/>
375 |     </xs:simpleType>
376 |     <xs:simpleType name="flagValue">
377 |         <xs:restriction base="xs:string"/>
378 |     </xs:simpleType>
379 |     <xs:simpleType name="nameValue">
380 |         <xs:restriction base="xs:string"/>
381 |     </xs:simpleType>
382 |     <!-- In the default query language binding, xsd:NCNAME -->
383 |     <xs:simpleType name="classValue">
384 |         <xs:restriction base="xs:string"/>
385 |     </xs:simpleType>
386 |     <xs:simpleType name="non-empty-string">
387 |         <xs:restriction base="xs:token">
388 |             <xs:minLength value="1"/>
389 |         </xs:restriction>
390 |     </xs:simpleType>
391 | </xs:schema>
392 | 


--------------------------------------------------------------------------------