├── .github ├── actions │ └── setup │ │ └── action.yml └── workflows │ └── ocsf-validator.yml ├── .gitignore ├── .tool-versions ├── Dockerfile ├── LICENSE ├── README.md ├── build.sh ├── ocsf_validator ├── __init__.py ├── __main__.py ├── errors.py ├── matchers.py ├── processor.py ├── reader.py ├── runner.py ├── type_mapping.py ├── types.py └── validators.py ├── poetry.lock ├── pyproject.toml └── tests ├── test_dependencies.py ├── test_mapping.py ├── test_matchers.py ├── test_reader.py ├── test_types.py └── test_validators.py /.github/actions/setup/action.yml: -------------------------------------------------------------------------------- 1 | name: "setup-ocsf-validator" 2 | description: "Sets up the CI environment for the ocsf-validator" 3 | inputs: 4 | python-version: 5 | description: Python version to use (e.g. "3.11") 6 | required: true 7 | runs: 8 | using: "composite" 9 | steps: 10 | - name: Set up Python ${{ inputs.python-version }} 11 | uses: actions/setup-python@v4 12 | with: 13 | python-version: ${{ inputs.python-version }} 14 | - name: Install Poetry 15 | uses: snok/install-poetry@v1 16 | - name: Install dependencies 17 | shell: bash 18 | run: poetry install -------------------------------------------------------------------------------- /.github/workflows/ocsf-validator.yml: -------------------------------------------------------------------------------- 1 | name: ocsf-validator 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | pytest: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.11", "3.12"] 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up environment 18 | uses: "./.github/actions/setup" 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: pytest 22 | run: poetry run pytest 23 | black: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up environment 28 | uses: "./.github/actions/setup" 29 | with: 30 | python-version: "3.12" 31 | - name: black 32 | run: poetry run black --check . 33 | isort: 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | - name: Set up environment 38 | uses: "./.github/actions/setup" 39 | with: 40 | python-version: "3.12" 41 | - name: isort 42 | run: poetry run isort --check . 43 | pyright: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v4 47 | - name: Set up environment 48 | uses: "./.github/actions/setup" 49 | with: 50 | python-version: "3.12" 51 | - name: pyright 52 | run: poetry run pyright -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | python 3.11.2 2 | poetry 1.6.1 3 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # a Dockerfile to containerize the build of ocsf-validator 2 | # 3 | # To build a container: 4 | # 5 | # docker build -t ocsf-validator:latest . 6 | # 7 | # To use it to validate a schema 8 | # 9 | # docker run --rm -v $PWD:/schema ocsf-validator:latest /schema 10 | # 11 | 12 | FROM python:3.11.9-alpine3.19 13 | 14 | RUN apk add --no-cache poetry nodejs npm 15 | 16 | WORKDIR /src 17 | 18 | # install stuff that doesn't change much 19 | ADD poetry.lock pyproject.toml ./ 20 | RUN poetry install --no-root 21 | 22 | # pull in the rest of the code 23 | ADD . . 24 | RUN poetry install --only-root 25 | RUN poetry run black . 26 | RUN poetry run isort . 27 | RUN poetry run pyright ocsf_validator 28 | RUN poetry run pytest 29 | 30 | ENTRYPOINT ["poetry", "run", "python", "-m", "ocsf_validator"] 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship.
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCSF Schema Validator 2 | 3 | A utility to validate contributions to the [OCSF 4 | schema](https://github.com/ocsf/ocsf-schema), intended to catch human error 5 | in contributions and keep the schema machine-readable. 6 | 7 | OCSF provides several include mechanisms to facilitate reuse, but this means 8 | individual schema files may be incomplete. This complicates using off-the-shelf 9 | schema definition tools for validation. 10 | 11 | [Query](https://www.query.ai) is a federated search solution that normalizes 12 | disparate security data to OCSF. This validator is adapted from active code and 13 | documentation generation tools written by the Query team. 14 | 15 | ## Getting Started 16 | 17 | ### Prerequisites 18 | 19 | - python >= 3.11 20 | - pip 21 | - A copy of the [OCSF schema](https://github.com/ocsf/ocsf-schema) 22 | 23 | ### Installation 24 | 25 | You can install the validator with `pip`: 26 | 27 | ``` 28 | $ pip install ocsf-validator 29 | ``` 30 | 31 | ## Usage 32 | 33 | You can run the validator against your working copy of the schema to identify problems before submitting a PR. Invoke the validator using `python` and provide it with the path to the root of your working copy. 34 | 35 | Examples: 36 | ``` 37 | $ python -m ocsf_validator . 38 | $ python -m ocsf_validator ../ocsf-schema 39 | ``` 40 | 41 | 42 | ## Tests 43 | 44 | The validator performs the following tests on a copy of the schema: 45 | 46 | - The schema is readable and all JSON is valid. [FATAL] 47 | - The directory structure meets expectations.
[WARNING] 48 | - The targets in `$include`, `profiles`, and `extends` directives can be found. [ERROR] 49 | - All required attributes in schema definition files are present. [WARNING] 50 | - There are no unrecognized attributes in schema definition files. [WARNING] 51 | - All attributes in the attribute dictionary are used. [WARNING] 52 | - There are no name collisions within a record type. [WARNING] 53 | - All attributes are defined in the attribute dictionary. [WARNING] 54 | 55 | If any ERROR or FATAL tests fail, the validator exits with a non-zero exit code. 56 | 57 | 58 | ## Technical Overview 59 | 60 | The OCSF metaschema is represented as record types keyed by file path, as follows: 61 | 62 | 1. Record types are represented using Python's type system by defining them as Python `TypedDict`s in `types.py`. This allows the validator to take advantage of Python's reflection capabilities. 63 | 2. Files and record types are associated by pattern matching the file paths. These patterns are named in `matchers.py` to allow mistakes to be caught by a type checker. 64 | 3. Types are mapped to filepath patterns in `type_mapping.py`. 65 | 66 | The contents of the OCSF schema to be validated are primarily represented as a `Reader` defined in `reader.py`. `Reader`s load the schema definitions to be validated from a source (usually a filesystem) and contain them without judgement. The `process_includes` function and other contents of `processor.py` mutate the contents of a `Reader` by applying OCSF's various include mechanisms. 67 | 68 | Validators are defined in `validators.py` and test the schema contents for various problematic conditions. Validators should pass `Exception`s to a special error `Collector` defined in `errors.py`. This module also defines a number of custom exception types that represent problematic schema states. The `Collector` raises errors by default, but can also hold them until they're aggregated by a larger validation process (e.g., the `ValidationRunner`). 69 | 70 | The `ValidationRunner` combines all of the building blocks above to read a proposed schema from a filesystem, validate the schema, and provide useful output and a non-zero exit code if any errors were encountered. 71 | 72 | 73 | ## Contributing 74 | 75 | After checking out the repository, install dependencies: 76 | ``` 77 | poetry install 78 | ``` 79 | 80 | Before committing, run the formatters and tests: 81 | ``` 82 | poetry run isort . 83 | poetry run black . 84 | poetry run pyright 85 | poetry run pytest 86 | ``` 87 | 88 | If you're adding a validator, do the following: 89 | - Write your `validate_` function in `validators.py` to apply a function that runs your desired validation to the relevant keys in a reader. See the existing validators there for examples. 90 | - Add any custom errors in `errors.py`. 91 | - Create an option to change its severity level in `ValidatorOptions` and map it in the constructor of `ValidationRunner` in `runner.py`. 92 | - Invoke the new validator in `ValidationRunner.validate`. 93 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if ! [ "${0}" -ef "${PWD}/build.sh" ]; then 4 | echo "build.sh must be run from project root" >&2 5 | exit 1 6 | fi 7 | 8 | if [ "${VIRTUAL_ENV}" = "" ]; then 9 | exec poetry run "$0" 10 | fi 11 | 12 | poetry install --no-root 13 | 14 | black . 15 | isort .
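# Formatters run first so the type check and tests below see the final, formatted source.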
16 | pyright ocsf_validator 17 | pytest 18 | -------------------------------------------------------------------------------- /ocsf_validator/__init__.py: -------------------------------------------------------------------------------- 1 | # TODO narrow down the * 2 | 3 | from .errors import * 4 | from .processor import process_includes 5 | from .reader import DictReader, FileReader, Reader 6 | from .runner import ValidationRunner, ValidatorOptions 7 | from .types import * 8 | from .validators import * 9 | -------------------------------------------------------------------------------- /ocsf_validator/__main__.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from ocsf_validator.runner import ValidationRunner, ValidatorOptions 4 | 5 | parser = ArgumentParser(prog="ocsf-validator", description="OCSF Schema Validation") 6 | parser.add_argument("path", help="The OCSF schema root directory") 7 | parser.add_argument( 8 | "-m", 9 | "--metaschema_path", 10 | help="The OCSF schema's metaschema" 11 | " (default: metaschema subdirectory of schema root)", 12 | ) 13 | args = parser.parse_args() 14 | 15 | opts = ValidatorOptions(base_path=args.path, metaschema_path=args.metaschema_path) 16 | 17 | validator = ValidationRunner(opts) 18 | 19 | validator.validate() 20 | -------------------------------------------------------------------------------- /ocsf_validator/errors.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Iterable, Optional 4 | 5 | 6 | class Collector: 7 | """An error collector used by schema operations so that encountered errors 8 | can be saved for later and displayed as test output. 9 | 10 | The default behavior is to `raise` all exceptions, so you shouldn't 11 | notice the collector until `throw` is `False`. 12 | """ 13 | 14 | default: Collector 15 | """Simple singleton used whenever an Optional[Collector] parameter is None.""" 16 | 17 | def __init__(self, throw: bool = True): 18 | self._exceptions: list[Exception] = [] 19 | self._throw = throw 20 | 21 | def handle(self, err: Exception): 22 | """Handle an exception. 23 | 24 | By default, exceptions are stored and raised. But if `throw` is `False`, 25 | exceptions will only be stored for later.""" 26 | 27 | self._exceptions.append(err) 28 | if self._throw: 29 | raise err 30 | 31 | def exceptions(self): 32 | return self._exceptions 33 | 34 | def flush(self): 35 | e = list(self._exceptions) 36 | self._exceptions = [] 37 | return e 38 | 39 | def __len__(self): 40 | return len(self._exceptions) 41 | 42 | def __iter__(self) -> Iterable[Exception]: 43 | return iter(self._exceptions) 44 | 45 | 46 | Collector.default = Collector() 47 | 48 | 49 | class ValidationError(Exception): 50 | """Base class for validation errors.""" 51 | 52 | ... 53 | 54 | 55 | class InvalidBasePathError(ValidationError): ... 56 | 57 | 58 | class InvalidMetaSchemaError(ValidationError): ... 59 | 60 | 61 | class InvalidMetaSchemaFileError(ValidationError): ... 
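# A minimal usage sketch (not part of the library) of how these exceptions flow
# through a Collector during validation; the key and path below are made up:
#
#   collector = Collector(throw=False)
#   collector.handle(MissingRequiredKeyError("caption", "objects/os.json"))
#   collector.handle(UnusedAttributeError("severity"))
#   for err in collector.exceptions():
#       print(err)  # collected errors are reported here instead of raising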
62 | 63 | 64 | class UnusedAttributeError(ValidationError): 65 | def __init__(self, attr: str): 66 | self.attr = attr 67 | super().__init__(f"Unused attribute {attr}") 68 | 69 | 70 | class MissingRequiredKeyError(ValidationError): 71 | def __init__( 72 | self, 73 | key: str, 74 | file: str, 75 | cls: Optional[type] = None, 76 | trail: Optional[list[str]] = None, 77 | ): 78 | self.key = key 79 | self.file = file 80 | self.cls = cls 81 | self.trail = trail 82 | 83 | if trail is None: 84 | trail_str = "" 85 | else: 86 | trail_str = ".".join(trail) 87 | 88 | super().__init__( 89 | f"Missing required key `{key}` at `{trail_str}` in {file}. Make sure required fields in this file and any supporting files such as dictionaries or includes are populated." 90 | ) 91 | 92 | 93 | class UnknownKeyError(ValidationError): 94 | def __init__( 95 | self, 96 | key: str, 97 | file: str, 98 | cls: Optional[type] = None, 99 | trail: Optional[list[str]] = None, 100 | ): 101 | self.key = key 102 | self.file = file 103 | self.cls = cls 104 | self.trail = trail 105 | 106 | if trail is None: 107 | trail_str = "" 108 | else: 109 | trail_str = ".".join(trail) 110 | 111 | super().__init__( 112 | f"Unrecognized key `{key}` at `{trail_str}` in {file}. Make sure fields in this file and any supporting files such as dictionaries or includes are valid." 113 | ) 114 | 115 | 116 | class DependencyError(ValidationError): 117 | def __init__(self, file: str, include: str, message: Optional[str] = None): 118 | self.file = file 119 | self.include = include 120 | super().__init__(message) 121 | 122 | 123 | class MissingIncludeError(DependencyError): 124 | def __init__(self, file: str, include: str): 125 | self.file = file 126 | self.include = include 127 | super().__init__(file, include, f"Missing include target '{include}' in {file}") 128 | 129 | 130 | class MissingBaseError(DependencyError): 131 | def __init__(self, file: str, include: str): 132 | self.file = file 133 | self.include = include 134 | super().__init__(file, include, f"Missing base record '{include}' in {file}") 135 | 136 | 137 | class ImpreciseBaseError(DependencyError): 138 | def __init__(self, file: str, include: str): 139 | self.file = file 140 | self.include = include 141 | super().__init__( 142 | file, 143 | include, 144 | f"Possibly ambiguous base record definition '{include}' in {file}", 145 | ) 146 | 147 | 148 | class MissingProfileError(DependencyError): 149 | def __init__(self, file: str, include: str): 150 | self.file = file 151 | self.include = include 152 | super().__init__(file, include, f"Missing profile '{include}' in {file}") 153 | 154 | 155 | class SelfInheritanceError(DependencyError): 156 | def __init__(self, file: str, include: str): 157 | self.file = file 158 | self.include = include 159 | super().__init__(file, include, f"Inheritance from self '{include}' in {file}") 160 | 161 | 162 | class RedundantProfileIncludeError(DependencyError): 163 | def __init__(self, file: str, include: str): 164 | self.file = file 165 | self.include = include 166 | super().__init__( 167 | file, 168 | include, 169 | f"Redundant $include and profiles entry '{include}' in {file}", 170 | ) 171 | 172 | 173 | class UndetectableTypeError(ValidationError): 174 | def __init__(self, file: str): 175 | self.file = file 176 | super().__init__(f"Unable to detect type of {file}") 177 | 178 | 179 | class IncludeTypeMismatchError(ValidationError): 180 | def __init__( 181 | self, file: str, include: str, t: type | str, directive: str = "$include" 182 | ): 183 | self.file = file 184 | 
self.include = include 185 | if isinstance(t, str): 186 | self.cls: str = t 187 | else: 188 | self.cls = t.__name__ 189 | self.directive = directive 190 | super().__init__( 191 | f"`{directive}` type mismatch in {file}: expected type `{self.cls}` for {include}" 192 | ) 193 | 194 | 195 | class TypeNameCollisionError(ValidationError): 196 | def __init__(self, name: str, kind: str, file1: str, file2: str): 197 | self.name = name 198 | self.kind = kind 199 | self.file1 = file1 200 | self.file2 = file2 201 | super().__init__(f"Name collision for `{name}` between {file1} and {file2}") 202 | 203 | 204 | class UndefinedAttributeError(ValidationError): 205 | def __init__(self, attr: str, file: str): 206 | self.attr = attr 207 | self.file = file 208 | super().__init__( 209 | f"Attribute `{attr}` in {file} is not defined in any attribute dictionary" 210 | ) 211 | 212 | 213 | class InvalidAttributeTypeError(ValidationError): 214 | def __init__(self, ref: str, attr: str, file: str): 215 | super().__init__(f"Invalid type {ref} for {attr} in {file}") 216 | 217 | 218 | class IllegalObservableTypeIDError(ValidationError): 219 | def __init__(self, cause: str): 220 | super().__init__(cause) 221 | 222 | 223 | class ObservableTypeIDCollisionError(ValidationError): 224 | def __init__(self, type_id: int, this_def: str, other_defs: list[str], file: str): 225 | super().__init__( 226 | f"Collision with observable type_id {type_id} between {this_def}" 227 | f" in file {file} and {', '.join(other_defs)}." 228 | ) 229 | 230 | 231 | class UnknownCategoryError(ValidationError): 232 | def __init__(self, category: str, file: str): 233 | super().__init__(f'Unknown category "{category}" in "{file}"') 234 | -------------------------------------------------------------------------------- /ocsf_validator/matchers.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | from ocsf_validator.types import * 8 | 9 | 10 | class Matcher: 11 | def match(self, value: str) -> bool: 12 | raise NotImplementedError() 13 | 14 | @staticmethod 15 | def make(pattern) -> Matcher: 16 | if isinstance(pattern, Matcher): 17 | return pattern 18 | else: 19 | return RegexMatcher(pattern) 20 | 21 | 22 | class TypeMatcher: 23 | def get_type(self) -> type: 24 | raise NotImplementedError() 25 | 26 | 27 | class AnyMatcher(Matcher): 28 | def __init__(self, matchers: Optional[list[Matcher]] = None): 29 | if matchers is not None: 30 | self._matchers = matchers 31 | else: 32 | self._matchers = [] 33 | 34 | def match(self, value: str): 35 | for matcher in self._matchers: 36 | if matcher.match(value): 37 | return True 38 | 39 | return False 40 | 41 | def add(self, matcher: Matcher): 42 | self._matchers.append(matcher) 43 | 44 | 45 | class RegexMatcher(Matcher): 46 | def __init__(self, pattern: str | re.Pattern): 47 | if isinstance(pattern, str): 48 | self._pattern = re.compile(pattern) 49 | else: 50 | self._pattern = pattern 51 | 52 | def match(self, value: str): 53 | return self._pattern.match(value) is not None 54 | 55 | 56 | class GlobMatcher(Matcher): 57 | def __init__(self, pattern: str): 58 | self._pattern = pattern 59 | 60 | def match(self, value: str): 61 | path = Path(value) 62 | return path.match(self._pattern) 63 | 64 | 65 | class DictionaryMatcher(RegexMatcher, TypeMatcher): 66 | def __init__(self): 67 | self._pattern = re.compile(r".*dictionary.json") 68 | 69 | def get_type(self): 70 | return OcsfDictionary
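# A rough sketch of how these matchers are used (hypothetical paths):
#
#   DictionaryMatcher().match("/dictionary.json")  # True
#   DictionaryMatcher().match("/objects/os.json")  # False
#   either = AnyMatcher([DictionaryMatcher(), ObjectMatcher()])
#   either.match("/objects/os.json")               # True, via ObjectMatcher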
71 | 72 | 73 | class VersionMatcher(RegexMatcher, TypeMatcher): 74 | def __init__(self): 75 | self._pattern = re.compile(r".*version.json") 76 | 77 | def get_type(self): 78 | return OcsfVersion 79 | 80 | 81 | class ObjectMatcher(RegexMatcher, TypeMatcher): 82 | def __init__(self): 83 | self._pattern = re.compile(r".*objects/.*json") 84 | 85 | def get_type(self): 86 | return OcsfObject 87 | 88 | 89 | class EventMatcher(RegexMatcher, TypeMatcher): 90 | def __init__(self): 91 | self._pattern = re.compile(r".*events/.*json") 92 | 93 | def get_type(self): 94 | return OcsfEvent 95 | 96 | 97 | class ExtensionMatcher(GlobMatcher, TypeMatcher): 98 | def __init__(self): 99 | self._pattern = "extensions/*/extension.json" 100 | 101 | def get_type(self): 102 | return OcsfExtension 103 | 104 | 105 | class ProfileMatcher(RegexMatcher, TypeMatcher): 106 | def __init__(self): 107 | self._pattern = re.compile(r".*profiles/.*.json") 108 | 109 | def get_type(self): 110 | return OcsfProfile 111 | 112 | 113 | class CategoriesMatcher(RegexMatcher, TypeMatcher): 114 | def __init__(self): 115 | self._pattern = re.compile(r".*categories.json") 116 | 117 | def get_type(self): 118 | return OcsfCategories 119 | 120 | 121 | class ExcludeMatcher(Matcher): 122 | """ 123 | A matcher that produces the opposite result of the matcher it's given. 124 | """ 125 | 126 | def __init__(self, matcher: Matcher): 127 | self.matcher = matcher 128 | 129 | def match(self, value: str) -> bool: 130 | return not self.matcher.match(value) 131 | -------------------------------------------------------------------------------- /ocsf_validator/processor.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, Callable, Optional 3 | 4 | from ocsf_validator.errors import * 5 | from ocsf_validator.matchers import CategoriesMatcher, ExcludeMatcher 6 | from ocsf_validator.reader import Reader 7 | from ocsf_validator.type_mapping import TypeMapping 8 | from ocsf_validator.types import ( 9 | ATTRIBUTES_KEY, 10 | EXTENDS_KEY, 11 | INCLUDE_KEY, 12 | PROFILES_KEY, 13 | OcsfDictionary, 14 | OcsfEvent, 15 | OcsfObject, 16 | ) 17 | 18 | 19 | def deep_merge( 20 | subj: dict[str, Any], other: dict[str, Any], exclude: Optional[set[str]] = None 21 | ): 22 | """Recursive merging of dictionary keys. 23 | 24 | `subj | other` is more readable, but it doesn't merge recursively. If 25 | subj and other each have an "attributes" key with a dictionary value, 26 | only the first "attributes" dictionary will be present in the resulting 27 | dictionary. And thus this recursive merge.""" 28 | 29 | if exclude is None: 30 | skip = set() 31 | else: 32 | skip = exclude 33 | 34 | for k, v in other.items(): 35 | if k not in skip: 36 | if k in subj and isinstance(v, dict): 37 | deep_merge(subj[k], other[k]) 38 | 39 | elif k not in subj: 40 | subj[k] = other[k] 41 | 42 | 43 | def exclude_props(t1: type, t2: type): 44 | if not hasattr(t1, "__annotations__") or not hasattr(t2, "__annotations__"): 45 | raise Exception("Unexpected types in comparison") 46 | s1 = set(t1.__annotations__.keys()) 47 | s2 = set(t2.__annotations__.keys()) 48 | return s2 - s1 49 | 50 | 51 | class DependencyResolver: 52 | def __init__(self, reader: Reader, types: TypeMapping): 53 | self._reader = reader 54 | self._types = types 55 | 56 | def resolve_include( 57 | self, target: str, relative_to: Optional[str] = None 58 | ) -> str | None: 59 | """Find a file from an OCSF $include directive. 
60 | 61 | For a given file f, search: 62 | extn/f 63 | extn/f.json 64 | f 65 | f.json 66 | """ 67 | 68 | filenames = [target] 69 | if Path(target).suffix != ".json": 70 | filenames.append(target + ".json") 71 | 72 | for file in filenames: 73 | if relative_to is not None: 74 | # Search extension for relative include path, 75 | # e.g. /profiles/thing.json -> /extensions/stuff/profiles/thing.json 76 | extn = self._types.extension(relative_to) 77 | if extn is not None: 78 | k = self._reader.key("extensions", extn, file) 79 | if k in self._reader: 80 | return k 81 | 82 | k = self._reader.key(file) 83 | if k in self._reader: 84 | return k 85 | 86 | return None 87 | 88 | def resolve_profile(self, profile: str, relative_to: str) -> str | None: 89 | """Find a file from an OCSF profiles directive. 90 | 91 | For a requested profile p, search: 92 | extn/profiles/p 93 | extn/profiles/p.json 94 | profiles/p 95 | profiles/p.json 96 | extn/p 97 | extn/p.json 98 | p 99 | p.json 100 | """ 101 | file = self.resolve_include(profile, relative_to) 102 | if file is None: 103 | path = str(Path("profiles") / Path(profile)) 104 | file = self.resolve_include(path, relative_to) 105 | 106 | if file is None: 107 | extn = self._types.extension(relative_to) 108 | if extn is not None: 109 | # This is the strange case of `"profile": "linux/linux.json"`. 110 | # Why not "profiles/linux.json" or just "linux.json"? 111 | file = self.resolve_include( 112 | str(Path("extensions", extn, "profiles") / Path(profile).name) 113 | ) 114 | 115 | return file 116 | 117 | def resolve_base(self, base: str, relative_to: str) -> str | None: 118 | """Find the location of a base record in an extends directive. 119 | 120 | For a requested base b in path events/activity/thing.json, search: 121 | events/activity/b.json 122 | events/b.json 123 | b.json # this should be ignored but isn't yet 124 | 125 | For a requested base b in path extn/stuff/events/activity/thing.json, search: 126 | extn/stuff/events/activity/b.json 127 | events/activity/b.json 128 | extn/stuff/events/b.json 129 | events/b.json 130 | extn/stuff/b.json 131 | b.json # these last two should be ignored but aren't yet 132 | extn/b.json # 133 | 134 | parameters: 135 | base: str The base as it's described in the extends 136 | directive, without a path or an extension. 137 | relative_to: str The full path from the schema root to the record 138 | extending the base. 139 | """ 140 | base_path = Path(base) 141 | if base_path.suffix != ".json": 142 | base += ".json" 143 | 144 | # Search the current directory and each parent directory 145 | path = Path(relative_to) 146 | extn = self._types.extension(relative_to) 147 | 148 | while path != path.parent: 149 | test = str(path / base) 150 | if test in self._reader and test != relative_to: 151 | return test 152 | elif extn is not None: 153 | woextn = Path(*list(path.parts)[2:]) / base 154 | test = str(woextn) 155 | if test in self._reader: 156 | return test 157 | 158 | path = path.parent 159 | 160 | return None 161 | 162 | def resolve_imprecise_base(self, base: str, relative_to: str) -> str | None: 163 | """Resolve an imprecise `extends` directive. 164 | 165 | Some `extends` directives point to files that are adjacent in the tree, 166 | e.g. ../blah/base.json. These seem imprecise to me unless naming 167 | collisions are strictly avoided, so I'm not making this the default 168 | behavior of `resolve_base()`, in order to generate warnings.
169 | """ 170 | base_path = Path(base) 171 | if base_path.suffix != ".json": 172 | base += ".json" 173 | 174 | # Search the current directory and each parent directory 175 | path = Path(relative_to) 176 | extn = self._types.extension(relative_to) 177 | 178 | while path != path.parent: 179 | for search in self._reader.ls(str(path)): 180 | search_path = path / search 181 | test = str(search_path / base) 182 | if test in self._reader and test != relative_to: 183 | return test 184 | elif extn is not None: 185 | woextn = Path(*list(search_path.parts)[2:]) / base 186 | test = str(woextn) 187 | if test in self._reader: 188 | return test 189 | 190 | path = path.parent 191 | 192 | return None 193 | 194 | 195 | class MergeParser: 196 | def __init__( 197 | self, 198 | reader: Reader, 199 | resolver: DependencyResolver, 200 | collector: Collector, 201 | types: TypeMapping, 202 | ): 203 | self._reader = reader 204 | self._resolver = resolver 205 | self._collector = collector 206 | self._types = types 207 | 208 | def applies_to(self, t: type) -> bool: 209 | return False 210 | 211 | def found_in(self, path: str) -> bool: 212 | return False 213 | 214 | def extract_targets(self, path: str) -> list[str]: 215 | return [] 216 | 217 | def apply(self, path: str) -> None: 218 | for target in self.extract_targets(path): 219 | exclude = exclude_props(self._types[path], self._types[target]) 220 | deep_merge(self._reader[path], self._reader[target], exclude=exclude) 221 | 222 | 223 | class ExtendsParser(MergeParser): 224 | def applies_to(self, t: type) -> bool: 225 | if hasattr(t, "__required_keys__") or hasattr(t, "__optional_keys__"): 226 | return EXTENDS_KEY in t.__required_keys__ or EXTENDS_KEY in t.__optional_keys__ # type: ignore 227 | else: 228 | return False 229 | 230 | def found_in(self, path: str) -> bool: 231 | return EXTENDS_KEY in self._reader[path] 232 | 233 | def extract_targets(self, path: str) -> list[str]: 234 | target = self._reader[path][EXTENDS_KEY] 235 | base = self._resolver.resolve_base(target, path) 236 | if base is None: 237 | base = self._resolver.resolve_imprecise_base(target, path) 238 | if base is None: 239 | self._collector.handle(MissingBaseError(path, target)) 240 | else: 241 | self._collector.handle(ImpreciseBaseError(path, target)) 242 | else: 243 | if self._types[base] not in [OcsfEvent, OcsfObject]: 244 | self._collector.handle( 245 | IncludeTypeMismatchError( 246 | path, base, "OcsfObject | OcsfEvent", "extends" 247 | ) 248 | ) 249 | base = self._resolver.resolve_base(target, path) 250 | return [base if base is not None else ""] 251 | 252 | return [] 253 | 254 | 255 | class ProfilesParser(MergeParser): 256 | def applies_to(self, t: type) -> bool: 257 | if hasattr(t, "__required_keys__") or hasattr(t, "__optional_keys__"): 258 | return ( 259 | PROFILES_KEY in t.__required_keys__ # type: ignore 260 | or PROFILES_KEY in t.__optional_keys__ # type: ignore 261 | ) 262 | else: 263 | return False 264 | 265 | def found_in(self, path: str) -> bool: 266 | return PROFILES_KEY in self._reader[path] 267 | 268 | def extract_targets(self, path: str) -> list[str]: 269 | targets = [] 270 | profiles = self._reader[path][PROFILES_KEY] 271 | 272 | if isinstance(profiles, str): 273 | profiles = [profiles] 274 | 275 | for profile in profiles: 276 | target = self._resolver.resolve_profile(profile, path) 277 | if target is None: 278 | self._collector.handle(MissingProfileError(path, profile)) 279 | else: 280 | targets.append(target) 281 | 282 | return targets 283 | 284 | 285 | class
AttributesParser(MergeParser): 286 | def applies_to(self, t: type) -> bool: 287 | if hasattr(t, "__required_keys__") or hasattr(t, "__optional_keys__"): 288 | return ( 289 | ATTRIBUTES_KEY in t.__required_keys__ # type: ignore 290 | or ATTRIBUTES_KEY in t.__optional_keys__ # type: ignore 291 | ) 292 | else: 293 | return False 294 | 295 | def found_in(self, path: str) -> bool: 296 | return ATTRIBUTES_KEY in self._reader[path] 297 | 298 | def extract_targets(self, path: str) -> list[str]: 299 | if self._types[path] == OcsfDictionary: 300 | return [] 301 | else: 302 | return [self._reader.key("dictionary.json")] 303 | # TODO the above should include extension dictionaries for correctness 304 | 305 | def _extn_dict(self, path): 306 | extn = self._types.extension(path) 307 | if extn is not None: 308 | dict_path = self._reader.key("extensions", extn, "dictionary.json") 309 | if dict_path in self._reader: 310 | return self._reader[dict_path][ATTRIBUTES_KEY] 311 | return {} 312 | 313 | def _root_dict(self): 314 | file = self._reader.find("dictionary.json") 315 | if file is not None: 316 | return file[ATTRIBUTES_KEY] 317 | return {} 318 | 319 | def apply(self, path: str): 320 | attrs = self._reader[path][ATTRIBUTES_KEY] 321 | root = self._root_dict() 322 | extn = self._extn_dict(path) 323 | 324 | # TODO is the dict name comparison enough 325 | # or do we need to find by the `name` key? 326 | for name, attr in attrs.items(): 327 | if name in extn: 328 | deep_merge(attrs[name], extn[name]) 329 | if name in root: 330 | deep_merge(attrs[name], root[name]) 331 | 332 | 333 | class IncludeParser(MergeParser): 334 | def applies_to(self, t: type) -> bool: 335 | return ( 336 | "__required_keys__" in t.__dict__ 337 | and INCLUDE_KEY in t.__required_keys__ # type: ignore 338 | ) or ( 339 | "__optional_keys__" in t.__dict__ 340 | and INCLUDE_KEY in t.__optional_keys__ # type: ignore 341 | ) 342 | 343 | def _has_includes(self, defn: dict[str, Any]) -> bool: 344 | """Recursively search for $include directives.""" 345 | keys = list(defn.keys()) 346 | for k in keys: 347 | if k == INCLUDE_KEY: 348 | return True 349 | elif isinstance(defn[k], dict): 350 | if self._has_includes(defn[k]): 351 | return True 352 | return False 353 | 354 | def found_in(self, path: str) -> bool: 355 | return self._has_includes(self._reader[path]) 356 | 357 | def _parse_includes( 358 | self, 359 | defn: dict[str, Any], 360 | path: str, 361 | trail: list[str] = [], 362 | update: bool = True, 363 | remove: bool = False, 364 | ) -> list[str]: 365 | """Find $include directives, optionally apply them, optionally 366 | remove the $include directive, and return a list of include targets. 367 | """ 368 | keys = list(defn.keys()) 369 | found = [] 370 | 371 | for k in keys: 372 | if k == INCLUDE_KEY: 373 | if isinstance(defn[k], str): 374 | targets = [defn[k]] 375 | else: 376 | targets = defn[k] 377 | 378 | for target in targets: 379 | t = self._resolver.resolve_include(target, path) 380 | found.append(t) 381 | if t is None: 382 | self._collector.handle(MissingIncludeError(path, target)) 383 | elif update: 384 | other = self._reader[t] 385 | try: 386 | for key in trail: 387 | other = other[key] 388 | except KeyError: 389 | # Older copies of the schema use files in enums/ that 390 | # don't mirror the structure of the files they're 391 | # being included into.
392 | pass 393 | deep_merge(defn, other) 394 | 395 | if remove: 396 | del defn[k] 397 | 398 | elif isinstance(defn[k], dict): 399 | found += self._parse_includes( 400 | defn[k], path, trail + [k], update, remove 401 | ) 402 | 403 | return found 404 | 405 | def extract_targets(self, path: str) -> list[str]: 406 | return self._parse_includes( 407 | self._reader[path], path, update=False, remove=False 408 | ) 409 | 410 | def apply(self, path: str) -> None: 411 | self._parse_includes(self._reader[path], path, update=True, remove=False) 412 | 413 | 414 | class Dependencies: 415 | """A friendly list of dependencies.""" 416 | 417 | def __init__(self) -> None: 418 | self._dependencies: dict[str, list[tuple[str, str]]] = {} 419 | 420 | def add(self, child: str, parent: str, label: str = "") -> None: 421 | if child not in self._dependencies: 422 | self._dependencies[child] = [] 423 | self._dependencies[child].append((parent, label)) 424 | 425 | def __iter__(self): 426 | return iter(self._dependencies) 427 | 428 | def __getitem__(self, key: str) -> list[tuple[str, str]]: 429 | return self._dependencies[key] 430 | 431 | def keys(self): 432 | return self._dependencies.keys() 433 | 434 | def exists(self, path: str, target: str, directive: Optional[str] = None): 435 | if path in self._dependencies: 436 | for item in self._dependencies[path]: 437 | if item[0] == target: 438 | if directive is not None: 439 | if directive == item[1]: 440 | return True 441 | else: 442 | return True 443 | return False 444 | 445 | 446 | def process_includes( 447 | reader: Reader, 448 | collector: Collector = Collector.default, 449 | types: Optional[TypeMapping] = None, 450 | update: bool = True, 451 | ): 452 | if types is None: 453 | types = TypeMapping(reader, collector) 454 | 455 | resolver = DependencyResolver(reader, types) 456 | 457 | parsers = { 458 | EXTENDS_KEY: ExtendsParser(reader, resolver, collector, types), 459 | PROFILES_KEY: ProfilesParser(reader, resolver, collector, types), 460 | INCLUDE_KEY: IncludeParser(reader, resolver, collector, types), 461 | ATTRIBUTES_KEY: AttributesParser(reader, resolver, collector, types), 462 | } 463 | fulfilled: set[str] = set() 464 | dependencies = Dependencies() 465 | 466 | # categories cannot be extended with dependencies, and it causes problems 467 | # if we try to include dictionary attributes in categories 468 | matcher = ExcludeMatcher(CategoriesMatcher()) 469 | 470 | for path in reader.match(matcher): 471 | for directive, parser in parsers.items(): 472 | if parser.found_in(path): 473 | for target in parser.extract_targets(path): 474 | dependencies.add(path, target, directive) 475 | 476 | def process(path: str): 477 | if path not in fulfilled: 478 | if path in dependencies: 479 | for dependency, directive in dependencies[path]: 480 | if dependency == path: 481 | collector.handle(SelfInheritanceError(path, dependency)) 482 | elif directive == INCLUDE_KEY and dependencies.exists( 483 | path, dependency, PROFILES_KEY 484 | ): 485 | collector.handle(RedundantProfileIncludeError(path, dependency)) 486 | else: 487 | process(dependency) 488 | 489 | if update: 490 | for directive, parser in parsers.items(): 491 | if parser.found_in(path): 492 | parser.apply(path) 493 | 494 | fulfilled.add(path) 495 | 496 | for path in dependencies.keys(): 497 | # print(path, dependencies[path]) 498 | process(path) 499 | -------------------------------------------------------------------------------- /ocsf_validator/reader.py: 
-------------------------------------------------------------------------------- 1 | """Tools for working with OCSF schema definition files. 2 | 3 | This module contains tools to work with the JSON files that define the OCSF 4 | schema. The most important export is the `Reader` class, which allows convenient 5 | access to the OCSF schema as it's represented in the definition files. 6 | """ 7 | 8 | import json 9 | from abc import ABC 10 | from dataclasses import dataclass 11 | from pathlib import Path 12 | from typing import Any, Callable, Dict, Iterable, Optional 13 | 14 | from ocsf_validator.errors import InvalidBasePathError 15 | from ocsf_validator.matchers import Matcher 16 | 17 | # TODO would os.PathLike be better? 18 | Pathable = str | Path 19 | 20 | # TODO refine Any in type signature 21 | SchemaData = Dict[str, Any] 22 | 23 | Pattern = str | Matcher 24 | 25 | 26 | @dataclass 27 | class ReaderOptions: 28 | """Options to control the behavior of a Reader.""" 29 | 30 | base_path: Optional[Path] = None 31 | """The base path from which to load the schema.""" 32 | 33 | metaschema_path: Optional[Path] = None 34 | """The metaschema path from which to load the metaschema.""" 35 | 36 | read_extensions: bool = True 37 | """Recurse extensions.""" 38 | 39 | 40 | class Reader(ABC): 41 | """An in-memory copy of the raw OCSF schema definition. 42 | 43 | The `Reader` maintains a dictionary with relative file paths as strings 44 | and values of `dict[str, Any]` that are the decoded JSON of each schema 45 | file. 46 | 47 | Args: 48 | options (str | Path): a base path for the schema, probably a clone of 49 | the `ocsf-schema` repository. 50 | options (ReaderOptions): an instance of ReaderOptions to change 51 | the behavior of the Reader. 52 | """ 53 | 54 | def __init__(self, options: ReaderOptions | Pathable | None = None) -> None: 55 | if options is not None: 56 | if not isinstance(options, ReaderOptions): 57 | if isinstance(options, str): 58 | path = Path(options) 59 | else: 60 | path = options 61 | options = ReaderOptions( 62 | base_path=path, metaschema_path=(path / "metaschema") 63 | ) 64 | 65 | self._options = options 66 | else: 67 | self._options = ReaderOptions() 68 | 69 | self._data: SchemaData = {} 70 | self._root: str = "" 71 | 72 | @property 73 | def base_path(self): 74 | return self._options.base_path 75 | 76 | @property 77 | def metaschema_path(self): 78 | return self._options.metaschema_path 79 | 80 | def contents(self, path: Pathable) -> SchemaData: 81 | """Retrieve the parsed JSON data in a given file.""" 82 | if isinstance(path, Path): 83 | path = str(path) 84 | 85 | # Can raise KeyError 86 | return self.__getitem__(path) 87 | 88 | def key(self, *args: str) -> str: 89 | """Platform agnostic key / filename creation from individual parts.""" 90 | return str(self._root / Path(*args)) 91 | 92 | def __getitem__(self, key: str): 93 | return self._data[key] 94 | 95 | def find(self, *parts: str) -> SchemaData | None: 96 | try: 97 | return self.__getitem__(self.key(*parts)) 98 | except KeyError: 99 | return None 100 | 101 | def __setitem__(self, key: str, val: SchemaData): 102 | self._data[key] = val 103 | 104 | def __contains__(self, key: str): 105 | return key in self._data 106 | 107 | def __len__(self): 108 | return len(self._data) 109 | 110 | def ls(self, path: str | None = None, dirs=True, files=True) -> list[str]: 111 | if path is None: 112 | path = "/" 113 | if path[0] != "/": 114 | path = "/" + path 115 | 116 | base = Path(path) 117 | 118 | matched = set() 119 | for k in self._data.keys():
120 | p = Path(k) 121 | if p.parts[0 : len(base.parts)] == base.parts: 122 | depth = len(base.parts) + 1 123 | if (len(p.parts) == depth and files) or (len(p.parts) > depth and dirs): 124 | matched.add(p.parts[len(base.parts)]) 125 | 126 | return list(matched) 127 | 128 | def match(self, pattern: Optional[Pattern] = None) -> Iterable[str]: 129 | """Return a list of keys that match pattern.""" 130 | if pattern is not None: 131 | pattern = Matcher.make(pattern) 132 | 133 | for k in self._data.keys(): 134 | if pattern is None or pattern.match(k): 135 | yield k 136 | 137 | def apply(self, op: Callable, pattern: Optional[Pattern] = None) -> None: 138 | """Apply a function to every 'file' in the schema, optionally if it 139 | matches a globbing expression `target`.""" 140 | 141 | for k in self.match(pattern): 142 | op(self, k) 143 | 144 | def map( 145 | self, 146 | op: Callable, 147 | pattern: Optional[Pattern] = None, 148 | accumulator: Any = None, 149 | ) -> Any: 150 | """Apply a function to every 'file' in the schema, optionally if it 151 | matches a globbing expression `target`, and return the accumulated 152 | result.""" 153 | 154 | for k in self.match(pattern): 155 | accumulator = op(self, k, accumulator) 156 | 157 | return accumulator 158 | 159 | 160 | class DictReader(Reader): 161 | """A Reader that works from a `dict` without reading the filesystem. 162 | 163 | Useful (hopefully) for testing and debugging.""" 164 | 165 | def __init__( 166 | self, options: ReaderOptions | Pathable | SchemaData | None = None 167 | ) -> None: 168 | if isinstance(options, dict): 169 | super().__init__(None) 170 | self.set_data(options) 171 | else: 172 | super().__init__(options) 173 | 174 | def set_data(self, data: SchemaData): 175 | self._data = data.copy() 176 | self._root = Path(next(iter(self._data.keys()))).root 177 | 178 | 179 | class FileReader(Reader): 180 | """A Reader that reads schema definitions from JSON files.""" 181 | 182 | def __init__(self, options: ReaderOptions | Pathable | None) -> None: 183 | if options is None: 184 | raise InvalidBasePathError("No base path specified") 185 | 186 | super().__init__(options) 187 | 188 | path = self._options.base_path 189 | 190 | if path is None: 191 | raise InvalidBasePathError( 192 | f"Missing schema base path in constructor arguments." 
193 | ) 194 | 195 | if not path.is_dir(): 196 | raise InvalidBasePathError(f'Schema base path "{path}" is not a directory.') 197 | 198 | self._root = path.root 199 | self._data = _walk(path, path, self._options) 200 | 201 | 202 | TRAVERSABLE_PATHS = ["enums", "includes", "objects", "events", "profiles", "extensions"] 203 | 204 | 205 | def _walk(path: Path, base: Path, options: ReaderOptions) -> SchemaData: 206 | data: SchemaData = {} 207 | 208 | for entry in path.iterdir(): 209 | key = str(base.root / entry.relative_to(base)) 210 | 211 | if entry.is_file() and entry.suffix == ".json": 212 | with open(entry) as file: 213 | try: 214 | data[key] = json.load(file) 215 | except json.JSONDecodeError as e: 216 | # TODO maybe reformat this error before raising it 217 | raise e 218 | 219 | elif entry.is_dir() and ( 220 | entry.name in TRAVERSABLE_PATHS or entry.parent.name in TRAVERSABLE_PATHS 221 | ): 222 | if entry.name == "extensions" and not options.read_extensions: 223 | break 224 | 225 | data |= _walk(entry, base, options) 226 | 227 | return data 228 | -------------------------------------------------------------------------------- /ocsf_validator/runner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Validate OCSF Schema definitions. 3 | """ 4 | 5 | import traceback 6 | from dataclasses import dataclass 7 | from enum import IntEnum 8 | from pathlib import Path 9 | from typing import Callable, Optional 10 | 11 | from termcolor import colored 12 | 13 | import ocsf_validator.errors as errors 14 | from ocsf_validator.processor import process_includes 15 | from ocsf_validator.reader import FileReader, ReaderOptions 16 | from ocsf_validator.type_mapping import TypeMapping 17 | from ocsf_validator.validators import ( 18 | validate_attr_types, 19 | validate_event_categories, 20 | validate_include_targets, 21 | validate_intra_type_collisions, 22 | validate_metaschemas, 23 | validate_no_unknown_keys, 24 | validate_observables, 25 | validate_required_keys, 26 | validate_undefined_attrs, 27 | validate_unused_attrs, 28 | ) 29 | 30 | 31 | class Severity(IntEnum): 32 | INFO = 0 33 | WARN = 1 34 | ERROR = 2 35 | FATAL = 3 36 | 37 | 38 | @dataclass 39 | class ValidatorOptions: 40 | """Configure validator behavior.""" 41 | 42 | base_path: str = "." 
43 | """The base path of the schema.""" 44 | 45 | metaschema_path: Optional[str] = None 46 | """The path to the schema's metaschema.""" 47 | 48 | extensions: bool = True 49 | """Include the contents of extensions.""" 50 | 51 | strict: bool = False 52 | """When True, exit with a non-zero exit code when warnings are encountered.""" 53 | 54 | show_info: bool = False 55 | """Show informational messages.""" 56 | 57 | invalid_path: int = Severity.FATAL 58 | """The OCSF Schema path could not be found or is horribly wrong.""" 59 | 60 | invalid_metaschema: int = Severity.FATAL 61 | """The metaschema defined in this validator appears to be invalid.""" 62 | 63 | missing_include: int = Severity.ERROR 64 | """An `$include` target is missing.""" 65 | 66 | missing_profile: int = Severity.ERROR 67 | """A `profiles` target is missing.""" 68 | 69 | missing_inheritance: int = Severity.ERROR 70 | """An `extends` inheritance target is missing.""" 71 | 72 | imprecise_inheritance: int = Severity.INFO 73 | """An `extends` inheritance target is resolvable 74 | but imprecise and possibly ambiguous.""" 75 | 76 | missing_key: int = Severity.ERROR 77 | """A required key is missing.""" 78 | 79 | unknown_key: int = Severity.ERROR 80 | """An unrecognized key was found.""" 81 | 82 | unused_attribute: int = Severity.WARN 83 | """An attribute in `dictionary.json` is unused.""" 84 | 85 | self_inheritance: int = Severity.WARN 86 | """Attempting to `extend` the current record.""" 87 | 88 | redundant_profile_include: int = Severity.INFO 89 | """Redundant profiles and $include target.""" 90 | 91 | undetectable_type: int = Severity.WARN 92 | """Unable to detect type of file.""" 93 | 94 | include_type_mismatch: int = Severity.WARN 95 | """Unexpected include type.""" 96 | 97 | intra_type_name_collision: int = Severity.WARN 98 | """Same name used multiple times within a type.""" 99 | 100 | undefined_attribute: int = Severity.WARN 101 | """Attributes used in a record but not defined in `dictionary.json`.""" 102 | 103 | invalid_metaschema_file: int = Severity.ERROR 104 | """A JSON schema metaschema file is missing or invalid.""" 105 | 106 | invalid_attr_types: int = Severity.ERROR 107 | """Attribute type is invalid.""" 108 | 109 | illegal_observable: int = Severity.ERROR 110 | """Observable type_id illegally defined.""" 111 | 112 | observable_collision: int = Severity.ERROR 113 | """Colliding observable type_id defined.""" 114 | 115 | unknown_category: int = Severity.ERROR 116 | """Unknown category.""" 117 | 118 | def severity(self, err: Exception): 119 | match type(err): 120 | case errors.MissingRequiredKeyError: 121 | return self.missing_key 122 | case errors.UnknownKeyError: 123 | return self.unknown_key 124 | case errors.MissingIncludeError: 125 | return self.missing_include 126 | case errors.MissingProfileError: 127 | return self.missing_profile 128 | case errors.MissingBaseError: 129 | return self.missing_inheritance 130 | case errors.UnusedAttributeError: 131 | return self.unused_attribute 132 | case errors.InvalidMetaSchemaError: 133 | return self.invalid_metaschema 134 | case errors.InvalidBasePathError: 135 | return self.invalid_path 136 | case errors.ImpreciseBaseError: 137 | return self.imprecise_inheritance 138 | case errors.SelfInheritanceError: 139 | return self.self_inheritance 140 | case errors.RedundantProfileIncludeError: 141 | return self.redundant_profile_include 142 | case errors.UndetectableTypeError: 143 | return self.undetectable_type 144 | case errors.IncludeTypeMismatchError: 145 | return 
self.include_type_mismatch 146 | case errors.TypeNameCollisionError: 147 | return self.intra_type_name_collision 148 | case errors.UndefinedAttributeError: 149 | return self.undefined_attribute 150 | case errors.InvalidMetaSchemaFileError: 151 | return self.invalid_metaschema_file 152 | case errors.InvalidAttributeTypeError: 153 | return self.invalid_attr_types 154 | case errors.IllegalObservableTypeIDError: 155 | return self.illegal_observable 156 | case errors.ObservableTypeIDCollisionError: 157 | return self.observable_collision 158 | case errors.UnknownCategoryError: 159 | return self.unknown_category 160 | case _: 161 | return Severity.INFO 162 | 163 | 164 | class ValidationRunner: 165 | def __init__(self, pathOrOptions: str | ValidatorOptions): 166 | if isinstance(pathOrOptions, str): 167 | options = ValidatorOptions(base_path=pathOrOptions) 168 | else: 169 | options = pathOrOptions 170 | 171 | self.options = options 172 | 173 | def txt_fail(self, text: str): 174 | return colored(text, "red") 175 | 176 | def txt_warn(self, text: str): 177 | return colored(text, "yellow") 178 | 179 | def txt_crash(self, text: str): 180 | return colored(text, "black", "on_red") 181 | 182 | def txt_info(self, text: str): 183 | return colored(text, "blue") 184 | 185 | def txt_pass(self, text: str): 186 | return colored(text, "green") 187 | 188 | def txt_highlight(self, text: str): 189 | return colored(text, "light_grey", "on_cyan") 190 | 191 | def txt_emphasize(self, text: str): 192 | return colored(text, "white") 193 | 194 | def txt_label(self, severity: int): 195 | match severity: 196 | case Severity.INFO: 197 | return self.txt_info("INFO") 198 | case Severity.WARN: 199 | return self.txt_warn("WARNING") 200 | case Severity.ERROR: 201 | return self.txt_fail("ERROR") 202 | case Severity.FATAL: 203 | return self.txt_crash("FATAL") 204 | case _: 205 | return self.txt_emphasize("???") 206 | 207 | def validate(self) -> None: 208 | exit_code = 0 209 | messages: dict[str, dict[int, set[str]]] = {} 210 | collector = errors.Collector(throw=False) 211 | 212 | def test(label: str, code: Callable): 213 | failures: int = 0 214 | message = code() 215 | 216 | if label not in messages: 217 | messages[label] = {} 218 | print("") 219 | print(self.txt_info("TESTING") + ":", self.txt_emphasize(label)) 220 | 221 | for err in collector.exceptions(): 222 | severity = self.options.severity(err) 223 | 224 | if severity not in messages[label]: 225 | messages[label][severity] = set() 226 | 227 | messages[label][severity].add(str(err)) 228 | if severity > Severity.INFO or self.options.show_info: 229 | if severity > Severity.INFO: 230 | failures += 1 231 | print(" ", self.txt_label(severity) + ":", err) 232 | 233 | if severity == Severity.FATAL: 234 | exit(2) 235 | 236 | if failures == 0: 237 | print(" ", self.txt_pass("PASS") + ":", "No problems identified.") 238 | collector.flush() 239 | 240 | if message: 241 | print(message) 242 | 243 | try: 244 | print(self.txt_emphasize("===[ OCSF Schema Validator ]===")) 245 | print( 246 | "Validating OCSF Schema at:", self.txt_highlight(self.options.base_path) 247 | ) 248 | b_path = Path(self.options.base_path) 249 | if not b_path.is_absolute(): 250 | print(" Absolute path:", str(b_path.resolve())) 251 | if self.options.metaschema_path is not None: 252 | print( 253 | "Using metaschema at:", 254 | self.txt_highlight(self.options.metaschema_path), 255 | ) 256 | m_path = Path(self.options.metaschema_path) 257 | if not m_path.is_absolute(): 258 | print(" Absolute path:", str(m_path.resolve())) 
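# (Editor's note -- illustration only, not a line of runner.py: the severity
# table in ValidatorOptions is what test() consults for every collected
# exception, matching on the exact exception type. With default options,
# self.options.severity(errors.UnknownKeyError(...)) evaluates to
# Severity.ERROR, while any exception type without a match arm falls
# through to Severity.INFO.)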
259 | 260 | # Set up the reader 261 | base_path = Path(self.options.base_path) 262 | if self.options.metaschema_path is None: 263 | metaschema_path = base_path / "metaschema" 264 | else: 265 | metaschema_path = Path(self.options.metaschema_path) 266 | opts = ReaderOptions( 267 | base_path=base_path, 268 | metaschema_path=metaschema_path, 269 | read_extensions=self.options.extensions, 270 | ) 271 | reader = None 272 | try: 273 | reader = FileReader(opts) 274 | except errors.ValidationError as err: 275 | collector.handle(err) 276 | 277 | if reader is None: 278 | print(self.txt_crash("FATAL"), "Unable to initialize schema") 279 | exit(2) 280 | 281 | test("Schema definitions can be loaded", lambda: None) 282 | 283 | types = TypeMapping(reader, collector) 284 | test("Schema types can be inferred", lambda: None) 285 | 286 | test( 287 | "Check observable type_id definitions", 288 | lambda: validate_observables(reader, collector=collector, types=types), 289 | ) 290 | 291 | # Validate dependencies 292 | test( 293 | "Dependency targets are resolvable and exist", 294 | lambda: validate_include_targets( 295 | reader, collector=collector, types=types 296 | ), 297 | ) 298 | 299 | process_includes(reader, collector=collector, types=types) 300 | 301 | # Any errors since the last test were duplicates; ignore them 302 | collector.flush() 303 | 304 | # Validate keys 305 | test( 306 | "Required keys are present", 307 | lambda: validate_required_keys( 308 | reader, collector=collector, types=types 309 | ), 310 | ) 311 | 312 | test( 313 | "There are no unrecognized keys", 314 | lambda: validate_no_unknown_keys( 315 | reader, collector=collector, types=types 316 | ), 317 | ) 318 | 319 | test( 320 | "All attributes in the dictionary are used", 321 | lambda: validate_unused_attrs(reader, collector=collector, types=types), 322 | ) 323 | 324 | test( 325 | "All attributes are defined in dictionary.json", 326 | lambda: validate_undefined_attrs( 327 | reader, collector=collector, types=types 328 | ), 329 | ) 330 | 331 | test( 332 | "Names are not used multiple times within a record type", 333 | lambda: validate_intra_type_collisions( 334 | reader, collector=collector, types=types 335 | ), 336 | ) 337 | 338 | test( 339 | "Attribute type references are defined", 340 | lambda: validate_attr_types(reader, collector=collector, types=types), 341 | ) 342 | 343 | test( 344 | "Event class categories are defined", 345 | lambda: validate_event_categories( 346 | reader, collector=collector, types=types 347 | ), 348 | ) 349 | 350 | test( 351 | "JSON files match their metaschema definitions", 352 | lambda: validate_metaschemas(reader, collector=collector, types=types), 353 | ) 354 | 355 | except Exception as err: 356 | print("Encountered an unexpected exception:") 357 | traceback.print_exception(err) 358 | 359 | finally: 360 | print("") 361 | print(self.txt_emphasize("SUMMARY")) 362 | 363 | failure_threshold = ( 364 | Severity.ERROR if not self.options.strict else Severity.WARN 365 | ) 366 | 367 | for k in messages: 368 | found = False 369 | if len(messages[k].items()) > 0: 370 | for sev in [ 371 | Severity.FATAL, 372 | Severity.ERROR, 373 | Severity.WARN, 374 | Severity.INFO, 375 | ]: 376 | if not found and sev in messages[k] and sev >= failure_threshold: 377 | found = True 378 | print(" ", self.txt_fail("FAILED") + ":", k) 379 | exit_code = 1 380 | 381 | if not found: 382 | print(" ", self.txt_pass("PASSED") + ":", k) 383 | 384 | print("") 385 | exit(exit_code) 386 | --------------------------------------------------------------------------------
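(Editor's note: the sketch below is an illustration added for this write-up, not a file in the repository. It shows one plausible way to drive the ValidationRunner defined above, assuming a schema checkout at the hypothetical path "./ocsf-schema"; the package also ships ocsf_validator/__main__.py as its command-line entry point, which is not reproduced in this section.)

from ocsf_validator.runner import ValidationRunner, ValidatorOptions

# ValidationRunner accepts either a bare path string or a ValidatorOptions
# instance; strict=True promotes warnings to failures, and show_info=True
# also prints INFO-level findings.
opts = ValidatorOptions(base_path="./ocsf-schema", strict=True, show_info=True)

# validate() prints its report and then calls exit() with the resulting
# status code, so control does not return to the caller.
ValidationRunner(opts).validate()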
/ocsf_validator/type_mapping.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ocsf_validator.errors import Collector, UndetectableTypeError 4 | from ocsf_validator.matchers import * 5 | from ocsf_validator.reader import Reader 6 | from ocsf_validator.types import * 7 | 8 | MATCHERS: list = [ 9 | VersionMatcher(), 10 | DictionaryMatcher(), 11 | CategoriesMatcher(), 12 | ProfileMatcher(), 13 | ObjectMatcher(), 14 | EventMatcher(), 15 | ExtensionMatcher(), 16 | ] 17 | 18 | 19 | class TypeMapping: 20 | def __init__(self, reader: Reader, collector: Collector = Collector.default): 21 | self._reader = reader 22 | self._collector = collector 23 | self._mappings: dict[str, type] = {} 24 | self.update() 25 | 26 | def __getitem__(self, path: str) -> type: 27 | return self._mappings[path] 28 | 29 | def __contains__(self, path: str) -> bool: 30 | return path in self._mappings 31 | 32 | def __iter__(self): 33 | return iter(self._mappings) 34 | 35 | def _get_type(self, path: str) -> type | None: 36 | for matcher in MATCHERS: 37 | if matcher.match(path): 38 | return matcher.get_type() 39 | return None 40 | 41 | def update(self): 42 | for path in self._reader.match(): 43 | t = self._get_type(path) 44 | if t is not None: 45 | self._mappings[path] = t 46 | else: 47 | self._collector.handle(UndetectableTypeError(path)) 48 | 49 | def extension(self, path: str) -> str | None: 50 | """Extract the extension name from a given key/filepath.""" 51 | parts = list(Path(self._reader.key(path)).parts) 52 | if "extensions" in parts: 53 | return parts[parts.index("extensions") + 1] 54 | else: 55 | return None 56 | -------------------------------------------------------------------------------- /ocsf_validator/types.py: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Any, 3 | Dict, 4 | NotRequired, 5 | Optional, 6 | Required, 7 | Sequence, 8 | TypedDict, 9 | TypeVar, 10 | Union, 11 | ) 12 | 13 | ATTRIBUTES_KEY = "attributes" 14 | CATEGORY_KEY = "category" 15 | PROFILES_KEY = "profiles" 16 | EXTENDS_KEY = "extends" 17 | INCLUDE_KEY = "$include" 18 | OBSERVABLE_KEY = "observable" 19 | OBSERVABLES_KEY = "observables" 20 | TYPES_KEY = "types" 21 | 22 | 23 | class OcsfVersion(TypedDict): 24 | version: str 25 | 26 | 27 | class OcsfDeprecationInfo(TypedDict): 28 | message: Required[str] 29 | since: Required[str] 30 | 31 | 32 | class OcsfReference(TypedDict): 33 | url: Required[str] 34 | description: Required[str] 35 | 36 | 37 | OcsfReferences = Sequence[OcsfReference] 38 | 39 | 40 | OcsfEnumMember = TypedDict( 41 | "OcsfEnumMember", 42 | { 43 | "@deprecated": NotRequired[OcsfDeprecationInfo], 44 | "caption": str, 45 | "description": NotRequired[str], 46 | "source": NotRequired[str], 47 | "references": NotRequired[OcsfReferences], 48 | }, 49 | ) 50 | 51 | 52 | class OcsfEnum(TypedDict): 53 | enum: Dict[str, OcsfEnumMember] 54 | 55 | 56 | OcsfAttr = TypedDict( 57 | "OcsfAttr", 58 | { 59 | "$include": NotRequired[str], 60 | # "caption": NotRequired[str], 61 | "caption": str, 62 | "default": NotRequired[Any], 63 | "description": NotRequired[str], 64 | "enum": NotRequired[Dict[str, OcsfEnumMember]], 65 | "group": NotRequired[str], 66 | "is_array": NotRequired[bool], 67 | "suppress_checks": NotRequired[Sequence[str]], 68 | "max_len": NotRequired[int], 69 | "name": NotRequired[str], 70 | "notes": NotRequired[str], 71 | "observable": NotRequired[int], 72 | "range": NotRequired[Sequence[int]], 73 | 
"regex": NotRequired[str], 74 | "requirement": NotRequired[str], 75 | "sibling": NotRequired[str], 76 | "type": NotRequired[str], 77 | "type_name": NotRequired[str], 78 | "profile": NotRequired[Optional[Sequence[str]]], 79 | "values": NotRequired[Sequence[Any]], 80 | "@deprecated": NotRequired[OcsfDeprecationInfo], 81 | "source": NotRequired[str], 82 | "references": NotRequired[OcsfReferences], 83 | }, 84 | ) 85 | 86 | 87 | class OcsfExtension(TypedDict): 88 | uid: int 89 | name: str 90 | caption: str 91 | path: NotRequired[str] 92 | version: NotRequired[str] 93 | description: NotRequired[str] 94 | 95 | 96 | class OcsfDictionaryTypes(TypedDict): 97 | attributes: Dict[str, OcsfAttr] 98 | caption: str 99 | description: str 100 | 101 | 102 | class OcsfDictionary(TypedDict): 103 | attributes: Dict[str, OcsfAttr] 104 | caption: str 105 | description: str 106 | name: str 107 | types: NotRequired[OcsfDictionaryTypes] 108 | 109 | 110 | class OcsfCategory(TypedDict): 111 | caption: str 112 | description: str 113 | uid: int 114 | type: NotRequired[str] # older categories.json definitions 115 | 116 | 117 | class OcsfCategories(TypedDict): 118 | attributes: Dict[str, OcsfCategory] 119 | caption: str 120 | description: str 121 | name: str 122 | 123 | 124 | class OcsfInclude(TypedDict): 125 | caption: str 126 | attributes: Dict[str, OcsfAttr] 127 | description: NotRequired[str] 128 | annotations: NotRequired[Dict[str, str]] 129 | 130 | 131 | class OcsfProfile(TypedDict): 132 | caption: str 133 | description: str 134 | meta: str 135 | name: str 136 | attributes: Dict[str, OcsfAttr] 137 | annotations: NotRequired[Dict[str, str]] 138 | 139 | 140 | OcsfObject = TypedDict( 141 | "OcsfObject", 142 | { 143 | "caption": NotRequired[str], 144 | "description": NotRequired[str], 145 | "name": NotRequired[str], 146 | "attributes": Dict[str, OcsfAttr], 147 | "extends": NotRequired[Union[str, list[Optional[str]]]], 148 | "observable": NotRequired[int], 149 | "profiles": NotRequired[Sequence[str]], 150 | "constraints": NotRequired[Dict[str, Sequence[str]]], 151 | "observables": NotRequired[Dict[str, int]], 152 | "$include": NotRequired[Union[str, Sequence[str]]], 153 | "@deprecated": NotRequired[OcsfDeprecationInfo], 154 | "references": NotRequired[OcsfReferences], 155 | }, 156 | ) 157 | 158 | 159 | OcsfEvent = TypedDict( 160 | "OcsfEvent", 161 | { 162 | "attributes": Dict[str, OcsfAttr], 163 | "caption": NotRequired[str], 164 | "name": NotRequired[str], 165 | "uid": NotRequired[int], 166 | "category": NotRequired[str], 167 | "description": NotRequired[str], 168 | "family": NotRequired[str], 169 | "extends": NotRequired[Union[str, list[Optional[str]]]], 170 | "profiles": NotRequired[Sequence[str]], 171 | "associations": NotRequired[Dict[str, Sequence[str]]], 172 | "constraints": NotRequired[Dict[str, Sequence[str]]], 173 | "observables": NotRequired[Dict[str, int]], 174 | "$include": NotRequired[Union[str, Sequence[str]]], 175 | "@deprecated": NotRequired[OcsfDeprecationInfo], 176 | "references": NotRequired[OcsfReferences], 177 | }, 178 | ) 179 | 180 | T = TypeVar("T") 181 | PerExt = Dict[Optional[str], T] 182 | # Includable = Union[OcsfInclude, OcsfEnum, OcsfProfile] 183 | 184 | 185 | class OcsfSchema(TypedDict): 186 | categories: PerExt[OcsfCategories] 187 | dictionaries: PerExt[OcsfDictionary] 188 | observables: Dict[int, OcsfObject | OcsfAttr] 189 | objects: PerExt[Dict[str, OcsfObject]] 190 | events: PerExt[Dict[str, OcsfEvent]] 191 | includes: PerExt[Dict[str, OcsfInclude]] 192 | profiles: PerExt[Dict[str, 
OcsfProfile]] 193 | enums: PerExt[Dict[str, OcsfEnum]] 194 | 195 | 196 | def is_ocsf_type(t: type): 197 | return ( 198 | t is OcsfEnumMember 199 | or t is OcsfEnum 200 | or t is OcsfDeprecationInfo 201 | or t is OcsfAttr 202 | or t is OcsfExtension 203 | or t is OcsfDictionaryTypes 204 | or t is OcsfDictionary 205 | or t is OcsfCategory 206 | or t is OcsfCategories 207 | or t is OcsfInclude 208 | or t is OcsfProfile 209 | or t is OcsfObject 210 | or t is OcsfEvent 211 | ) 212 | 213 | 214 | def leaf_type(defn: type, prop: str) -> type | None: 215 | if hasattr(defn, "__annotations__") and prop in defn.__annotations__: 216 | t = defn.__annotations__[prop] 217 | if hasattr(t, "__args__"): 218 | return t.__args__[-1] 219 | else: 220 | return t 221 | return None 222 | -------------------------------------------------------------------------------- /ocsf_validator/validators.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path, PurePath 3 | from typing import Any, Callable, Dict, List, Optional 4 | 5 | import jsonschema 6 | import referencing 7 | import referencing.exceptions 8 | 9 | from ocsf_validator.errors import ( 10 | Collector, 11 | IllegalObservableTypeIDError, 12 | InvalidAttributeTypeError, 13 | InvalidMetaSchemaError, 14 | InvalidMetaSchemaFileError, 15 | MissingRequiredKeyError, 16 | ObservableTypeIDCollisionError, 17 | TypeNameCollisionError, 18 | UndefinedAttributeError, 19 | UndetectableTypeError, 20 | UnknownCategoryError, 21 | UnknownKeyError, 22 | UnusedAttributeError, 23 | ) 24 | from ocsf_validator.matchers import ( 25 | AnyMatcher, 26 | CategoriesMatcher, 27 | DictionaryMatcher, 28 | EventMatcher, 29 | ExtensionMatcher, 30 | ObjectMatcher, 31 | ProfileMatcher, 32 | ) 33 | from ocsf_validator.processor import process_includes 34 | from ocsf_validator.reader import Reader 35 | from ocsf_validator.type_mapping import TypeMapping 36 | from ocsf_validator.types import ( 37 | ATTRIBUTES_KEY, 38 | CATEGORY_KEY, 39 | INCLUDE_KEY, 40 | OBSERVABLE_KEY, 41 | OBSERVABLES_KEY, 42 | TYPES_KEY, 43 | OcsfEvent, 44 | OcsfObject, 45 | is_ocsf_type, 46 | leaf_type, 47 | ) 48 | 49 | METASCHEMA_MATCHERS = { 50 | "event.schema.json": EventMatcher(), 51 | "object.schema.json": ObjectMatcher(), 52 | "profile.schema.json": ProfileMatcher(), 53 | "categories.schema.json": CategoriesMatcher(), 54 | "dictionary.schema.json": DictionaryMatcher(), 55 | "extension.schema.json": ExtensionMatcher(), 56 | } 57 | 58 | 59 | def validate_required_keys( 60 | reader: Reader, 61 | collector: Collector = Collector.default, 62 | types: Optional[TypeMapping] = None, 63 | ): 64 | """Validate that no required keys are missing.""" 65 | 66 | if types is None: 67 | types = TypeMapping(reader) 68 | 69 | def compare_keys( 70 | data: Dict[str, Any], defn: type, file: str, trail: list[str] = [] 71 | ): 72 | if hasattr(defn, "__required_keys__"): 73 | for k in defn.__required_keys__: # type: ignore 74 | t = leaf_type(defn, k) 75 | if k not in data: 76 | collector.handle(MissingRequiredKeyError(k, file, defn, trail)) 77 | elif t is not None and is_ocsf_type(t): 78 | if isinstance(data[k], dict): 79 | # dict[str, Ocsf____] 80 | for k2, val in data[k].items(): 81 | if k2 != INCLUDE_KEY: 82 | compare_keys(data[k][k2], t, file, trail + [k, k2]) 83 | else: 84 | compare_keys(data[k], t, file, trail + [k]) 85 | 86 | else: 87 | collector.handle( 88 | InvalidMetaSchemaError( 89 | f"Unexpected definition {defn} used when processing {file}" 90 | ) 91 | ) 92 | 93 | 
def validate(reader: Reader, file: str): 94 | record = reader[file] 95 | if file not in types: 96 | collector.handle(UndetectableTypeError(file)) 97 | else: 98 | defn = types[file] 99 | if not hasattr(defn, "__annotations__"): 100 | collector.handle(InvalidMetaSchemaError(f"{defn} is not a TypedDict")) 101 | compare_keys(record, defn, file) 102 | 103 | reader.apply(validate) 104 | 105 | 106 | def validate_no_unknown_keys( 107 | reader: Reader, 108 | collector: Collector = Collector.default, 109 | types: Optional[TypeMapping] = None, 110 | ): 111 | """Validate that there are no unknown keys.""" 112 | 113 | if types is None: 114 | types = TypeMapping(reader) 115 | 116 | def compare_keys( 117 | data: Dict[str, Any], defn: type, file: str, trail: list[str] = [] 118 | ): 119 | if hasattr(defn, "__annotations__") and isinstance(data, dict): 120 | for k in data.keys(): 121 | t = leaf_type(defn, k) 122 | if t is None: 123 | collector.handle(UnknownKeyError(k, file, defn, trail)) 124 | elif is_ocsf_type(t): 125 | if hasattr(defn.__annotations__[k], "__args__"): 126 | args = defn.__annotations__[k].__args__ 127 | if len(args) >= 2: 128 | if args[-2] == str: 129 | for k2, val in data[k].items(): 130 | if k2 != INCLUDE_KEY: 131 | compare_keys( 132 | data[k][k2], t, file, trail + [k, k2] 133 | ) 134 | else: 135 | ... # what would this be? 136 | else: 137 | compare_keys(data[k], args[-1], file, trail + [k]) 138 | else: 139 | compare_keys(data[k], t, file, trail + [k]) 140 | 141 | else: 142 | collector.handle( 143 | InvalidMetaSchemaError( 144 | f"Unexpected definition {defn} used when processing {file}" 145 | ) 146 | ) 147 | 148 | def validate(reader: Reader, file: str): 149 | record = reader[file] 150 | if file not in types: 151 | collector.handle(UndetectableTypeError(file)) 152 | else: 153 | defn = types[file] 154 | if not hasattr(defn, "__annotations__"): 155 | collector.handle(InvalidMetaSchemaError(f"{defn} is not a TypedDict")) 156 | compare_keys(record, defn, file) 157 | 158 | reader.apply(validate) 159 | 160 | 161 | def validate_include_targets( 162 | reader: Reader, 163 | collector: Collector = Collector.default, 164 | types: Optional[TypeMapping] = None, 165 | ): 166 | process_includes(reader, collector=collector, types=types, update=False) 167 | 168 | 169 | def validate_unused_attrs( 170 | reader: Reader, 171 | collector: Collector = Collector.default, 172 | types: Optional[TypeMapping] = None, 173 | ): 174 | if types is None: 175 | types = TypeMapping(reader) 176 | 177 | # TODO: Lift validate() function out and use a TypeMapping 178 | def make_validator(defn: type): 179 | def validate(reader: Reader, key: str, accum: set[str]): 180 | record = reader[key] 181 | if ATTRIBUTES_KEY in record: 182 | return accum | set( 183 | [k for k in record[ATTRIBUTES_KEY]] 184 | ) # should it be defn[attrs][k]['name'] ? 
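# (Editor's note -- illustration only, not a line of validators.py: Reader.map()
# threads the `accum` argument through each matched file, so this closure acts
# as a fold. The call reader.map(make_validator(OcsfObject), ObjectMatcher(), set())
# just below thus accumulates the set of attribute names used across all
# object definitions.)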
185 | else: 186 | return accum 187 | 188 | return validate 189 | 190 | attrs = reader.map(make_validator(OcsfObject), ObjectMatcher(), set()) 191 | attrs |= reader.map(make_validator(OcsfEvent), EventMatcher(), set()) 192 | 193 | d = reader.find("dictionary.json") 194 | 195 | if d is not None: 196 | for k in d[ATTRIBUTES_KEY]: 197 | if k not in attrs: 198 | collector.handle(UnusedAttributeError(k)) 199 | 200 | 201 | def validate_undefined_attrs( 202 | reader: Reader, 203 | collector: Collector = Collector.default, 204 | types: Optional[TypeMapping] = None, 205 | ): 206 | if types is None: 207 | types = TypeMapping(reader) 208 | 209 | EXCLUDE = ["$include"] 210 | 211 | dicts = [] 212 | for d in reader.match(DictionaryMatcher()): 213 | dicts.append(reader[d]) 214 | 215 | if len(dicts) == 0: 216 | collector.handle(InvalidMetaSchemaError()) 217 | 218 | def validate(reader: Reader, file: str): 219 | record = reader[file] 220 | if ATTRIBUTES_KEY in record: 221 | for k in record[ATTRIBUTES_KEY]: 222 | found = False 223 | for d in dicts: 224 | if k in d[ATTRIBUTES_KEY]: 225 | found = True 226 | 227 | if found is False and k not in EXCLUDE: 228 | collector.handle(UndefinedAttributeError(k, file)) 229 | 230 | reader.apply( 231 | validate, 232 | AnyMatcher([ObjectMatcher(), EventMatcher(), ProfileMatcher()]), 233 | ) 234 | 235 | 236 | def validate_intra_type_collisions( 237 | reader: Reader, 238 | collector: Collector = Collector.default, 239 | types: Optional[TypeMapping] = None, 240 | ): 241 | if types is None: 242 | types = TypeMapping(reader) 243 | 244 | found: dict[str, dict[str, list[str]]] = {} 245 | 246 | def validate(reader: Reader, file: str): 247 | t = str(types[file]) 248 | if t not in found: 249 | found[t] = {} 250 | 251 | # The patch extends case _always_ has the same name as its base 252 | if "name" in reader[file] and not _is_patch_extends(reader[file]): 253 | name = reader[file]["name"] 254 | if name not in found[t]: 255 | found[t][name] = [] 256 | else: 257 | collector.handle( 258 | TypeNameCollisionError(name, t, file, found[t][name][0]) 259 | ) 260 | found[t][name].append(file) 261 | 262 | reader.apply(validate, AnyMatcher([ObjectMatcher(), EventMatcher()])) 263 | 264 | 265 | def _default_get_registry(reader: Reader, base_uri: str) -> referencing.Registry: 266 | registry: referencing.Registry = referencing.Registry() 267 | 268 | for schema_file_path in reader.metaschema_path.glob("*.schema.json"): # type: ignore 269 | with open(schema_file_path, "r") as file: 270 | schema = json.load(file) 271 | resource = referencing.Resource.from_contents(schema) # type: ignore 272 | registry = registry.with_resource( 273 | base_uri + schema_file_path.name, resource=resource 274 | ) 275 | return registry 276 | 277 | 278 | def validate_metaschemas( 279 | reader: Reader, 280 | collector: Collector = Collector.default, 281 | types: Optional[TypeMapping] = None, 282 | get_registry: Callable[[Reader, str], referencing.Registry] = _default_get_registry, 283 | ) -> None: 284 | if types is None: 285 | types = TypeMapping(reader) 286 | 287 | base_uri = "https://schemas.ocsf.io/" 288 | registry = get_registry(reader, base_uri) 289 | 290 | for metaschema, matcher in METASCHEMA_MATCHERS.items(): 291 | try: 292 | schema = registry.resolver(base_uri).lookup(metaschema).contents 293 | except referencing.exceptions.Unresolvable as exc: 294 | collector.handle( 295 | InvalidMetaSchemaFileError( 296 | f"The metaschema file for {metaschema} is invalid or missing."
297 | f" Error: {type(exc).__name__}" 298 | ) 299 | ) 300 | continue 301 | 302 | def validate(reader: Reader, file: str) -> None: 303 | data = reader.contents(file) 304 | validator = jsonschema.Draft202012Validator(schema, registry=registry) 305 | errors = sorted(validator.iter_errors(data), key=lambda e: e.path) 306 | for error in errors: 307 | collector.handle( 308 | InvalidMetaSchemaError( 309 | f"File at {file} does not pass metaschema validation. " 310 | f"Error: {error.message} at JSON path: '{error.json_path}'" 311 | ) 312 | ) 313 | 314 | reader.apply(validate, matcher) 315 | 316 | 317 | def validate_attr_types( 318 | reader: Reader, 319 | collector: Collector = Collector.default, 320 | types: Optional[TypeMapping] = None, 321 | ) -> None: 322 | if types is None: 323 | types = TypeMapping(reader) 324 | 325 | EXCLUDE = ["$include"] 326 | 327 | dicts = [] 328 | for d in reader.match(DictionaryMatcher()): 329 | dicts.append(reader[d]) 330 | 331 | if len(dicts) == 0: 332 | collector.handle(InvalidMetaSchemaError()) 333 | 334 | ## Build a list of object names 335 | def names(reader: Reader, file: str, accum: list[str]) -> list[str]: 336 | if "name" in reader[file]: 337 | accum.append(reader[file]["name"]) 338 | 339 | return accum 340 | 341 | objects: list[str] = [] 342 | reader.map(names, ObjectMatcher(), objects) 343 | 344 | # Validation for each file 345 | def validate(reader: Reader, file: str): 346 | record = reader[file] 347 | if ATTRIBUTES_KEY in record: 348 | for k in record[ATTRIBUTES_KEY]: 349 | if k not in EXCLUDE: 350 | attr = record[ATTRIBUTES_KEY][k] 351 | if "type" in attr: 352 | found = False 353 | 354 | if attr["type"][-2:] == "_t": 355 | # Scalar type; check dictionaries. 356 | for d in dicts: 357 | if ( 358 | TYPES_KEY in d 359 | and attr["type"] in d[TYPES_KEY][ATTRIBUTES_KEY] 360 | ): 361 | found = True 362 | else: 363 | # Object type; check objects in repository. 364 | found = attr["type"] in objects 365 | 366 | if found is False: 367 | collector.handle( 368 | InvalidAttributeTypeError(attr["type"], k, file) 369 | ) 370 | 371 | reader.apply( 372 | validate, 373 | AnyMatcher([ObjectMatcher(), EventMatcher(), ProfileMatcher()]), 374 | ) 375 | 376 | 377 | def validate_observables( 378 | reader: Reader, 379 | collector: Collector = Collector.default, 380 | types: Optional[TypeMapping] = None, 381 | ) -> str: 382 | """ 383 | Validate defined observable type_id values: 384 | * Ensure there are no collisions. 385 | * Ensure no definitions in "hidden" (intermediate) classes and objects. 386 | 387 | NOTE: This must be called _before_ merging extends to avoid incorrectly detecting 388 | collisions between parent and child classes and objects -- specifically 389 | before runner.process_includes. 
390 | """ 391 | observables = validate_and_get_observables(reader, collector) 392 | return observables_to_string(observables) 393 | 394 | 395 | # Factored out to a function for unit testing 396 | def observables_to_string(observables: Dict[Any, List[str]]) -> str: 397 | strs = [" Observables:"] 398 | # Supplying key function is needed for when type_ids are incorrectly defined as 399 | # something other than ints 400 | type_ids = sorted(observables.keys(), key=_lenient_to_int) 401 | for tid in type_ids: 402 | collision = "" 403 | if len(observables[tid]) > 1: 404 | collision = "💥COLLISION💥 " 405 | strs.append(f' {tid:7} →️ {collision}{", ".join(observables[tid])}') 406 | return "\n".join(strs) 407 | 408 | 409 | def _lenient_to_int(value) -> int: 410 | try: 411 | return int(value) 412 | except ValueError: 413 | return -1 414 | 415 | 416 | def validate_and_get_observables( 417 | reader: Reader, collector: Collector = Collector.default 418 | ) -> Dict[Any, List[str]]: 419 | """ 420 | Actual validation implementation. 421 | This exists so unit tests can interrogate the generated `observables` dictionary. 422 | """ 423 | # Map of observable type_ids to list of definitions 424 | observables: Dict[Any, List[str]] = {} 425 | 426 | def check_collision(type_id: Any, name: str, file: str) -> None: 427 | if type_id in observables: 428 | definitions = observables[type_id] 429 | collector.handle( 430 | ObservableTypeIDCollisionError(type_id, name, definitions, file) 431 | ) 432 | definitions.append(name) 433 | else: 434 | observables[type_id] = [name] 435 | 436 | def any_attribute_has_observable(source: Dict[str, Any]) -> bool: 437 | # Returns true if any attribute defines an observable 438 | if ATTRIBUTES_KEY in source: 439 | for item in source[ATTRIBUTES_KEY].values(): 440 | if OBSERVABLE_KEY in item: 441 | return True 442 | return False 443 | 444 | def check_attributes( 445 | source: Dict[str, Any], name_fn: Callable[[str, Dict[str, Any]], str], file: str 446 | ): 447 | if ATTRIBUTES_KEY in source: 448 | for a_key, item in source[ATTRIBUTES_KEY].items(): 449 | if OBSERVABLE_KEY in item: 450 | check_collision(item[OBSERVABLE_KEY], name_fn(a_key, item), file) 451 | 452 | def validate_dictionaries(reader: Reader, file: str) -> None: 453 | if TYPES_KEY in reader[file]: 454 | check_attributes( 455 | reader[file][TYPES_KEY], 456 | lambda a_key, item: f'"{a_key}" (Dictionary Type)', 457 | file, 458 | ) 459 | 460 | check_attributes( 461 | reader[file], 462 | lambda a_key, item: f'"{a_key}" (Dictionary Attribute)', 463 | file, 464 | ) 465 | 466 | def validate_classes(reader: Reader, file: str) -> None: 467 | # Classes do not have top-level "observable" attribute -- you can't specify an 468 | # entire class as an observable. 469 | 470 | # Check for illegal definition in "hidden" classes. Hidden (or "intermediate") 471 | # classes are those that are not a patch extends case, the name isn't 472 | # "base_class", and class doesn't have a "uid". 473 | if ( 474 | not _is_patch_extends(reader[file]) 475 | and "base_event" != reader[file].get("name") 476 | and "uid" not in reader[file] 477 | ): 478 | if any_attribute_has_observable(reader[file]): 479 | cause = ( 480 | f"Illegal definition of one or more attributes with" 481 | f' "{OBSERVABLE_KEY}" in hidden class, file "{file}": defining' 482 | f" attribute observables in a hidden class (classes other than" 483 | f' "base_event" without a "uid") causes collisions in child' 484 | f" classes. 
Instead define observables (of any kind) in non-hidden" 485 | f" child classes." 486 | ) 487 | collector.handle(IllegalObservableTypeIDError(cause)) 488 | 489 | if OBSERVABLES_KEY in reader[file]: 490 | cause = ( 491 | f'Illegal "{OBSERVABLES_KEY}" definition in hidden class, file' 492 | f' "{file}": defining attribute path based observables in a hidden' 493 | f' class (classes other than "base_event" without a "uid") causes' 494 | f" collisions in child classes. Instead define observables (of any" 495 | f" kind) in non-hidden child classes." 496 | ) 497 | collector.handle(IllegalObservableTypeIDError(cause)) 498 | 499 | # Check class-specific attributes 500 | check_attributes( 501 | reader[file], 502 | lambda a_key, item: f"{_item_name(reader[file])} class: {a_key}" 503 | f" (Class-Specific Attribute)", 504 | file, 505 | ) 506 | 507 | # Check class-specific attribute path observables 508 | if OBSERVABLES_KEY in reader[file]: 509 | for attribute_path in reader[file][OBSERVABLES_KEY]: 510 | check_collision( 511 | reader[file][OBSERVABLES_KEY][attribute_path], 512 | f"{_item_name(reader[file])} class: {attribute_path}" 513 | f" (Class-Specific Attribute Path)", 514 | file, 515 | ) 516 | 517 | def validate_objects(reader: Reader, file: str) -> None: 518 | # Special-case: the "observable" object model's type_id enum has the base for 519 | # observable type_id typically defining 0: "Unknown" and 99: "Other", which are 520 | # otherwise not defined. 521 | if ( 522 | reader[file].get("name") == "observable" 523 | and ATTRIBUTES_KEY in reader[file] 524 | and "type_id" in reader[file][ATTRIBUTES_KEY] 525 | and "enum" in reader[file][ATTRIBUTES_KEY]["type_id"] 526 | ): 527 | enum_dict = reader[file][ATTRIBUTES_KEY]["type_id"]["enum"] 528 | for observable_type_id_str, enum in enum_dict.items(): 529 | name = enum.get("caption", f"Observable enum {observable_type_id_str}") 530 | check_collision(int(observable_type_id_str), name, file) 531 | 532 | # Check for illegal definition in "hidden" objects. Hidden (or "intermediate") 533 | # objects are those that are not a patch extends case, and the name has a 534 | # leading underscore. 535 | if ( 536 | not _is_patch_extends(reader[file]) 537 | and "name" in reader[file] 538 | and PurePath(reader[file]["name"]).name.startswith("_") 539 | ): 540 | if OBSERVABLE_KEY in reader[file]: 541 | cause = ( 542 | f'Illegal "{OBSERVABLE_KEY}" definition in hidden object,' 543 | f' file "{file}": defining top-level observable in a hidden' 544 | f" object (name with leading underscore) causes collisions" 545 | f" in child objects. Instead define observables (of any kind) in" 546 | f" non-hidden child objects." 547 | ) 548 | collector.handle(IllegalObservableTypeIDError(cause)) 549 | 550 | if any_attribute_has_observable(reader[file]): 551 | cause = ( 552 | f"Illegal definition of one or more attributes with" 553 | f' "{OBSERVABLE_KEY}" in hidden object, file "{file}": defining' 554 | f" attribute observables in a hidden object (name with leading" 555 | f" underscore) causes collisions in child objects. Instead define" 556 | f" observables (of any kind) in non-hidden child objects." 
557 | ) 558 | collector.handle(IllegalObservableTypeIDError(cause)) 559 | 560 | # Check top-level observable -- entire object is an observable 561 | if OBSERVABLE_KEY in reader[file]: 562 | check_collision( 563 | reader[file][OBSERVABLE_KEY], 564 | f"{_item_name(reader[file])} (Object)", 565 | file, 566 | ) 567 | 568 | # Check object-specific attributes 569 | check_attributes( 570 | reader[file], 571 | lambda a_key, item: f"{_item_name(reader[file])} object: {a_key}" 572 | f" (Object-Specific Attribute)", 573 | file, 574 | ) 575 | 576 | reader.apply(validate_dictionaries, DictionaryMatcher()) 577 | reader.apply(validate_classes, EventMatcher()) 578 | reader.apply(validate_objects, ObjectMatcher()) 579 | 580 | return observables 581 | 582 | 583 | def _item_name(item): 584 | if _is_patch_extends(item): 585 | suffix = " [patch extends]" 586 | else: 587 | suffix = "" 588 | name = item.get("name") 589 | if name: 590 | return f'"{name}"{suffix}' 591 | extends = item.get("extends") 592 | if extends: 593 | return f'"{extends}"{suffix}' 594 | return f"{suffix}" 595 | 596 | 597 | def _is_patch_extends(item): 598 | """ 599 | Returns True if class or object is a "special" patch extends, which allows 600 | extensions to modify core schema classes and objects. 601 | """ 602 | name = item.get("name") 603 | if name is None: 604 | name = item.get("extends") 605 | return name == item.get("extends") 606 | 607 | 608 | def validate_event_categories( 609 | reader: Reader, 610 | collector: Collector = Collector.default, 611 | types: Optional[TypeMapping] = None, 612 | ): 613 | # Initialize categories list with "other" since it isn't defined in categories.json 614 | categories = {"other"} 615 | 616 | def gather_categories(reader: Reader, file: str) -> None: 617 | if ATTRIBUTES_KEY in reader[file]: 618 | categories.update(reader[file][ATTRIBUTES_KEY].keys()) 619 | 620 | def validate_classes(reader: Reader, file: str) -> None: 621 | if ( 622 | CATEGORY_KEY in reader[file] 623 | and reader[file][CATEGORY_KEY] not in categories 624 | ): 625 | collector.handle(UnknownCategoryError(reader[file][CATEGORY_KEY], file)) 626 | 627 | reader.apply(gather_categories, CategoriesMatcher()) 628 | reader.apply(validate_classes, EventMatcher()) 629 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
2 | 3 | [[package]] 4 | name = "attrs" 5 | version = "23.2.0" 6 | description = "Classes Without Boilerplate" 7 | optional = false 8 | python-versions = ">=3.7" 9 | files = [ 10 | {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, 11 | {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, 12 | ] 13 | 14 | [package.extras] 15 | cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] 16 | dev = ["attrs[tests]", "pre-commit"] 17 | docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] 18 | tests = ["attrs[tests-no-zope]", "zope-interface"] 19 | tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] 20 | tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] 21 | 22 | [[package]] 23 | name = "black" 24 | version = "24.3.0" 25 | description = "The uncompromising code formatter." 26 | optional = false 27 | python-versions = ">=3.8" 28 | files = [ 29 | {file = "black-24.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395"}, 30 | {file = "black-24.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995"}, 31 | {file = "black-24.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7"}, 32 | {file = "black-24.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0"}, 33 | {file = "black-24.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9"}, 34 | {file = "black-24.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597"}, 35 | {file = "black-24.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d"}, 36 | {file = "black-24.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5"}, 37 | {file = "black-24.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f"}, 38 | {file = "black-24.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11"}, 39 | {file = "black-24.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4"}, 40 | {file = "black-24.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5"}, 41 | {file = "black-24.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837"}, 42 | {file = "black-24.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd"}, 43 | {file = "black-24.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213"}, 44 | {file = "black-24.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959"}, 45 | {file = 
"black-24.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb"}, 46 | {file = "black-24.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7"}, 47 | {file = "black-24.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7"}, 48 | {file = "black-24.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f"}, 49 | {file = "black-24.3.0-py3-none-any.whl", hash = "sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93"}, 50 | {file = "black-24.3.0.tar.gz", hash = "sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f"}, 51 | ] 52 | 53 | [package.dependencies] 54 | click = ">=8.0.0" 55 | mypy-extensions = ">=0.4.3" 56 | packaging = ">=22.0" 57 | pathspec = ">=0.9.0" 58 | platformdirs = ">=2" 59 | 60 | [package.extras] 61 | colorama = ["colorama (>=0.4.3)"] 62 | d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] 63 | jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] 64 | uvloop = ["uvloop (>=0.15.2)"] 65 | 66 | [[package]] 67 | name = "click" 68 | version = "8.1.7" 69 | description = "Composable command line interface toolkit" 70 | optional = false 71 | python-versions = ">=3.7" 72 | files = [ 73 | {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, 74 | {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, 75 | ] 76 | 77 | [package.dependencies] 78 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 79 | 80 | [[package]] 81 | name = "colorama" 82 | version = "0.4.6" 83 | description = "Cross-platform colored terminal text." 84 | optional = false 85 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" 86 | files = [ 87 | {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, 88 | {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, 89 | ] 90 | 91 | [[package]] 92 | name = "iniconfig" 93 | version = "2.0.0" 94 | description = "brain-dead simple config-ini parsing" 95 | optional = false 96 | python-versions = ">=3.7" 97 | files = [ 98 | {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, 99 | {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, 100 | ] 101 | 102 | [[package]] 103 | name = "isort" 104 | version = "5.13.2" 105 | description = "A Python utility / library to sort Python imports." 
106 | optional = false 107 | python-versions = ">=3.8.0" 108 | files = [ 109 | {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, 110 | {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, 111 | ] 112 | 113 | [package.extras] 114 | colors = ["colorama (>=0.4.6)"] 115 | 116 | [[package]] 117 | name = "jsonschema" 118 | version = "4.21.1" 119 | description = "An implementation of JSON Schema validation for Python" 120 | optional = false 121 | python-versions = ">=3.8" 122 | files = [ 123 | {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"}, 124 | {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"}, 125 | ] 126 | 127 | [package.dependencies] 128 | attrs = ">=22.2.0" 129 | jsonschema-specifications = ">=2023.03.6" 130 | referencing = ">=0.28.4" 131 | rpds-py = ">=0.7.1" 132 | 133 | [package.extras] 134 | format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] 135 | format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] 136 | 137 | [[package]] 138 | name = "jsonschema-specifications" 139 | version = "2023.12.1" 140 | description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" 141 | optional = false 142 | python-versions = ">=3.8" 143 | files = [ 144 | {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, 145 | {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, 146 | ] 147 | 148 | [package.dependencies] 149 | referencing = ">=0.31.0" 150 | 151 | [[package]] 152 | name = "mypy-extensions" 153 | version = "1.0.0" 154 | description = "Type system extensions for programs checked with the mypy type checker." 
155 | optional = false 156 | python-versions = ">=3.5" 157 | files = [ 158 | {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, 159 | {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, 160 | ] 161 | 162 | [[package]] 163 | name = "nodeenv" 164 | version = "1.8.0" 165 | description = "Node.js virtual environment builder" 166 | optional = false 167 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" 168 | files = [ 169 | {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, 170 | {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, 171 | ] 172 | 173 | [package.dependencies] 174 | setuptools = "*" 175 | 176 | [[package]] 177 | name = "packaging" 178 | version = "24.0" 179 | description = "Core utilities for Python packages" 180 | optional = false 181 | python-versions = ">=3.7" 182 | files = [ 183 | {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, 184 | {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, 185 | ] 186 | 187 | [[package]] 188 | name = "pathspec" 189 | version = "0.12.1" 190 | description = "Utility library for gitignore style pattern matching of file paths." 191 | optional = false 192 | python-versions = ">=3.8" 193 | files = [ 194 | {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, 195 | {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, 196 | ] 197 | 198 | [[package]] 199 | name = "platformdirs" 200 | version = "4.2.0" 201 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
202 | optional = false 203 | python-versions = ">=3.8" 204 | files = [ 205 | {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, 206 | {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, 207 | ] 208 | 209 | [package.extras] 210 | docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] 211 | test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] 212 | 213 | [[package]] 214 | name = "pluggy" 215 | version = "1.4.0" 216 | description = "plugin and hook calling mechanisms for python" 217 | optional = false 218 | python-versions = ">=3.8" 219 | files = [ 220 | {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, 221 | {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, 222 | ] 223 | 224 | [package.extras] 225 | dev = ["pre-commit", "tox"] 226 | testing = ["pytest", "pytest-benchmark"] 227 | 228 | [[package]] 229 | name = "pyright" 230 | version = "1.1.355" 231 | description = "Command line wrapper for pyright" 232 | optional = false 233 | python-versions = ">=3.7" 234 | files = [ 235 | {file = "pyright-1.1.355-py3-none-any.whl", hash = "sha256:bf30b6728fd68ae7d09c98292b67152858dd89738569836896df786e52b5fe48"}, 236 | {file = "pyright-1.1.355.tar.gz", hash = "sha256:dca4104cd53d6484e6b1b50b7a239ad2d16d2ffd20030bcf3111b56f44c263bf"}, 237 | ] 238 | 239 | [package.dependencies] 240 | nodeenv = ">=1.6.0" 241 | 242 | [package.extras] 243 | all = ["twine (>=3.4.1)"] 244 | dev = ["twine (>=3.4.1)"] 245 | 246 | [[package]] 247 | name = "pytest" 248 | version = "7.4.4" 249 | description = "pytest: simple powerful testing with Python" 250 | optional = false 251 | python-versions = ">=3.7" 252 | files = [ 253 | {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, 254 | {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, 255 | ] 256 | 257 | [package.dependencies] 258 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 259 | iniconfig = "*" 260 | packaging = "*" 261 | pluggy = ">=0.12,<2.0" 262 | 263 | [package.extras] 264 | testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] 265 | 266 | [[package]] 267 | name = "referencing" 268 | version = "0.34.0" 269 | description = "JSON Referencing + Python" 270 | optional = false 271 | python-versions = ">=3.8" 272 | files = [ 273 | {file = "referencing-0.34.0-py3-none-any.whl", hash = "sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4"}, 274 | {file = "referencing-0.34.0.tar.gz", hash = "sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844"}, 275 | ] 276 | 277 | [package.dependencies] 278 | attrs = ">=22.2.0" 279 | rpds-py = ">=0.7.0" 280 | 281 | [[package]] 282 | name = "rpds-py" 283 | version = "0.18.0" 284 | description = "Python bindings to Rust's persistent data structures (rpds)" 285 | optional = false 286 | python-versions = ">=3.8" 287 | files = [ 288 | {file = "rpds_py-0.18.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e"}, 289 | {file = "rpds_py-0.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7"}, 290 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f"}, 291 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51"}, 292 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40"}, 293 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da"}, 294 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1"}, 295 | {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434"}, 296 | {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3"}, 297 | {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e"}, 298 | {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88"}, 299 | {file = "rpds_py-0.18.0-cp310-none-win32.whl", hash = "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337"}, 300 | {file = "rpds_py-0.18.0-cp310-none-win_amd64.whl", hash = "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66"}, 301 | {file = "rpds_py-0.18.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4"}, 302 | {file = "rpds_py-0.18.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6"}, 303 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58"}, 304 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf"}, 305 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c"}, 306 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1"}, 307 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5"}, 308 | {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6"}, 309 | {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688"}, 310 | {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_i686.whl", 
hash = "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b"}, 311 | {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836"}, 312 | {file = "rpds_py-0.18.0-cp311-none-win32.whl", hash = "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1"}, 313 | {file = "rpds_py-0.18.0-cp311-none-win_amd64.whl", hash = "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa"}, 314 | {file = "rpds_py-0.18.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0"}, 315 | {file = "rpds_py-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8"}, 316 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475"}, 317 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f"}, 318 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c"}, 319 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9"}, 320 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3"}, 321 | {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157"}, 322 | {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496"}, 323 | {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f"}, 324 | {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7"}, 325 | {file = "rpds_py-0.18.0-cp312-none-win32.whl", hash = "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98"}, 326 | {file = "rpds_py-0.18.0-cp312-none-win_amd64.whl", hash = "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec"}, 327 | {file = "rpds_py-0.18.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e"}, 328 | {file = "rpds_py-0.18.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58"}, 329 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb"}, 330 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861"}, 331 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73"}, 332 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07"}, 333 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d"}, 334 | {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c"}, 335 | {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f"}, 336 | {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c"}, 337 | {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594"}, 338 | {file = "rpds_py-0.18.0-cp38-none-win32.whl", hash = "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e"}, 339 | {file = "rpds_py-0.18.0-cp38-none-win_amd64.whl", hash = "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1"}, 340 | {file = "rpds_py-0.18.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33"}, 341 | {file = "rpds_py-0.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467"}, 342 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab"}, 343 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40"}, 344 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1"}, 345 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022"}, 346 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9"}, 347 | {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f"}, 348 | {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e"}, 349 | {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024"}, 350 | {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20"}, 351 | {file = "rpds_py-0.18.0-cp39-none-win32.whl", hash = "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7"}, 352 | {file = "rpds_py-0.18.0-cp39-none-win_amd64.whl", hash = "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294"}, 353 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f"}, 354 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e"}, 355 | {file = 
"rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca"}, 356 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7"}, 357 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641"}, 358 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948"}, 359 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795"}, 360 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18"}, 361 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1"}, 362 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984"}, 363 | {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124"}, 364 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb"}, 365 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461"}, 366 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd"}, 367 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863"}, 368 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05"}, 369 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e"}, 370 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3"}, 371 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880"}, 372 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80"}, 373 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da"}, 374 | {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd"}, 375 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307"}, 376 | 
{file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d"}, 377 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4"}, 378 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76"}, 379 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c"}, 380 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17"}, 381 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66"}, 382 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24"}, 383 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1"}, 384 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432"}, 385 | {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f"}, 386 | {file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"}, 387 | ] 388 | 389 | [[package]] 390 | name = "setuptools" 391 | version = "69.2.0" 392 | description = "Easily download, build, install, upgrade, and uninstall Python packages" 393 | optional = false 394 | python-versions = ">=3.8" 395 | files = [ 396 | {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, 397 | {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, 398 | ] 399 | 400 | [package.extras] 401 | docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] 402 | testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] 403 | testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] 404 | 405 | [[package]] 406 | name = "termcolor" 407 | version = "2.4.0" 408 | description = "ANSI color formatting for output in terminal" 
409 | optional = false 410 | python-versions = ">=3.8" 411 | files = [ 412 | {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, 413 | {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, 414 | ] 415 | 416 | [package.extras] 417 | tests = ["pytest", "pytest-cov"] 418 | 419 | [metadata] 420 | lock-version = "2.0" 421 | python-versions = "^3.11" 422 | content-hash = "4641b55170a77a7338a47a0f7dc66ab3a764af99f30ea925f2623a210f3532d4" 423 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ocsf-validator" 3 | version = "0.2.3" 4 | description = "OCSF Schema Validation" 5 | authors = [ 6 | "Jeremy Fisher ", 7 | "Alan Pinkert ", 8 | "Rick Mouritzen ", 9 | ] 10 | readme = "README.md" 11 | packages = [{include = "ocsf_validator"}] 12 | 13 | [tool.poetry.dependencies] 14 | jsonschema = "^4.21.1" 15 | python = "^3.11" 16 | termcolor = "^2.4.0" 17 | 18 | [tool.poetry.group.dev.dependencies] 19 | isort = "^5.12.0" 20 | black = ">=23.9.1,<25.0.0" 21 | pytest = "^7.4.2" 22 | pyright = "^1.1.327" 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | 28 | [tool.isort] 29 | profile = "black" 30 | -------------------------------------------------------------------------------- /tests/test_dependencies.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | 5 | from ocsf_validator.errors import * 6 | from ocsf_validator.processor import * 7 | from ocsf_validator.reader import DictReader, Reader 8 | 9 | 10 | def attributes(attrs: list = []) -> dict[str, Any]: 11 | d = {} 12 | for a in attrs: 13 | d[a] = {"name": a} 14 | return {"attributes": d} 15 | 16 | 17 | def obj(name: str = "object", attrs: list = []) -> dict[str, Any]: 18 | return {"name": name, "caption": ""} | attributes(attrs) 19 | 20 | 21 | def event(name: str = "event", attrs: list = []) -> dict[str, Any]: 22 | return {"name": name, "caption": ""} | attributes(attrs) 23 | 24 | 25 | def test_include_one(): 26 | net = attributes(["proxy", "src_ip"]) 27 | net["name"] = "network" 28 | net["name2"] = "network" 29 | httpa = event("http_activity") 30 | httpa["$include"] = "profiles/network.json" 31 | 32 | s = { 33 | "/events/network/http_activity.json": httpa, 34 | "/profiles/network.json": net, 35 | "/dictionary.json": attributes(["stuff"]), 36 | } 37 | 38 | r = DictReader() 39 | r.set_data(s) 40 | process_includes(r) 41 | 42 | assert "attributes" in r["/events/network/http_activity.json"] 43 | assert "name2" in r["/events/network/http_activity.json"] 44 | assert r["/events/network/http_activity.json"]["name"] == "http_activity" 45 | assert "proxy" in r["/events/network/http_activity.json"]["attributes"] 46 | 47 | 48 | def test_include_many(): 49 | net = attributes(["proxy", "src_ip"]) 50 | thing = attributes(["dest_ip", "score"]) 51 | httpa = event("http_activity") 52 | httpa["$include"] = ["profiles/network.json", "events/thing.json"] 53 | 54 | s = { 55 | "/events/network/http_activity.json": httpa, 56 | "/profiles/network.json": net, 57 | "/events/thing.json": thing, 58 | "/dictionary.json": attributes(["stuff"]), 59 | } 60 | 61 | r = DictReader() 62 | r.set_data(s) 63 | process_includes(r) 64 | 65 | assert "attributes" in 
r["/events/network/http_activity.json"] 66 | assert r["/events/network/http_activity.json"]["name"] == "http_activity" 67 | assert "proxy" in r["/events/network/http_activity.json"]["attributes"] 68 | assert "dest_ip" in r["/events/network/http_activity.json"]["attributes"] 69 | 70 | 71 | def test_include_attrs(): 72 | net = attributes(["proxy", "src_ip"]) 73 | thing = attributes(["dest_ip", "score"]) 74 | httpa = event("http_activity") 75 | httpa["attributes"]["$include"] = ["profiles/network.json", "events/thing.json"] 76 | 77 | s = { 78 | "/events/network/http_activity.json": httpa, 79 | "/profiles/network.json": net, 80 | "/events/thing.json": thing, 81 | "/dictionary.json": attributes(["stuff"]), 82 | } 83 | 84 | r = DictReader() 85 | r.set_data(s) 86 | process_includes(r) 87 | 88 | assert "attributes" in r["/events/network/http_activity.json"] 89 | assert r["/events/network/http_activity.json"]["name"] == "http_activity" 90 | assert "proxy" in r["/events/network/http_activity.json"]["attributes"] 91 | assert "dest_ip" in r["/events/network/http_activity.json"]["attributes"] 92 | 93 | 94 | def test_missing_include(): 95 | httpa = event("http_activity") 96 | httpa["attributes"]["$include"] = "profiles/network.json" 97 | 98 | s = { 99 | "/events/network/http_activity.json": httpa, 100 | "/dictionary.json": attributes(["stuff"]), 101 | } 102 | 103 | r = DictReader() 104 | r.set_data(s) 105 | 106 | with pytest.raises(MissingIncludeError): 107 | process_includes(r) 108 | 109 | 110 | def test_extends(): 111 | base = event("base_event", ["thing"]) 112 | httpa = event("http_activity") 113 | httpa["extends"] = "base_event" 114 | 115 | s = { 116 | "/events/network/http_activity.json": httpa, 117 | "/events/base_event.json": base, 118 | "/dictionary.json": attributes(["stuff"]), 119 | } 120 | r = DictReader() 121 | r.set_data(s) 122 | 123 | process_includes(r) 124 | 125 | assert "thing" in r["/events/network/http_activity.json"]["attributes"] 126 | 127 | 128 | def test_profiles_basic(): 129 | prof = event("profile1", ["thing"]) 130 | httpa = event("http_activity") 131 | httpa["profiles"] = "profile1" 132 | 133 | s = { 134 | "/events/network/http_activity.json": httpa, 135 | "/profiles/profile1.json": prof, 136 | "/dictionary.json": attributes(["stuff"]), 137 | } 138 | r = DictReader() 139 | r.set_data(s) 140 | 141 | process_includes(r) 142 | 143 | assert "thing" in r["/events/network/http_activity.json"]["attributes"] 144 | 145 | 146 | def test_profiles_many(): 147 | prof1 = event("profile1", ["thing1"]) 148 | prof2 = event("profile2", ["thing2"]) 149 | httpa = event("http_activity") 150 | httpa["profiles"] = ["profile1", "profile2"] 151 | 152 | s = { 153 | "/events/network/http_activity.json": httpa, 154 | "/profiles/profile1.json": prof1, 155 | "/profiles/profile2.json": prof2, 156 | "/dictionary.json": attributes(["stuff"]), 157 | } 158 | r = DictReader() 159 | r.set_data(s) 160 | 161 | process_includes(r) 162 | 163 | assert "thing1" in r["/events/network/http_activity.json"]["attributes"] 164 | assert "thing2" in r["/events/network/http_activity.json"]["attributes"] 165 | 166 | 167 | def test_profiles(): 168 | prof = event("profile1", ["thing"]) 169 | prof["meta"] = "stuff" 170 | httpa = event("http_activity") 171 | httpa["profiles"] = "profile1" 172 | prof2 = event("profile1", ["thing2"]) 173 | neta = event("network_activity") 174 | neta["profiles"] = "profile1" 175 | 176 | s = { 177 | "/extensions/one/events/network/http_activity.json": httpa, 178 | 
"/extensions/one/profiles/profile1.json": prof, 179 | "/events/network/net_activity.json": neta, 180 | "/profiles/profile1.json": prof2, 181 | "/dictionary.json": attributes(["stuff"]), 182 | } 183 | r = DictReader() 184 | r.set_data(s) 185 | 186 | process_includes(r) 187 | 188 | assert ( 189 | "thing" in r["/extensions/one/events/network/http_activity.json"]["attributes"] 190 | ) 191 | assert "meta" not in r["/extensions/one/events/network/http_activity.json"] 192 | assert ( 193 | "thing2" 194 | not in r["/extensions/one/events/network/http_activity.json"]["attributes"] 195 | ) 196 | assert "thing" not in r["/events/network/net_activity.json"]["attributes"] 197 | assert "thing2" in r["/events/network/net_activity.json"]["attributes"] 198 | 199 | 200 | def test_attrs_from_dictionary(): 201 | o1 = obj("o1", ["thing"]) 202 | o1["attributes"]["thing"]["name"] = "thing1" 203 | 204 | d = { 205 | "attributes": { 206 | "thing": {"name": "thing", "caption": "Thing", "requirement": "optional"}, 207 | "thing2": {"name": "thing2"}, 208 | } 209 | } 210 | 211 | s = { 212 | "/objects/o1.json": o1, 213 | "/dictionary.json": d, 214 | } 215 | r = DictReader() 216 | r.set_data(s) 217 | 218 | process_includes(r) 219 | assert "thing" in r["/objects/o1.json"]["attributes"] 220 | assert r["/objects/o1.json"]["attributes"]["thing"]["name"] is "thing1" 221 | assert "thing2" not in r["/objects/o1.json"]["attributes"] 222 | assert "requirement" in r["/objects/o1.json"]["attributes"]["thing"] 223 | -------------------------------------------------------------------------------- /tests/test_mapping.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ocsf_validator.reader import DictReader 4 | from ocsf_validator.type_mapping import * 5 | 6 | 7 | def test_mapping(): 8 | s = { 9 | "/dictionary.json": {}, 10 | "/objects/object.json": {}, 11 | "/categories.json": {}, 12 | "/profiles/profile.json": {}, 13 | "/version.json": {}, 14 | "/events/event.json": {}, 15 | "/extensions/a/events/event.json": {}, 16 | "/extensions/a/objects/object.json": {}, 17 | "/extensions/a/profiles/profile.json": {}, 18 | } 19 | r = DictReader() 20 | r.set_data(s) 21 | tm = TypeMapping(r) 22 | 23 | assert tm["/dictionary.json"] is OcsfDictionary 24 | assert tm["/events/event.json"] is OcsfEvent 25 | assert tm["/extensions/a/events/event.json"] is OcsfEvent 26 | assert tm["/objects/object.json"] is OcsfObject 27 | assert tm["/extensions/a/objects/object.json"] is OcsfObject 28 | assert tm["/categories.json"] is OcsfCategories 29 | assert tm["/version.json"] is OcsfVersion 30 | assert tm["/profiles/profile.json"] is OcsfProfile 31 | assert tm["/extensions/a/profiles/profile.json"] is OcsfProfile 32 | -------------------------------------------------------------------------------- /tests/test_matchers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ocsf_validator.matchers import * 4 | from ocsf_validator.types import * 5 | 6 | 7 | def test_dictionary_matcher(): 8 | m = DictionaryMatcher() 9 | 10 | assert m.match("dictionary.json") is True 11 | assert m.match("/dictionary.json") is True 12 | assert m.match("/extension/win/dictionary.json") is True 13 | assert m.match("/objects/thing.json") is False 14 | assert m.get_type() is OcsfDictionary 15 | 16 | 17 | def test_object_matcher(): 18 | m = ObjectMatcher() 19 | 20 | assert m.match("/objects/thing.json") is True 21 | assert m.match("/extensions/win/objects/thing.json") 
is True 22 | assert m.match("objects/win/objects/thing.json") is True 23 | assert m.match("/events/thing.json") is False 24 | assert m.get_type() is OcsfObject 25 | 26 | 27 | def test_event_matcher(): 28 | m = EventMatcher() 29 | 30 | assert m.match("/events/base_event.json") is True 31 | assert m.match("events/activity/network_activity.json") is True 32 | assert m.match("events/filesystem/filesystem.json") is True 33 | assert m.match("/extensions/win/events/activity/network_activity.json") is True 34 | assert m.get_type() is OcsfEvent 35 | 36 | 37 | def test_extension_matcher(): 38 | m = ExtensionMatcher() 39 | 40 | assert m.match("/extensions/ext1/extension.json") is True 41 | assert m.match("/extension.json") is False 42 | assert m.get_type() is OcsfExtension 43 | 44 | 45 | def test_exclude_matcher(): 46 | m = ExcludeMatcher(ExtensionMatcher()) 47 | 48 | assert m.match("/extensions/ext1/extension.json") is False 49 | assert m.match("/extension.json") is True 50 | 51 | 52 | def test_make_matcher(): 53 | m = Matcher.make(".*thing.json") 54 | 55 | assert m.match("thing.json") is True 56 | -------------------------------------------------------------------------------- /tests/test_reader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ocsf_validator.matchers import GlobMatcher 6 | from ocsf_validator.reader import DictReader, Reader 7 | 8 | event = {"name": "an event"} 9 | obj = {"name": "an object"} 10 | data = { 11 | "/events/base_event.json": event.copy(), 12 | "/events/application/application.json": event.copy(), 13 | "/objects/os.json": obj.copy(), 14 | "/extensions/win/objects/win_process.json": obj.copy(), 15 | "/extensions/win/events/system/registry_key.json": event.copy(), 16 | "/dictionary.json": obj.copy(), 17 | } 18 | 19 | 20 | def reader(): 21 | r = DictReader() 22 | r.set_data(data) 23 | return r 24 | 25 | 26 | def test_get_item(): 27 | r = reader() 28 | assert r["/objects/os.json"] == obj 29 | assert r.contents("/objects/os.json") == obj 30 | 31 | 32 | def test_set_item(): 33 | r = reader() 34 | r["/objects/api.json"] = {"name": "api"} 35 | assert r["/objects/api.json"]["name"] == "api" 36 | 37 | 38 | def test_apply(): 39 | r = reader() 40 | 41 | def mark(reader: Reader, key: str): 42 | reader[key]["test"] = True 43 | 44 | r.apply(mark, GlobMatcher("objects/*")) 45 | assert r["/objects/os.json"]["test"] is True 46 | assert r["/extensions/win/objects/win_process.json"]["test"] is True 47 | 48 | 49 | def test_find(): 50 | r = reader() 51 | f = r.find("objects", "os.json") 52 | assert f is not None 53 | assert "name" in f 54 | 55 | 56 | def test_map(): 57 | r = reader() 58 | 59 | def f(reader: Reader, key: str, acc: int): 60 | return acc + 1 61 | 62 | matches = r.map(f, GlobMatcher("objects/*"), 0) 63 | assert matches == 2 64 | 65 | 66 | def test_ls(): 67 | r = reader() 68 | 69 | matches = r.ls() 70 | assert "objects" in matches 71 | assert "win" not in matches 72 | 73 | matches = r.ls("objects") 74 | assert "os.json" in matches 75 | assert "win" not in matches 76 | 77 | matches = r.ls("extensions") 78 | assert "os.json" not in matches 79 | assert "win" in matches 80 | 81 | matches = r.ls("events", files=False) 82 | assert "application" in matches 83 | assert "base_event.json" not in matches 84 | 85 | matches = r.ls("events", dirs=False) 86 | assert "application" not in matches 87 | assert "base_event.json" in matches 88 |
-------------------------------------------------------------------------------- /tests/test_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ocsf_validator.types import * 4 | 5 | 6 | def test_is_ocsf_type(): 7 | assert is_ocsf_type(OcsfDictionary) is True 8 | assert is_ocsf_type(OcsfAttr) is True 9 | assert is_ocsf_type(str) is False 10 | -------------------------------------------------------------------------------- /tests/test_validators.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ocsf_validator.reader import DictReader, ReaderOptions 4 | from ocsf_validator.validators import * 5 | 6 | d1 = { 7 | "/extensions/ext1/extension.json": { 8 | "uid": 1, 9 | "name": "ext1", 10 | "path": "ext1", 11 | # "caption": "Extension One" 12 | "color": "blue", 13 | } 14 | } 15 | 16 | 17 | def test_required_keys(): 18 | r = DictReader() 19 | r.set_data(d1) 20 | 21 | with pytest.raises(MissingRequiredKeyError): 22 | validate_required_keys(r) 23 | 24 | 25 | def test_deep_required_keys(): 26 | s = { 27 | "/events/event.json": { 28 | "caption": "Event", 29 | "name": "event", 30 | "attributes": { 31 | "one": { 32 | "name": "one", 33 | }, 34 | }, 35 | }, 36 | } 37 | r = DictReader() 38 | r.set_data(s) 39 | 40 | with pytest.raises(MissingRequiredKeyError) as exc: 41 | validate_required_keys(r) 42 | assert exc.value.key == "caption" 43 | 44 | 45 | def test_unknown_keys(): 46 | r = DictReader() 47 | r.set_data(d1) 48 | 49 | with pytest.raises(UnknownKeyError): 50 | validate_no_unknown_keys(r) 51 | 52 | 53 | def test_validate_unused_attrs(): 54 | r = DictReader() 55 | r.set_data( 56 | { 57 | "/dictionary.json": { 58 | "attributes": { 59 | "one": { 60 | "name": "one", 61 | "caption": "One", 62 | }, 63 | "two": { 64 | "name": "two", 65 | "caption": "Two", 66 | }, 67 | "three": { 68 | "name": "three", 69 | "caption": "Three", 70 | }, 71 | }, 72 | }, 73 | "/objects/thing.json": { 74 | "name": "thing", 75 | "attributes": { 76 | "one": {"name": "one"}, 77 | }, 78 | }, 79 | "/events/stuff/another-thing.json": { 80 | "name": "thing", 81 | "attributes": { 82 | "two": {"name": "two"}, 83 | }, 84 | }, 85 | } 86 | ) 87 | 88 | with pytest.raises(UnusedAttributeError) as exc: 89 | validate_unused_attrs(r) 90 | assert exc.value.attr == "three" 91 | 92 | 93 | def test_validate_undefined_attrs(): 94 | r = DictReader() 95 | r.set_data( 96 | { 97 | "/dictionary.json": { 98 | "attributes": { 99 | "one": { 100 | "name": "one", 101 | "caption": "One", 102 | }, 103 | }, 104 | }, 105 | "/objects/thing.json": { 106 | "name": "thing", 107 | "attributes": { 108 | "one": {"name": "one"}, 109 | "two": {"name": "two"}, 110 | }, 111 | }, 112 | } 113 | ) 114 | 115 | with pytest.raises(UndefinedAttributeError) as exc: 116 | validate_undefined_attrs(r) 117 | assert exc.value.attr == "two" 118 | 119 | 120 | def test_validate_intra_type_collisions(): 121 | r = DictReader() 122 | r.set_data( 123 | { 124 | "/objects/thing.json": { 125 | "name": "thing", 126 | "attributes": { 127 | "one": {"name": "one"}, 128 | "two": {"name": "two"}, 129 | }, 130 | }, 131 | "/objects/thing2.json": { 132 | "name": "thing", 133 | "attributes": {}, 134 | }, 135 | } 136 | ) 137 | 138 | with pytest.raises(TypeNameCollisionError) as exc: 139 | validate_intra_type_collisions(r) 140 | assert exc.value.name == "thing" 141 | 142 | r["/events/event.json"] = {"name": "thing"} 143 | r["/objects/thing2.json"] = {"name": "thing2"} 144 | # no
error 145 | validate_intra_type_collisions(r) 146 | 147 | 148 | def test_validate_attr_keys(): 149 | r = DictReader() 150 | r.set_data( 151 | { 152 | "/objects/thing.json": { 153 | "name": "thing", 154 | "attributes": { 155 | "one": {"name": "one", "type": "string_t"}, 156 | "two": {"name": "two", "type": "thing2"}, 157 | }, 158 | }, 159 | "/objects/thing2.json": { 160 | "name": "thing2", 161 | "attributes": {}, 162 | }, 163 | "/objects/dictionary.json": { 164 | "types": { 165 | "attributes": { 166 | "string_t": {}, 167 | }, 168 | }, 169 | }, 170 | } 171 | ) 172 | 173 | # raise no errors 174 | validate_attr_types(r) 175 | 176 | r["/objects/thing2.json"]["name"] = "thing3" 177 | with pytest.raises(InvalidAttributeTypeError): 178 | validate_attr_types(r) 179 | 180 | 181 | def test_validate_observables(): 182 | good_data = { 183 | "dictionary.json": { 184 | "attributes": { 185 | "name": {"caption": "Name", "type": "string_t"}, 186 | "alpha": {"caption": "Alpha", "type": "string_t"}, 187 | "beta": {"caption": "Beta", "type": "string_t"}, 188 | "gamma": {"caption": "Gamma", "type": "gamma_t", "observable": 1}, 189 | "delta": {"caption": "Delta", "type": "delta_t"}, 190 | }, 191 | "types": { 192 | "attributes": { 193 | "string_t": {"caption": "String"}, 194 | "integer_t": {"caption": "Integer"}, 195 | "gamma_t": { 196 | "caption": "Gamma_T", 197 | "type": "string_t", 198 | "type_name": "String", 199 | }, 200 | "delta_t": { 201 | "caption": "Delta_T", 202 | "type": "integer_t", 203 | "type_name": "Integer", 204 | "observable": 2, 205 | }, 206 | }, 207 | }, 208 | }, 209 | "/objects/bird.json": { 210 | "name": "bird", 211 | "caption": "Bird", 212 | "attributes": { 213 | "name": {"requirement": "required"}, 214 | "alpha": {"requirement": "required"}, 215 | }, 216 | }, 217 | "/objects/cat.json": { 218 | "name": "cat", 219 | "caption": "Cat", 220 | "observable": 10, 221 | "attributes": { 222 | "name": {"requirement": "required"}, 223 | "alpha": {"requirement": "required"}, 224 | }, 225 | }, 226 | "/objects/dog.json": { 227 | "name": "dog", 228 | "caption": "Dog", 229 | "attributes": { 230 | "name": {"requirement": "required"}, 231 | "alpha": {"requirement": "required", "observable": 11}, 232 | }, 233 | }, 234 | "/objects/dog_house.json": { 235 | "name": "dog_house", 236 | "caption": "Dog House", 237 | "attributes": {"tenant": {"type": "dog", "requirement": "required"}}, 238 | "observables": {"dog.name": 12}, 239 | }, 240 | "/events/blue.json": { 241 | "uid": 1, 242 | "name": "blue", 243 | "caption": "Blue", 244 | }, 245 | "/events/green.json": { 246 | "uid": 2, 247 | "name": "green", 248 | "caption": "Green", 249 | }, 250 | "/events/red.json": { 251 | "uid": 3, 252 | "name": "red", 253 | "caption": "Red", 254 | "attributes": {"beta": {"requirement": "required", "observable": 100}}, 255 | }, 256 | "/events/yellow.json": { 257 | "uid": 4, 258 | "name": "yellow", 259 | "caption": "Yellow", 260 | "attributes": {"bird": {"requirement": "required"}}, 261 | "observables": {"bird.name": 101}, 262 | }, 263 | } 264 | 265 | observables = validate_and_get_observables(DictReader(good_data)) 266 | assert observables is not None 267 | assert len(observables) == 6 268 | print("\ntest_validate_observables - collected observables:") 269 | print(observables_to_string(observables)) 270 | 271 | with pytest.raises(IllegalObservableTypeIDError): 272 | bad_data = dict(good_data) 273 | bad_data["/objects/_hidden.json"] = { 274 | "name": "_hidden", 275 | "caption": "Hidden", 276 | "observable": 1, 277 | } 278 | 
validate_observables(DictReader(bad_data)) 279 | 280 | with pytest.raises(IllegalObservableTypeIDError): 281 | bad_data = dict(good_data) 282 | bad_data["/objects/_hidden.json"] = { 283 | "name": "_hidden", 284 | "caption": "Hidden", 285 | "attributes": {"beta": {"requirement": "required", "observable": 1}}, 286 | } 287 | validate_observables(DictReader(bad_data)) 288 | 289 | with pytest.raises(IllegalObservableTypeIDError): 290 | bad_data = dict(good_data) 291 | bad_data["/events/_hidden.json"] = { 292 | "name": "hidden", 293 | "caption": "Hidden", 294 | "attributes": {"beta": {"requirement": "required", "observable": 1}}, 295 | } 296 | validate_observables(DictReader(bad_data)) 297 | 298 | with pytest.raises(ObservableTypeIDCollisionError): 299 | bad_data = dict(good_data) 300 | dictionary_attributes = bad_data["dictionary.json"]["attributes"] 301 | dictionary_attributes["epsilon"] = { 302 | "caption": "Epsilon", 303 | "type": "string_t", 304 | "observable": 1, 305 | } 306 | validate_observables(DictReader(bad_data)) 307 | 308 | with pytest.raises(ObservableTypeIDCollisionError): 309 | bad_data = dict(good_data) 310 | dictionary_types_attributes = bad_data["dictionary.json"]["types"]["attributes"] 311 | dictionary_types_attributes["epsilon_t"] = ( 312 | { 313 | "caption": "Epsilon_T", 314 | "type": "string_t", 315 | "type_name": "String", 316 | "observable": 2, 317 | } 318 | ) 319 | validate_observables(DictReader(bad_data)) 320 | 321 | 322 | def test_validate_event_categories(): 323 | good_data = { 324 | "categories.json": { 325 | "attributes": { 326 | "alpha": {"caption": "Alpha", "uid": 1}, 327 | "beta": {"caption": "Beta", "uid": 2}, 328 | } 329 | }, 330 | "events/foo.json": {"caption": "Foo", "category": "alpha"}, 331 | "events/bar.json": {"caption": "Bar", "category": "beta"}, 332 | "events/baz.json": {"caption": "Baz", "category": "other"}, 333 | "events/guux.json": {"caption": "Quux"}, 334 | } 335 | validate_event_categories(DictReader(good_data)) 336 | 337 | bad_data = { 338 | "categories.json": { 339 | "attributes": { 340 | "alpha": {"caption": "Alpha", "uid": 1}, 341 | "beta": {"caption": "Beta", "uid": 2}, 342 | } 343 | }, 344 | "events/foo.json": {"caption": "Foo", "category": "alpha"}, 345 | "events/bar.json": {"caption": "Bar", "category": "gamma"}, 346 | "events/baz.json": {"caption": "Baz", "category": "other"}, 347 | "events/guux.json": {"caption": "Quux"}, 348 | } 349 | with pytest.raises(UnknownCategoryError): 350 | validate_event_categories(DictReader(bad_data)) 351 | 352 | 353 | def test_validate_metaschemas(): 354 | # set up a json schema that expects an object with a name property only 355 | object_json_schema = { 356 | "$id": "https://fake.schema.ocsf.io/object.schema.json", 357 | "$schema": "https://json-schema.org/draft/2020-12/schema", 358 | "title": "Object", 359 | "type": "object", 360 | "required": ["name"], 361 | "properties": {"name": {"type": "string"}}, 362 | "additionalProperties": False, 363 | } 364 | 365 | def _get_registry(reader, base_uri) -> referencing.Registry: 366 | registry: referencing.Registry = referencing.Registry() 367 | for schema in METASCHEMA_MATCHERS.keys(): 368 | resource = referencing.Resource.from_contents(object_json_schema) # type: ignore 369 | registry = registry.with_resource(base_uri + schema, resource=resource) 370 | return registry 371 | 372 | options = ReaderOptions(base_path=Path("")) 373 | 374 | # test that a bad schema fails validation 375 | r = DictReader(options) 376 | r.set_data( 377 | { 378 | "/objects/thing.json":
{ 379 | "notARealAttribute": "thing", 380 | }, 381 | } 382 | ) 383 | 384 | with pytest.raises(InvalidMetaSchemaError) as exc: 385 | validate_metaschemas(r, get_registry=_get_registry) 386 | 387 | # test that a good schema passes validation 388 | r = DictReader(options) 389 | r.set_data( 390 | { 391 | "/objects/thing.json": { 392 | "name": "thing", 393 | }, 394 | } 395 | ) 396 | 397 | validate_metaschemas(r, get_registry=_get_registry) 398 | 399 | # test that a missing metaschema file fails validation 400 | def _get_blank_registry(reader, base_uri): 401 | registry = referencing.Registry() 402 | return registry 403 | 404 | with pytest.raises(InvalidMetaSchemaFileError) as exc: 405 | validate_metaschemas(r, get_registry=_get_blank_registry) 406 | --------------------------------------------------------------------------------