├── .github ├── FUNDING.yml └── workflows │ ├── linting.yml │ ├── publish.yml │ └── run-tests.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── docs ├── changelog.md ├── command-line.md ├── development.md ├── index.md ├── plugins.md ├── python-module.md └── reference.md ├── docs_theme └── partials │ └── copyright.html ├── mkdocs.yml ├── pyproject.toml ├── src └── ixbrlparse │ ├── __about__.py │ ├── __init__.py │ ├── __main__.py │ ├── cli │ └── __init__.py │ ├── components │ ├── __init__.py │ ├── _base.py │ ├── constants.py │ ├── context.py │ ├── formats.py │ ├── nonnumeric.py │ ├── numeric.py │ └── transform.py │ ├── core.py │ ├── hookspecs.py │ └── plugins.py ├── stubs └── word2number │ ├── __init__.pyi │ ├── py.typed │ └── w2n.pyi └── tests ├── test_accounts ├── account_1.html ├── account_1.xml ├── account_2.html ├── account_3.html ├── account_4.html ├── account_5.html ├── account_6.xhtml ├── account_errors.html ├── account_errors.xml ├── account_errors_date.html └── account_errors_nonnumeric.html ├── test_classes.py ├── test_cli.py ├── test_formats.py ├── test_parse.py └── test_plugins.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [kanedata] 4 | -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: ["3.9"] 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | cache: pip 20 | cache-dependency-path: pyproject.toml 21 | - name: Install dependencies 22 | run: | 23 | 
pip install -e .[lint] 24 | - name: Lint 25 | run: | 26 | ruff check . 27 | ruff format --check --diff . 28 | mypy --install-types --non-interactive src/ixbrlparse tests 29 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | on: 3 | release: 4 | types: [created] 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | max-parallel: 4 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: pip 24 | cache-dependency-path: pyproject.toml 25 | - name: Install libraries for LXML 26 | run: | 27 | sudo apt-get install -y libxml2-dev libxslt-dev 28 | - name: Install dependencies 29 | run: | 30 | pip install -e .[test] 31 | - name: Run tests 32 | run: | 33 | coverage run -m pytest tests 34 | coverage combine 35 | coverage report --fail-under=95 36 | build: 37 | runs-on: ubuntu-latest 38 | needs: [test] 39 | steps: 40 | - uses: actions/checkout@v4 41 | - name: Set up Python 42 | uses: actions/setup-python@v5 43 | with: 44 | python-version: "3.12" 45 | cache: pip 46 | cache-dependency-path: pyproject.toml 47 | - name: Install dependencies 48 | run: | 49 | pip install hatch 50 | - name: Build 51 | run: | 52 | hatch build 53 | - name: Store the distribution packages 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: python-packages 57 | path: dist/ 58 | publish: 59 | name: Publish to PyPI 60 | runs-on: ubuntu-latest 61 | if: startsWith(github.ref, 'refs/tags/') 62 | needs: [build] 63 | environment: release 64 | permissions: 65 | id-token: write 66 | steps: 67 | - name: Download distribution packages 68 | 
uses: actions/download-artifact@v4 69 | with: 70 | name: python-packages 71 | path: dist/ 72 | - name: Publish to PyPI 73 | uses: pypa/gh-action-pypi-publish@release/v1 74 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | cache: pip 20 | cache-dependency-path: pyproject.toml 21 | - name: Install libraries for LXML 22 | run: | 23 | sudo apt-get install -y libxml2-dev libxslt-dev 24 | - name: Install dependencies 25 | run: | 26 | pip install -e .[test] 27 | - name: Run tests 28 | run: | 29 | coverage run -m pytest tests 30 | coverage combine 31 | coverage report --fail-under=95 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | working/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/ 164 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for MkDocs projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | mkdocs: 14 | configuration: mkdocs.yml 15 | 16 | # Optionally declare the Python requirements required to build your docs 17 | python: 18 | install: 19 | - method: pip 20 | path: . 21 | extra_requirements: 22 | - docs -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 David Kane 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ixbrl-parse 2 | 3 | ![Test status](https://github.com/kanedata/ixbrl-parse/workflows/tests/badge.svg) 4 | [![PyPI version](https://img.shields.io/pypi/v/ixbrlparse)](https://pypi.org/project/ixbrlparse/) 5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ixbrlparse) 6 | ![PyPI - License](https://img.shields.io/pypi/l/ixbrlparse) 7 | [![Documentation Status](https://readthedocs.org/projects/ixbrl-parse/badge/?version=latest)](https://ixbrl-parse.readthedocs.io/en/latest/?badge=latest) 8 | 9 | A python module for getting useful data out of iXBRL™ and XBRL™ files. The library is at an early stage - feedback and improvements are very welcome. 10 | 11 | Full documentation is available at [ixbrl-parse.readthedocs.io](https://ixbrl-parse.readthedocs.io/) 12 | 13 | For more about the iXBRL™ and XBRL™ standards, see the [specification site](https://specifications.xbrl.org/) 14 | and [XBRL International](https://www.xbrl.org/). This tool is not affiliated with XBRL International. 15 | 16 | **[Changelog](https://ixbrl-parse.readthedocs.io/en/latest/changelog/)** 17 | 18 | ## Requirements 19 | 20 | The module requires [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) and [lxml](https://lxml.de/) to parse the documents. 21 | 22 | If you're using Python 3.13 you may need to ensure that the `libxml2-dev` and `libxslt-dev` packages have been installed. 23 | 24 | [word2number](https://github.com/akshaynagpal/w2n) is used to process the 25 | numeric items with the `numsenwords` format. 
26 | 27 | ## How to install 28 | 29 | You can install from pypi using pip: 30 | 31 | ``` 32 | pip install ixbrlparse 33 | ``` 34 | 35 | ## How to use 36 | 37 | You can run the module directly to extract data from an iXBRL™ file. 38 | 39 | ```bash 40 | ixbrlparse example_file.html 41 | # or 42 | python -m ixbrlparse example_file.html 43 | ``` 44 | 45 | While primarily designed for iXBRL™ files, the parser should also work 46 | for XBRL™ files. 47 | 48 | The various options for using this can be found through: 49 | 50 | ```bash 51 | python -m ixbrlparse -h 52 | # optional arguments: 53 | # -h, --help show this help message and exit 54 | # --outfile OUTFILE Where to output the file 55 | # --format {csv,json,jsonlines,jsonl} 56 | # format of the output 57 | # --fields {numeric,nonnumeric,all} 58 | # Which fields to output 59 | ``` 60 | 61 | You can also use it as a python module (see [the documentation](https://ixbrl-parse.readthedocs.io/en/latest/python-module/) for more details) 62 | 63 | ## Development 64 | 65 | The module is set up for development using [hatch](https://hatch.pypa.io/latest/). It should be possible to run tests and linting without needing hatch, however. 
66 | 67 | ### Run tests 68 | 69 | Tests can be run with `pytest`: 70 | 71 | ```bash 72 | hatch run test 73 | ``` 74 | 75 | Without hatch, you'll need to run: 76 | 77 | ```bash 78 | pip install -e .[test] 79 | python -m pytest tests 80 | ``` 81 | 82 | ### Test coverage 83 | 84 | Run tests then report on coverage 85 | 86 | ```bash 87 | hatch run cov 88 | ``` 89 | 90 | Without hatch, you'll need to run: 91 | 92 | ```bash 93 | pip install -e .[test] 94 | coverage run -m pytest tests 95 | coverage report 96 | ``` 97 | 98 | Run tests then run a server showing where coverage is missing 99 | 100 | ```bash 101 | hatch run cov-html 102 | ``` 103 | 104 | Without hatch, you'll need to run: 105 | 106 | ```bash 107 | pip install -e .[test] 108 | coverage run -m pytest tests 109 | coverage report 110 | coverage html 111 | python -m http.server -d htmlcov 112 | ``` 113 | 114 | ### Run typing checks 115 | 116 | ```bash 117 | hatch run lint:typing 118 | ``` 119 | 120 | Without hatch, you'll need to run: 121 | 122 | ```bash 123 | pip install -e .[lint] 124 | mypy --install-types --non-interactive src/ixbrlparse tests 125 | ``` 126 | 127 | ### Linting 128 | 129 | Ruff should be run before committing any changes. 130 | 131 | To check for any changes needed: 132 | 133 | ```bash 134 | hatch run lint:style 135 | ``` 136 | 137 | Without hatch, you'll need to run: 138 | 139 | ```bash 140 | pip install -e .[lint] 141 | ruff check . 142 | ruff format --check --diff . 143 | ``` 144 | 145 | To run any autoformatting possible: 146 | 147 | ```sh 148 | hatch run lint:fmt 149 | ``` 150 | 151 | Without hatch, you'll need to run: 152 | 153 | ```bash 154 | pip install -e .[lint] 155 | ruff format . 156 | ruff check --fix . 157 | ``` 158 | 159 | ### Run all checks at once 160 | 161 | ```sh 162 | hatch run lint:all 163 | ``` 164 | 165 | Without hatch, you'll need to run: 166 | 167 | ```bash 168 | pip install -e .[lint] 169 | ruff check . 170 | ruff format --check --diff . 
171 | mypy --install-types --non-interactive src/ixbrlparse tests 172 | ``` 173 | 174 | ## Publish to pypi 175 | 176 | ```bash 177 | hatch build 178 | hatch publish 179 | git tag v 180 | git push origin v 181 | ``` 182 | 183 | ## Acknowledgements 184 | 185 | Developed by [David Kane](https://dkane.net/) of [Kane Data Ltd](https://kanedata.co.uk/) 186 | 187 | Originally developed for a project with 188 | [Power to Change](https://www.powertochange.org.uk/) looking at how to extract data from 189 | financial documents of community businesses. 190 | 191 | Thanks to the following users for their contributions: 192 | 193 | - [@avyfain](https://github.com/avyfain) 194 | - [@wcollinscw](https://github.com/wcollinscw) 195 | - [@ajmarks](https://github.com/ajmarks) 196 | - [@adobrinevski](https://github.com/adobrinevski) 197 | - [@JWFB](https://github.com/JWFB) 198 | - [@vin0110](https://github.com/vin0110) 199 | 200 | XBRL™ and iXBRL™ are trademarks of XBRL International, Inc. All rights reserved. 201 | 202 | The XBRL™ standards are open and freely licensed by way of the XBRL International License Agreement. Our use of these trademarks is permitted by XBRL International in accordance with the XBRL International Trademark Policy. 203 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | **New in version 0.9.0**: Change plugin loading to allow for overriding existing format specifiers. 4 | 5 | **New in version 0.8.2**: More permissive date format acceptance 6 | 7 | **New in version 0.8.1**: Add full support for python 3.12 8 | 9 | **New in version 0.8.0**: Add `raise_on_error` support if context parsing fails. This release also introduces the `ixbrlError` class if an error is found, instead of a dict - this is a minor breaking change compared to how errors were stored before. Add provisional support for python 3.12. 
10 | 11 | **New in version 0.7.1**: Allow for case-insensitive schema tags 12 | 13 | **New in version 0.7.0**: Add plugin support. Add documentation 14 | 15 | **New in version 0.6.0**: Switch to use the [hatch](https://hatch.pypa.io/latest/) build and development system. 16 | 17 | **New in version 0.5.4**: Added backreferences to BeautifulSoup objects - thanks to @avyfain for PR. 18 | 19 | **New in version 0.5.3**: Support for `exclude` and `continuation` elements within iXBRL™ documents. Thanks to @wcollinscw for adding support for exclude elements. 20 | 21 | **New in version 0.5**: Support for Python 3.11 has been added. I've had some problems with Python 3.11 and Windows as lxml binaries aren't yet available. Also new in version 0.5 is type checking - the whole library now has types added. 22 | 23 | **New in version 0.4**: I've added initial support for pure XBRL™ files as well as tagged HTML iXBRL™ files. Feedback on this feature is welcome - particularly around getting values out of numeric items. 24 | -------------------------------------------------------------------------------- /docs/command-line.md: -------------------------------------------------------------------------------- 1 | # Command line 2 | 3 | You can run the module directly to extract data from an iXBRL™ file. 4 | 5 | ```bash 6 | ixbrlparse example_file.html 7 | # or 8 | python -m ixbrlparse example_file.html 9 | ``` 10 | 11 | While primarily designed for iXBRL™ files, the parser should also work 12 | for XBRL™ files. 
13 | 14 | The various options for using this can be found through: 15 | 16 | ```bash 17 | python -m ixbrlparse -h 18 | # optional arguments: 19 | # -h, --help show this help message and exit 20 | # --outfile OUTFILE Where to output the file 21 | # --format {csv,json,jsonlines,jsonl} 22 | # format of the output 23 | # --fields {numeric,nonnumeric,all} 24 | # Which fields to output 25 | ``` 26 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | The module is setup for development using [hatch](https://hatch.pypa.io/latest/). 4 | 5 | ## Run tests 6 | 7 | Tests can be run with `pytest`: 8 | 9 | ```bash 10 | hatch run test 11 | ``` 12 | 13 | ## Test coverage 14 | 15 | Run tests then report on coverage 16 | 17 | ```bash 18 | hatch run cov 19 | ``` 20 | 21 | Run tests then run a server showing where coverage is missing 22 | 23 | ```bash 24 | hatch run cov-html 25 | ``` 26 | 27 | ## Run typing checks 28 | 29 | ```bash 30 | hatch run lint:typing 31 | ``` 32 | 33 | ## Linting 34 | 35 | Ruff should be run before committing any changes. 
36 | 37 | To check for any changes needed: 38 | 39 | ```bash 40 | hatch run lint:style 41 | ``` 42 | 43 | To run any autoformatting possible: 44 | 45 | ```sh 46 | hatch run lint:fmt 47 | ``` 48 | 49 | ## Run all checks at once 50 | 51 | ```sh 52 | hatch run lint:all 53 | ``` 54 | 55 | # Publish to pypi 56 | 57 | ```bash 58 | hatch build 59 | hatch publish 60 | git tag v 61 | git push origin v 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # ixbrlParse 2 | 3 | ![Test status](https://github.com/kanedata/ixbrl-parse/workflows/tests/badge.svg) 4 | [![PyPI version](https://img.shields.io/pypi/v/ixbrlparse)](https://pypi.org/project/ixbrlparse/) 5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ixbrlparse) 6 | ![PyPI - License](https://img.shields.io/pypi/l/ixbrlparse) 7 | [![Documentation Status](https://readthedocs.org/projects/ixbrl-parse/badge/?version=latest)](https://ixbrl-parse.readthedocs.io/en/latest/?badge=latest) 8 | 9 | A python module for getting useful data out of iXBRL™ and XBRL™ files. The library is at an early stage - feedback and improvements are very welcome. 10 | 11 | For more about the iXBRL™ and XBRL™ standards, see the [specification site](https://specifications.xbrl.org/) 12 | and [XBRL International](https://www.xbrl.org/). This tool is not affiliated with XBRL International. 13 | 14 | ## Requirements 15 | 16 | The module requires [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) and [lxml](https://lxml.de/) to parse the documents. 17 | 18 | [word2number](https://github.com/akshaynagpal/w2n) is used to process the 19 | numeric items with the `numsenwords` format. 
20 | 21 | ## How to install 22 | 23 | You can install from pypi using pip: 24 | 25 | ``` 26 | pip install ixbrlparse 27 | ``` 28 | 29 | ## Acknowledgements 30 | 31 | Developed by [David Kane](https://dkane.net/) of [Kane Data Ltd](https://kanedata.co.uk/) 32 | 33 | Originally developed for a project with 34 | [Power to Change](https://www.powertochange.org.uk/) looking at how to extract data from 35 | financial documents of community businesses. 36 | 37 | Thanks to the following users for their contributions: 38 | 39 | - [@avyfain](https://github.com/avyfain) 40 | - [@wcollinscw](https://github.com/wcollinscw) 41 | - [@ajmarks](https://github.com/ajmarks) 42 | - [@adobrinevski](https://github.com/adobrinevski) 43 | - [@JWFB](https://github.com/JWFB) 44 | 45 | XBRL™ and iXBRL™ are trademarks of XBRL International, Inc. All rights reserved. 46 | 47 | The XBRL™ standards are open and freely licensed by way of the XBRL International License Agreement. Our use of these trademarks is permitted by XBRL International in accordance with the XBRL International Trademark Policy. 48 | -------------------------------------------------------------------------------- /docs/plugins.md: -------------------------------------------------------------------------------- 1 | # Plugins 2 | 3 | The module allows for plugins to customize functionality, using the [pluggy](https://pluggy.readthedocs.io/en/stable/) framework. 4 | 5 | The only current plugin endpoint is to add more Formatters. A formatter takes a value from an iXBRL™ item and converts it into the appropriate python value. For example, the `ixtNumWordsEn` formatter would take a value like "eighty-five" and turn it into 85. 6 | 7 | The formats used within iXBRL™ files can vary between schemas and countries. Rather than try to cover everything in this module, you can write a plugin to support the format that you need. 
8 | 9 | ## Creating a plugin 10 | 11 | ### Create a custom format class 12 | 13 | To create a plugin, you first need to create a new format class that subclasses `ixbrlparse.ixbrlFormat`. This has two key components: 14 | 15 | - a `format_names` attribute which consists of a tuple of possible names for the format. These are the values that will be checked against the iXBRL™ items. These names will override any already defined by ixbrlparse, so it is possible to override the default implementation. 16 | - a `parse_value` function which takes the original text value and returns the processed value. 17 | 18 | An example class might look like (in the file `ixbrlparse-dateplugin/ixbrlparse_dateplugin.py`): 19 | 20 | ```python 21 | import ixbrlparse 22 | 23 | class ixtParseIsoDate(ixbrlparse.ixbrlFormat): 24 | format_names = ("isodateformat",) 25 | 26 | def parse_value(self, value): 27 | return datetime.datetime.strptime(value, "%Y-%m-%d").astimezone().date() 28 | ``` 29 | 30 | ### Hook into ixbrlparse 31 | 32 | Next you need to add a function which will hook into ixbrlparse at the right point. This function needs to be called `ixbrl_add_formats`, and returns a list of new format classes (added to the bottom of `ixbrlparse-dateplugin/ixbrlparse_dateplugin.py`): 33 | 34 | ```python 35 | @ixbrlparse.hookimpl 36 | def ixbrl_add_formats(): 37 | return [ixtParseIsoDate] 38 | ``` 39 | 40 | or you can specify the specname if you don't want to call the function `ixbrl_add_formats` 41 | 42 | ```python 43 | @ixbrlparse.hookimpl(specname="ixbrl_add_formats") 44 | def add_new_ixbrl_formats(): 45 | return [ixtParseIsoDate] 46 | ``` 47 | 48 | You then need to add an entrypoint to `setup.py` or to `pyproject.toml` which will be activated when your project is installed. 
This should look something like (using an example `ixbrlparse-dateplugin/setup.py`): 49 | 50 | ```python 51 | from setuptools import setup 52 | 53 | setup( 54 | name="ixbrlparse-dateplugin", 55 | install_requires="ixbrlparse", 56 | entry_points={"ixbrlparse": ["dateplugin = ixbrlparse_dateplugin"]}, 57 | py_modules=["ixbrlparse_dateplugin"], 58 | ) 59 | ``` 60 | 61 | Or to use `pyproject.toml` you would add 62 | 63 | ```toml 64 | [project.entry-points.ixbrlparse] 65 | dateplugin = "ixbrlparse_dateplugin" 66 | ``` 67 | 68 | ### Override an existing format 69 | 70 | By default, formats from plugins are loaded after the default formats included with the module. This means it is possible to override 71 | them by subclassing the format class. 72 | 73 | For example, if you wanted to add additional date formats to a format class: 74 | 75 | ```python 76 | from ixbrlparse.components.formats import ixtDateDayMonthYear 77 | 78 | class ixtDateDayMonthYearExtended(ixtDateDayMonthYear): 79 | date_format = (*ixtDateDayMonthYear.date_format, "%d-%b-%Y", "%d-%b-%y") 80 | 81 | @hookimpl 82 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 83 | return [ixtDateDayMonthYearExtended] 84 | ``` 85 | 86 | After installation, this new format class would override the existing formats for `ixtDateDayMonthYear` and 87 | allow for dates like "29 aug 2022" to be parsed as well as dates like "29/08/2022". 88 | 89 | ### Install the plugin 90 | 91 | If you then install the plugin it should be picked up by ixbrlparse and will also include the additional formats when checking. 92 | 93 | ## Acknowledgements 94 | 95 | The implementation of pluggy used here draws heavily on [pluggy's own tutorial](https://pluggy.readthedocs.io/en/stable/#a-complete-example) and @simonw's [implementation of plugins for datasette](https://docs.datasette.io/en/stable/plugins.html). 
96 | -------------------------------------------------------------------------------- /docs/python-module.md: -------------------------------------------------------------------------------- 1 | # Python module 2 | 3 | ## Import the `IXBRL` class which parses the file. 4 | 5 | ```python 6 | from ixbrlparse import IXBRL 7 | ``` 8 | 9 | ## Initialise an object and parse the file 10 | 11 | You need to pass a file handle or other object with a `.read()` method. 12 | 13 | ```python 14 | with open('sample_ixbrl.html', encoding="utf8") as a: 15 | x = IXBRL(a) 16 | ``` 17 | 18 | If your iXBRL™ data comes as a string then use a `io.StringIO` wrapper to 19 | pass it to the class: 20 | 21 | ```python 22 | import io 23 | from ixbrlparse import IXBRL 24 | 25 | content = '''''' 26 | x = IXBRL(io.StringIO(content)) 27 | ``` 28 | 29 | ## Get the contexts and units used in the data 30 | 31 | These are held in the object. The contexts are stored as a dictionary with the context 32 | id as the key, and a `ixbrlContext` object as the value. 33 | 34 | ```python 35 | print(x.contexts) 36 | # { 37 | # "cfwd_2018_03_31": ixbrlContext( 38 | # id="cfwd_2018_03_31", 39 | # entity="0123456", # company number 40 | # segments=[], # used for hypercubes 41 | # instant="2018-03-31", 42 | # startdate=None, # used for periods 43 | # enddate=None, # used for periods 44 | # ), 45 | # .... 46 | # } 47 | ``` 48 | 49 | The units are stored as key:value dictionary entries 50 | 51 | ```python 52 | print(x.units) 53 | # { 54 | # "GBP": "ISO4107:GBP" 55 | # "shares": "shares" 56 | # } 57 | ``` 58 | 59 | ## Get financial facts 60 | 61 | Numeric facts are stored in `x.numeric` as a list of `ixbrlNumeric` objects. 62 | The `ixbrlNumeric.value` object contains the value as a parsed python number 63 | (after the sign and scale formatting values have been applied). 64 | 65 | `ixbrlNumeric.context` holds the context object relating to this value. 
66 | The `.name` and `.schema` values give the key of this value, according to 67 | the applied schema. 68 | 69 | Non-numeric facts are stored in `x.nonnumeric` as a list of `ixbrlNonNumeric` 70 | objects, with similar `.value`, `.context`, `.name` and `.schema` values. 71 | The value of `.value` will be a string for non-numeric facts. 72 | 73 | ## Check for any parsing errors 74 | 75 | By default, the parser will throw an exception if it encounters an error 76 | when processing the document. 77 | 78 | You can pass `raise_on_error=False` to the initial object to suppress 79 | these exceptions. You can then access a list of the errors (and the element) 80 | that created them through the `.errors` attribute. For example: 81 | 82 | ```python 83 | with open('sample_ixbrl.html', encoding="utf8") as a: 84 | x = IXBRL(a, raise_on_error=False) 85 | print(x.errors) # populated with any exceptions found 86 | # [ eg... 87 | # ixbrlError( 88 | # error=, 89 | # element= 90 | # ) 91 | # ] 92 | ``` 93 | 94 | Note that the error catching is only available for parsing of `.nonnumeric` 95 | and `.numeric` items in the document, as well as context items. 96 | Any other errors with parsing will be thrown as normal no matter what 97 | `raise_on_error` is set to. Errors in `context` items may make it more difficult 98 | to use the resulting data. 
99 | -------------------------------------------------------------------------------- /docs/reference.md: -------------------------------------------------------------------------------- 1 | # API documentation 2 | 3 | ## ixbrlparse.IXBRL 4 | 5 | ::: src.ixbrlparse.core.IXBRL 6 | 7 | ## ixbrlparse.ixbrlFormat 8 | 9 | ::: src.ixbrlparse.components._base.ixbrlFormat 10 | 11 | ## ixbrlparse.ixbrlContext 12 | 13 | ::: src.ixbrlparse.components.context.ixbrlContext 14 | 15 | ## ixbrlparse.ixbrlNonNumeric 16 | 17 | ::: src.ixbrlparse.components.nonnumeric.ixbrlNonNumeric 18 | 19 | ## ixbrlparse.ixbrlNumeric 20 | 21 | ::: src.ixbrlparse.components.numeric.ixbrlNumeric 22 | -------------------------------------------------------------------------------- /docs_theme/partials/copyright.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ixbrlParse 2 | repo_url: https://github.com/kanedata/ixbrl-parse 3 | site_description: A python module for getting useful data out of ixbrl files. 
4 | site_author: David Kane 5 | theme: 6 | name: material 7 | custom_dir: docs_theme 8 | features: 9 | # - navigation.tabs 10 | - navigation.sections 11 | # - navigation.footer 12 | nav: 13 | - index.md 14 | - changelog.md 15 | - "Usage": 16 | - "command-line.md" 17 | - "python-module.md" 18 | - "Development": 19 | - "development.md" 20 | - "plugins.md" 21 | - reference.md 22 | plugins: 23 | - mkdocstrings: 24 | enabled: true 25 | default_handler: python 26 | handlers: 27 | python: 28 | options: 29 | heading_level: 3 30 | copyright: Copyright © 2018 - %%NOW%% Kane Data Limited 31 | watch: 32 | - docs_theme 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "ixbrlparse" 7 | dynamic = ["version"] 8 | description = "A python module for getting useful data out of ixbrl files." 
9 | readme = "README.md" 10 | license = "MIT" 11 | requires-python = ">=3.9" 12 | authors = [{ name = "David Kane", email = "david@dkane.net" }] 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "License :: OSI Approved :: MIT License", 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3 :: Only", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: 3.13", 23 | "Topic :: Office/Business :: Financial :: Accounting", 24 | "Topic :: Text Processing :: Markup :: HTML", 25 | "Topic :: Text Processing :: Markup :: XML", 26 | ] 27 | dependencies = [ 28 | "click", 29 | "beautifulsoup4", 30 | "lxml>=5.2.2", 31 | "word2number", 32 | "pluggy>=1", 33 | ] 34 | 35 | [project.optional-dependencies] 36 | docs = ["mkdocs", "mkdocs-material", "mkdocstrings[python]"] 37 | test = ["coverage[toml]>=6.5", "pytest"] 38 | lint = ["mypy>=1.0.0", "ruff>=0.4.7", "types-beautifulsoup4", "types-click"] 39 | 40 | [project.urls] 41 | Homepage = "https://github.com/kanedata/ixbrl-parse" 42 | Documentation = "https://ixbrl-parse.readthedocs.io/" 43 | Issues = "https://github.com/kanedata/ixbrl-parse/issues" 44 | Source = "https://github.com/kanedata/ixbrl-parse" 45 | 46 | [project.scripts] 47 | ixbrlparse = "ixbrlparse.cli:ixbrlparse" 48 | 49 | [tool.hatch.version] 50 | path = "src/ixbrlparse/__about__.py" 51 | 52 | [tool.hatch.envs.default] 53 | features = ["test"] 54 | 55 | [tool.hatch.envs.default.scripts] 56 | test = "pytest {args:tests}" 57 | test-cov = "coverage run -m pytest {args:tests}" 58 | cov-report = ["- coverage combine", "coverage report"] 59 | cov = ["test-cov", "cov-report"] 60 | cov-fail = ["test-cov", "- coverage combine", "coverage report --fail-under=95"] 61 | cov-html = [ 62 | "test-cov", 63 | "- coverage combine", 64 | "coverage report", 65 | "coverage 
html", 66 | "python -m http.server -d htmlcov", 67 | ] 68 | 69 | [[tool.hatch.envs.all.matrix]] 70 | python = ["3.9", "3.10", "3.11", "3.12", "3.13"] 71 | 72 | [tool.hatch.envs.docs] 73 | features = ["docs"] 74 | 75 | [tool.hatch.envs.docs.scripts] 76 | serve = "mkdocs serve" 77 | build = "mkdocs build" 78 | deploy = "mkdocs gh-deploy --force" 79 | 80 | [tool.hatch.envs.lint] 81 | detached = true 82 | features = ["lint"] 83 | 84 | [tool.hatch.envs.lint.scripts] 85 | typing = "mypy --install-types --non-interactive {args:src/ixbrlparse tests}" 86 | style = ["ruff check {args:.}", "ruff format --check --diff {args:.}"] 87 | fmt = ["ruff format {args:.}", "ruff check --fix {args:.}", "style"] 88 | all = ["style", "typing"] 89 | 90 | [tool.mypy] 91 | ignore_missing_imports = true 92 | mypy_path = "$MYPY_CONFIG_FILE_DIR/stubs/" 93 | files = "./stubs/**.*" 94 | 95 | [tool.ruff] 96 | target-version = "py39" 97 | line-length = 120 98 | 99 | [tool.ruff.lint] 100 | select = [ 101 | "A", 102 | "ARG", 103 | "B", 104 | "C", 105 | "DTZ", 106 | "E", 107 | "EM", 108 | "F", 109 | "FBT", 110 | "I", 111 | "ICN", 112 | "ISC", 113 | "N", 114 | "PLC", 115 | "PLE", 116 | "PLR", 117 | "PLW", 118 | "Q", 119 | "RUF", 120 | "S", 121 | "T", 122 | "TID", 123 | "UP", 124 | "W", 125 | "YTT", 126 | ] 127 | ignore = [ 128 | # Allow non-abstract empty methods in abstract base classes 129 | "B027", 130 | # Allow boolean positional values in function calls, like `dict.get(... 
True)` 131 | "FBT003", 132 | # Ignore checks for possible passwords 133 | "S105", 134 | "S106", 135 | "S107", 136 | # Ignore complexity 137 | "C901", 138 | "PLR0911", 139 | "PLR0912", 140 | "PLR0913", 141 | "PLR0915", 142 | "ISC001", 143 | ] 144 | unfixable = [ 145 | # Don't touch unused imports 146 | "F401", 147 | ] 148 | 149 | [tool.ruff.lint.isort] 150 | known-first-party = ["ixbrlparse"] 151 | 152 | [tool.ruff.lint.flake8-tidy-imports] 153 | ban-relative-imports = "all" 154 | 155 | [tool.ruff.lint.per-file-ignores] 156 | # Tests can use magic values, assertions, and relative imports 157 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 158 | 159 | [tool.coverage.run] 160 | source_pkgs = ["ixbrlparse", "tests"] 161 | branch = true 162 | parallel = true 163 | omit = ["src/ixbrlparse/__about__.py"] 164 | 165 | [tool.coverage.paths] 166 | ixbrlparse = ["src/ixbrlparse", "*/ixbrlparse/src/ixbrlparse"] 167 | tests = ["tests", "*/ixbrlparse/tests"] 168 | 169 | [tool.coverage.report] 170 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 171 | 172 | [tool.hatch.build.targets.sdist] 173 | include = ["/ixbrlparse"] 174 | -------------------------------------------------------------------------------- /src/ixbrlparse/__about__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.10.0" 2 | -------------------------------------------------------------------------------- /src/ixbrlparse/__init__.py: -------------------------------------------------------------------------------- 1 | from ixbrlparse.components import ixbrlContext, ixbrlFormat, ixbrlNonNumeric, ixbrlNumeric 2 | from ixbrlparse.core import IXBRL 3 | from ixbrlparse.hookspecs import hookimpl, hookspec 4 | 5 | __all__ = ["IXBRL", "hookimpl", "hookspec", "ixbrlContext", "ixbrlFormat", "ixbrlNonNumeric", "ixbrlNumeric"] 6 | -------------------------------------------------------------------------------- /src/ixbrlparse/__main__.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | from ixbrlparse.cli import ixbrlparse_cli 5 | 6 | sys.exit(ixbrlparse_cli()) 7 | -------------------------------------------------------------------------------- /src/ixbrlparse/cli/__init__.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import logging 4 | import sys 5 | from datetime import date 6 | from typing import Any 7 | 8 | import click 9 | 10 | from ixbrlparse.__about__ import __version__ 11 | from ixbrlparse.core import IXBRL 12 | 13 | logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s:%(name)s:%(message)s") 14 | 15 | 16 | @click.group( 17 | context_settings={"help_option_names": ["-h", "--help"]}, 18 | invoke_without_command=True, 19 | ) 20 | @click.version_option(version=__version__, prog_name="IXBRLParse") 21 | @click.option( 22 | "--format", 23 | "-f", 24 | "output_format", 25 | default="csv", 26 | help="Output format", 27 | type=click.Choice(["csv", "json", "jsonlines", "jsonl"]), 28 | ) 29 | @click.option( 30 | "--fields", default="all", type=click.Choice(["numeric", "nonnumeric", "all"]), help="Which fields to output" 31 | ) 32 | @click.option("--outfile", default=sys.stdout, help="Where to output the file", type=click.File("w", encoding="UTF-8")) 33 | @click.argument("infile", type=click.File("rb"), default=sys.stdin, nargs=1) 34 | def ixbrlparse_cli(output_format: str, fields: str, outfile, infile): 35 | x = IXBRL(infile) 36 | 37 | if output_format == "csv": 38 | values = x.to_table(fields) 39 | columns: dict[str, Any] = {} 40 | for r in values: 41 | columns = {**dict.fromkeys(r.keys()), **columns} 42 | writer = csv.DictWriter(outfile, columns.keys()) 43 | writer.writeheader() 44 | writer.writerows(values) 45 | elif output_format in ["jsonlines", "jsonl"]: 46 | values = x.to_table(fields) 47 | for v in values: 48 | if 
isinstance(v["value"], date): 49 | v["value"] = str(v["value"]) 50 | json.dump(v, outfile) 51 | outfile.write("\n") 52 | elif output_format == "json": 53 | json.dump(x.to_json(), outfile, indent=4) 54 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/__init__.py: -------------------------------------------------------------------------------- 1 | from ixbrlparse.components._base import ixbrlFormat 2 | from ixbrlparse.components.context import ixbrlContext 3 | from ixbrlparse.components.nonnumeric import ixbrlNonNumeric 4 | from ixbrlparse.components.numeric import ixbrlNumeric 5 | 6 | __all__ = ["ixbrlContext", "ixbrlNonNumeric", "ixbrlNumeric", "ixbrlFormat"] 7 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/_base.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from dataclasses import dataclass 3 | from datetime import date 4 | from typing import Optional, Union 5 | 6 | from bs4 import Tag 7 | 8 | 9 | @dataclass 10 | class ixbrlError: # noqa: N801 11 | error: Exception 12 | element: Optional[Tag] = None 13 | context: Optional[str] = None 14 | 15 | 16 | class ixbrlFormat: # noqa: N801 17 | """Class to represent an ixbrl format. 18 | 19 | This class should generally be subclassed to provide additional functionality. 20 | 21 | Attributes: 22 | format_names: A tuple of format names that this class should be used for.""" 23 | 24 | format_names: tuple[str, ...] = () 25 | 26 | def __init__( 27 | self, 28 | format_: str, 29 | decimals: Optional[Union[int, str]] = None, 30 | scale: Union[int, str] = 0, 31 | sign: Optional[str] = None, 32 | ) -> None: 33 | """Initialise the ixbrl format object. 34 | 35 | Parameters: 36 | format_: The name of the format. 37 | decimals: The number of decimal places (only used for numeric formats). 
38 | scale: The scale of the format (only for numeric formats). 39 | If more than 0 this value is used as the exponent for a value, so for example with a scale of 40 | 4 and a value of 20, the parsed value is 20 * (10 ^ 4) == 200000. 41 | sign: The sign of the format (only for numeric formats). The sign given is usually "-" or empty. 42 | """ 43 | if isinstance(decimals, str): 44 | if decimals.lower() == "inf": 45 | self.decimals = None 46 | else: 47 | self.decimals = int(decimals) 48 | 49 | self.format: Optional[str] = None 50 | self.namespace: Optional[str] = None 51 | if format_: 52 | format_array: list[str] = format_.split(":") 53 | if len(format_array) > 1: 54 | self.format = ":".join(format_array[1:]) 55 | self.namespace = format_array[0] 56 | else: 57 | self.format = ":".join(format_array) 58 | self.namespace = None 59 | 60 | self.scale = int(scale) 61 | self.sign = sign 62 | 63 | def to_json(self): 64 | """Convert the object to a JSON serialisable dictionary.""" 65 | return deepcopy(self.__dict__) 66 | 67 | def parse_value(self, value: Union[str, int, float]) -> Optional[Union[int, float, bool, date, str]]: 68 | """Parse a value using the format. 69 | 70 | Parameters: 71 | value: The value to parse. 72 | 73 | Returns: 74 | The parsed value in the appropriate python type. 
75 | """ 76 | if isinstance(value, (int, float)): 77 | return value 78 | 79 | if isinstance(value, str): 80 | if value in ("-", ""): 81 | return 0 82 | 83 | value_numeric: float = float(value.replace(" ", "").replace(",", "")) 84 | 85 | if self.sign == "-": 86 | value_numeric = value_numeric * -1 87 | 88 | if self.scale != 0: 89 | value_numeric = value_numeric * (10**self.scale) 90 | 91 | return value_numeric 92 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/constants.py: -------------------------------------------------------------------------------- 1 | NAME_SPLIT_EXPECTED = 2 2 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/context.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from copy import deepcopy 3 | from typing import Any, Optional 4 | 5 | 6 | class ixbrlContext: # noqa: N801 7 | """Class to represent an ixbrl context. 8 | 9 | The context should either have an instant date or a start and end date. 10 | 11 | Attributes: 12 | id: The id of the context. 13 | entity: A dictionary of the entity information. 14 | segments: A list of dictionaries of the segment information. 15 | instant: The instant date of the context. 16 | startdate: The start date of the context. 
17 | enddate: The end date of the context.""" 18 | 19 | def __init__( 20 | self, 21 | _id: str, 22 | entity: dict[str, Optional[str]], 23 | segments: Optional[list[dict]], 24 | instant: Optional[str], 25 | startdate: Optional[str], 26 | enddate: Optional[str], 27 | ): 28 | self.id = _id 29 | self.entity = entity 30 | self.segments = segments 31 | self.instant: Optional[datetime.date] = None 32 | self.startdate: Optional[datetime.date] = None 33 | self.enddate: Optional[datetime.date] = None 34 | 35 | date_fields = { 36 | "instant": instant, 37 | "startdate": startdate, 38 | "enddate": enddate, 39 | } 40 | for field, value in date_fields.items(): 41 | if value: 42 | datevalue = datetime.datetime.strptime(value.strip(), "%Y-%m-%d").astimezone().date() 43 | setattr(self, field, datevalue) 44 | 45 | def __repr__(self) -> str: 46 | if self.startdate and self.enddate: 47 | datestr = f"{self.startdate} to {self.enddate}" 48 | else: 49 | datestr = str(self.instant) 50 | 51 | segmentstr = " (with segments)" if self.segments else "" 52 | 53 | return f"" 54 | 55 | def to_json(self) -> dict[str, list[dict[str, Any]]]: 56 | """Convert the object to a JSON serialisable dictionary.""" 57 | values = deepcopy(self.__dict__) 58 | for i in ["startdate", "enddate", "instant"]: 59 | if isinstance(values[i], datetime.date): 60 | values[i] = str(values[i]) 61 | return values 62 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/formats.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import re 3 | import warnings 4 | from collections.abc import Sequence 5 | from typing import Optional, Union 6 | 7 | from ixbrlparse.components._base import ixbrlFormat 8 | from ixbrlparse.hookspecs import hookimpl 9 | 10 | 11 | class ixtZeroDash(ixbrlFormat): # noqa: N801 12 | format_names = ( 13 | "zerodash", 14 | "numdash", 15 | "fixedzero", 16 | "ixt:zerodash", 17 | "ixt:numdash", 18 | 
"ixt:fixedzero", 19 | ) 20 | 21 | def parse_value(self, *_args, **_kwargs) -> Union[int, float]: 22 | return 0 23 | 24 | 25 | class ixtNoContent(ixbrlFormat): # noqa: N801 26 | format_names = ( 27 | "nocontent", 28 | "fixedempty", 29 | "ixt:nocontent", 30 | "ixt:fixedempty", 31 | ) 32 | 33 | def parse_value(self, *_args, **_kwargs) -> None: 34 | return None 35 | 36 | 37 | class ixtFixedFalse(ixbrlFormat): # noqa: N801 38 | format_names = ( 39 | "booleanfalse", 40 | "fixedfalse", 41 | "ixt:booleanfalse", 42 | "ixt:fixedfalse", 43 | ) 44 | 45 | def parse_value(self, *_args, **_kwargs) -> bool: 46 | return False 47 | 48 | 49 | class ixtFixedTrue(ixbrlFormat): # noqa: N801 50 | format_names = ( 51 | "booleantrue", 52 | "fixedtrue", 53 | "ixt:booleantrue", 54 | "ixt:fixedtrue", 55 | ) 56 | 57 | def parse_value(self, *_args, **_kwargs) -> bool: 58 | return True 59 | 60 | 61 | class ixtNumComma(ixbrlFormat): # noqa: N801 62 | format_names = ( 63 | "numcomma", 64 | "numdotcomma", 65 | "numspacecomma", 66 | "numcommadecimal", 67 | "ixt:numcomma", 68 | "ixt:numdotcomma", 69 | "ixt:numspacecomma", 70 | "ixt:numcommadecimal", 71 | ) 72 | 73 | def parse_value(self, value: Union[str, int, float]) -> Optional[Union[int, float]]: 74 | if isinstance(value, str): 75 | value = value.replace(".", "") 76 | value = value.replace(",", ".") 77 | parsed_value = super().parse_value(value) 78 | if isinstance(parsed_value, (float, int)): 79 | return parsed_value 80 | msg = f"Could not parse value {value} as a number" # pragma: no cover 81 | warnings.warn(msg, stacklevel=2) # pragma: no cover 82 | return None # pragma: no cover 83 | 84 | 85 | class ixtNumWordsEn(ixbrlFormat): # noqa: N801 86 | format_names = ( 87 | "numwordsen", 88 | "ixt:numwordsen", 89 | ) 90 | 91 | def parse_value(self, value: Union[str, int, float]) -> Optional[Union[int, float]]: 92 | if isinstance(value, str): 93 | value = value.strip().lower() 94 | if value in ("no", "none"): 95 | return 0 96 | from word2number import 
w2n 97 | 98 | return w2n.word_to_num(value) 99 | parsed_value = super().parse_value(value) 100 | if isinstance(parsed_value, (float, int)): 101 | return parsed_value 102 | msg = f"Could not parse value {value} as a number" # pragma: no cover 103 | warnings.warn(msg, stacklevel=2) # pragma: no cover 104 | return None # pragma: no cover 105 | 106 | 107 | class ixtNumDotDecimal(ixbrlFormat): # noqa: N801 108 | format_names = ( 109 | "numdotdecimal", 110 | "numcommadot", 111 | "numspacedot", 112 | "ixt:numdotdecimal", 113 | "ixt:numcommadot", 114 | "ixt:numspacedot", 115 | ) 116 | 117 | 118 | DATE_ORDINAL_SUFFIX_REGEX = re.compile(r"([0-9]{1,2})(st|nd|rd|th)\b") 119 | DATE_NON_ALPHANUMERIC_REGEX = re.compile(r"[\/\.\-\\–— ]") # noqa: RUF001 120 | 121 | 122 | class ixtDateFormat(ixbrlFormat): # noqa: N801 123 | format_names: tuple[str, ...] = () 124 | date_format: Union[tuple[str, ...], str] = "%Y-%m-%d" 125 | 126 | def _get_date_formats(self) -> Sequence[str]: 127 | if isinstance(self.date_format, str): 128 | return (self.date_format,) 129 | return self.date_format 130 | 131 | def parse_value(self, value: Union[str, int, float]) -> Optional[datetime.date]: 132 | if isinstance(value, str): 133 | value = value.lower() 134 | # remove ordinal suffixes with regex 135 | value = DATE_ORDINAL_SUFFIX_REGEX.sub(r"\1", value) 136 | # replace non-alphanumeric characters with dashes 137 | value = DATE_NON_ALPHANUMERIC_REGEX.sub("-", value) 138 | 139 | date_formats = self._get_date_formats() 140 | error: Optional[Exception] = None 141 | for date_format in date_formats: 142 | try: 143 | return datetime.datetime.strptime(value, date_format).date() # noqa: DTZ007 144 | except ValueError as e: 145 | error = e 146 | continue 147 | # if we get here, we couldn't parse the date. 
Raise the last error 148 | if error: # pragma: no cover 149 | raise error 150 | msg = f"Could not parse value {value} as a date" 151 | warnings.warn(msg, stacklevel=2) 152 | return None 153 | 154 | 155 | class ixtDateLongUK(ixtDateFormat): # noqa: N801 156 | format_names = ( 157 | "datelonguk", 158 | "datedaymonthyearen", 159 | "ixt:datelonguk", 160 | "ixt:datedaymonthyearen", 161 | ) 162 | date_format = ("%d-%B-%Y", "%d-%B-%y", "%d-%b-%Y", "%d-%b-%y") 163 | 164 | 165 | class ixtDateShortUK(ixtDateFormat): # noqa: N801 166 | format_names = ( 167 | "dateshortuk", 168 | "ixt:dateshortuk", 169 | ) 170 | date_format = ("%d-%b-%Y", "%d-%b-%y", "%d-%B-%Y", "%d-%B-%y") 171 | 172 | 173 | class ixtDateLongUS(ixtDateFormat): # noqa: N801 174 | format_names = ( 175 | "datelongus", 176 | "ixt:datelongus", 177 | ) 178 | date_format = ("%B-%d,-%Y", "%B-%d,-%y", "%b-%d,-%Y", "%b-%d,-%y") 179 | 180 | 181 | class ixtDateShortUS(ixtDateFormat): # noqa: N801 182 | format_names = ( 183 | "dateshortus", 184 | "ixt:dateshortus", 185 | ) 186 | date_format = ("%b-%d,-%Y", "%b-%d,-%y", "%B-%d,-%Y", "%B-%d,-%y") 187 | 188 | 189 | class ixtDateDayMonthYear(ixtDateFormat): # noqa: N801 190 | format_names = ( 191 | "datedaymonthyear", 192 | "ixt:datedaymonthyear", 193 | "dateslasheu", 194 | "ixt:dateslasheu", 195 | "datedoteu", 196 | "ixt:datedoteu", 197 | ) 198 | date_format = ("%d-%m-%Y", "%d-%m-%y") 199 | 200 | 201 | class ixtDateSlashUS(ixtDateFormat): # noqa: N801 202 | format_names = ( 203 | "dateslashus", 204 | "ixt:dateslashus", 205 | "datedotus", 206 | "ixt:datedotus", 207 | ) 208 | date_format = ("%m-%d-%Y", "%m-%d-%y") 209 | 210 | 211 | class ixtDateDotEU(ixtDateDayMonthYear): # noqa: N801 212 | pass 213 | 214 | 215 | class ixtDateSlashEU(ixtDateDayMonthYear): # noqa: N801 216 | pass 217 | 218 | 219 | class ixtDateDotUS(ixtDateSlashUS): # noqa: N801 220 | pass 221 | 222 | 223 | @hookimpl(tryfirst=True) 224 | def ixbrl_add_formats() -> list[type[ixbrlFormat]]: 225 | return [ 226 | 
ixtZeroDash, 227 | ixtNoContent, 228 | ixtFixedFalse, 229 | ixtFixedTrue, 230 | ixtNumDotDecimal, 231 | ixtNumComma, 232 | ixtNumWordsEn, 233 | ixtDateLongUK, 234 | ixtDateLongUS, 235 | ixtDateShortUK, 236 | ixtDateShortUS, 237 | ixtDateDayMonthYear, 238 | ixtDateSlashUS, 239 | ] 240 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/nonnumeric.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from copy import deepcopy 3 | from datetime import date 4 | from typing import Any, Optional, Union 5 | 6 | from bs4 import Tag 7 | 8 | from ixbrlparse.components import ixbrlContext 9 | from ixbrlparse.components.constants import NAME_SPLIT_EXPECTED 10 | from ixbrlparse.components.transform import get_format, ixbrlFormat 11 | 12 | 13 | class ixbrlNonNumeric: # noqa: N801 14 | """Models a non-numeric element in an iXBRL document 15 | 16 | Non-numeric elements are used to store information such as the name of the 17 | entity, the name of the reporting period, etc. 18 | The value of non-numeric elements is always a string, so we don't need to 19 | worry about parsing the string.""" 20 | 21 | def __init__( 22 | self, 23 | context: Optional[Union[ixbrlContext, str]] = None, 24 | name: Optional[str] = None, 25 | format_: Optional[str] = None, 26 | value: Optional[str] = None, 27 | soup_tag: Optional[Tag] = None, 28 | ) -> None: 29 | """Constructor for the ixbrlNonNumeric class. 
30 | 31 | Parameters: 32 | context (ixbrlContext): The context of the non-numeric element 33 | name (str): The name of the non-numeric element 34 | format_ (str): The format of the non-numeric element 35 | value (str): The value of the non-numeric element 36 | soup_tag (Tag): The source tag in beautiful soup 37 | """ 38 | if isinstance(name, str): 39 | name_split: list[str] = name.split(":", maxsplit=1) 40 | if len(name_split) == NAME_SPLIT_EXPECTED: 41 | self.schema = name_split[0] 42 | self.name = name_split[1] 43 | else: 44 | self.schema = "unknown" 45 | self.name = name_split[0] 46 | 47 | self.context = context 48 | self.format: Optional[ixbrlFormat] = None 49 | self.text: Optional[str] = value 50 | self.value: Optional[Union[str, int, float, None, date]] = value 51 | if isinstance(format_, str) and format_ != "" and self.text is not None: 52 | try: 53 | self.format = get_format(format_)(format_=format_) 54 | self.value = self.format.parse_value(self.text) 55 | except NotImplementedError: 56 | msg = f"Format {format_} not implemented - value '{value}' not parsed" 57 | warnings.warn(msg, stacklevel=2) 58 | self.soup_tag = soup_tag 59 | 60 | def to_json(self) -> dict[str, Any]: 61 | values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"} 62 | if isinstance(self.value, date): 63 | values["value"] = self.value.isoformat() 64 | if isinstance(self.format, ixbrlFormat): 65 | values["format"] = self.format.to_json() 66 | if isinstance(self.context, ixbrlContext): 67 | values["context"] = self.context.to_json() 68 | return values 69 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/numeric.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from copy import deepcopy 3 | from typing import Optional, Union 4 | 5 | from bs4 import Tag 6 | 7 | from ixbrlparse.components.constants import NAME_SPLIT_EXPECTED 8 | from 
ixbrlparse.components.context import ixbrlContext 9 | from ixbrlparse.components.transform import get_format, ixbrlFormat 10 | 11 | 12 | class ixbrlNumeric: # noqa: N801 13 | """Models a numeric element in an iXBRL document""" 14 | 15 | def __init__( 16 | self, 17 | name: Optional[str] = None, 18 | unit: Optional[str] = None, 19 | value: Optional[Union[str, int, float]] = None, 20 | text: Optional[Union[str, int, float]] = None, 21 | context: Union[ixbrlContext, str, None] = None, 22 | soup_tag: Optional[Tag] = None, 23 | **attrs, 24 | ) -> None: 25 | """Constructor for the ixbrlNumeric class. 26 | 27 | Parameters: 28 | name (str): The name of the numeric element 29 | unit (str): The unit of the numeric element 30 | value (float): The value of the numeric element 31 | text (str): The text of the numeric element 32 | context (ixbrlContext): The context of the numeric element 33 | soup_tag (Tag): The source tag in beautiful soup 34 | """ 35 | self.name: Optional[str] = name 36 | self.schema: str = "unknown" 37 | if isinstance(name, str): 38 | name_value = name.split(":", maxsplit=1) 39 | if len(name_value) == NAME_SPLIT_EXPECTED: 40 | self.schema = name_value[0] 41 | self.name = name_value[1] 42 | else: 43 | self.schema = "unknown" 44 | self.name = name_value[0] 45 | 46 | if not isinstance(value, (str, int, float)): 47 | value = text 48 | if not isinstance(value, (str, int, float)): 49 | msg = "Must provide either value or text" 50 | raise ValueError(msg) 51 | self.text: Union[str, int, float] = value 52 | self.context: Union[ixbrlContext, str, None] = context 53 | self.unit: Optional[str] = unit 54 | self.value: Optional[Union[int, float]] = None 55 | self.soup_tag = soup_tag 56 | 57 | format_ = { 58 | "format_": attrs.get("format"), 59 | "decimals": attrs.get("decimals", "0"), 60 | "scale": attrs.get("scale", 0), 61 | "sign": attrs.get("sign", ""), 62 | } 63 | self.format: Optional[ixbrlFormat] = get_format(format_["format_"])(**format_) 64 | 65 | try: 66 | if 
isinstance(self.format, ixbrlFormat): 67 | parsed_value = self.format.parse_value(self.text) 68 | if isinstance(parsed_value, (int, float)): 69 | self.value = parsed_value 70 | except ValueError: 71 | logging.info(attrs) 72 | raise 73 | 74 | def to_json(self) -> dict: 75 | values = {k: deepcopy(v) for k, v in self.__dict__.items() if k != "soup_tag"} 76 | if isinstance(self.format, ixbrlFormat): 77 | values["format"] = self.format.to_json() 78 | if isinstance(self.context, ixbrlContext): 79 | values["context"] = self.context.to_json() 80 | return values 81 | -------------------------------------------------------------------------------- /src/ixbrlparse/components/transform.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from ixbrlparse.components._base import ixbrlFormat 4 | from ixbrlparse.plugins import pm 5 | 6 | 7 | def get_format(format_: Optional[str]) -> type[ixbrlFormat]: 8 | if not isinstance(format_, str): 9 | return ixbrlFormat 10 | 11 | original_format: str = format_ 12 | 13 | format_list: list[str] = format_.split(":") 14 | if len(format_list) > 1: 15 | namespace = format_list[0] 16 | format_ = ":".join(format_list[1:]) 17 | else: 18 | namespace = None 19 | format_ = ":".join(format_list) 20 | 21 | format_ = format_.replace("-", "") 22 | 23 | formats = {} 24 | for additional_formats in pm.hook.ixbrl_add_formats(): 25 | for format_class in additional_formats: 26 | for format_str in format_class.format_names: 27 | formats[format_str] = format_class 28 | 29 | if format_ in formats: 30 | return formats[format_] 31 | 32 | msg = f'Format "{original_format}" not implemented (namespace "{namespace}")' 33 | raise NotImplementedError(msg) 34 | -------------------------------------------------------------------------------- /src/ixbrlparse/core.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Generator, Iterable 2 | 
from pathlib import Path
from typing import IO, Optional, Union

from bs4 import BeautifulSoup, Tag

from ixbrlparse.components import ixbrlContext, ixbrlNonNumeric, ixbrlNumeric
from ixbrlparse.components._base import ixbrlError

FILETYPE_IXBRL = "ixbrl"
FILETYPE_XBRL = "xbrl"


class IXBRLParseError(Exception):
    """Raised when a document is recognised as neither iXBRL nor XBRL."""


class BaseParser:
    """Tag-lookup helpers plus no-op hooks for each parsing stage.

    Subclasses override the ``_get_*`` stage methods; ``IXBRL`` calls them in
    order after choosing a parser.
    """

    def _get_tag_attribute(
        self, s: Union[BeautifulSoup, Tag], tag: Union[str, list[str]], attribute: str
    ) -> Optional[str]:
        """Return the stripped ``attribute`` of the first ``tag`` match under ``s``.

        Returns ``None`` when no tag matches or the attribute is not a string.
        """
        tag_contents = s.find(tag)
        if isinstance(tag_contents, Tag):
            attribute_value = tag_contents.get(attribute)
            if isinstance(attribute_value, str):
                return attribute_value.strip()
        return None  # pragma: no cover

    def _get_tag_text(self, s: Union[BeautifulSoup, Tag], tag: Union[str, list[str]]) -> Optional[str]:
        """Return the stripped text of the first ``tag`` match under ``s``, or ``None``."""
        tag_contents = s.find(tag)
        if isinstance(tag_contents, Tag):
            text_value = tag_contents.text
            if isinstance(text_value, str):
                return text_value.strip()
        return None  # pragma: no cover

    def _get_tag_children(self, s: Union[BeautifulSoup, Tag], tag: Union[str, list[str]]) -> Iterable[Tag]:
        """Return the child tags of the first ``tag`` match under ``s`` (empty if none)."""
        tag_contents = s.find(tag)
        if isinstance(tag_contents, Tag):
            return tag_contents.findChildren()
        return []

    def _get_schema(self) -> None:
        pass

    def _get_contexts(self) -> None:
        pass

    def _get_units(self) -> None:
        pass

    def _get_nonnumeric(self) -> None:
        pass

    def _get_numeric(self) -> None:
        pass


class IXBRLParser(BaseParser):
    """Parser for inline XBRL documents (root element ``html``)."""

    root_element: str = "html"

    def __init__(self, soup: BeautifulSoup, raise_on_error: bool = True) -> None:  # noqa: FBT001, FBT002
        """Store the soup and initialise empty result containers.

        Parameters:
            soup: Parsed document.
            raise_on_error: Re-raise element-level errors after recording them
                in ``errors``; when false they are only recorded.
        """
        self.soup = soup
        self.raise_on_error = raise_on_error
        self.errors: list = []
        self.contexts: dict[str, ixbrlContext] = {}
        self.schema: Optional[str] = None
        self.namespaces: dict[str, Union[str, list[str]]] = {}
        # Initialised here (not only in _get_units) so _get_numeric() and
        # IXBRL.to_json() never hit a missing attribute.
        self.units: dict[str, Optional[str]] = {}
        self.nonnumeric: list[ixbrlNonNumeric] = []
        self.numeric: list[ixbrlNumeric] = []

    def _get_schema(self) -> None:
        """Populate ``schema`` from the schemaRef tag and ``namespaces`` from the root element."""
        self.schema = None
        schema_tag = self.soup.find(["link:schemaRef", "schemaRef", "link:schemaref", "schemaref"])
        if isinstance(schema_tag, Tag) and schema_tag.get("xlink:href"):
            schema = schema_tag["xlink:href"]
            if isinstance(schema, str):
                self.schema = schema.strip()

        self.namespaces = {}
        namespace_tag = self.soup.find(self.root_element)
        if isinstance(namespace_tag, Tag):
            for k in namespace_tag.attrs:
                # Keep xmlns declarations and any other prefixed attribute.
                if isinstance(k, str) and (k.startswith("xmlns") or ":" in k):
                    namespace_value = namespace_tag[k]
                    if isinstance(namespace_value, str):
                        self.namespaces[k] = namespace_value.split(" ")

    def _get_context_elements(
        self,
    ) -> Generator[Tag, None, None]:
        """Yield context tags found inside the first resources element."""
        resources = self.soup.find(["ix:resources", "resources"])
        if isinstance(resources, Tag):
            for s in resources.find_all(["xbrli:context", "context"]):
                if isinstance(s, Tag):
                    yield s

    def _get_contexts(self) -> None:
        """Build ``contexts`` keyed by context id; contexts without an id are skipped."""
        self.contexts = {}
        for s in self._get_context_elements():
            if not s.get("id"):
                continue
            s_id = s["id"]
            if not isinstance(s_id, str):
                continue  # pragma: no cover
            try:
                self.contexts[s_id] = ixbrlContext(
                    _id=s_id,
                    entity={
                        "scheme": self._get_tag_attribute(s, ["xbrli:identifier", "identifier"], "scheme"),
                        "identifier": self._get_tag_text(s, ["xbrli:identifier", "identifier"]),
                    },
                    segments=[
                        {"tag": x.name, "value": x.text.strip(), **x.attrs}
                        for x in self._get_tag_children(s, ["xbrli:segment", "segment"])
                    ],
                    instant=self._get_tag_text(s, ["xbrli:instant", "instant"]),
                    startdate=self._get_tag_text(s, ["xbrli:startDate", "startDate"]),
                    enddate=self._get_tag_text(s, ["xbrli:endDate", "endDate"]),
                )
            except Exception as e:
                self.errors.append(
                    ixbrlError(
                        error=e,
                        element=s,
                    )
                )
                if self.raise_on_error:
                    raise

    def _get_unit_elements(self) -> Generator[Tag, None, None]:
        """Yield unit tags found inside the first resources element."""
        resources = self.soup.find(["ix:resources", "resources"])
        if isinstance(resources, Tag):
            for s in resources.find_all(["xbrli:unit", "unit"]):
                if isinstance(s, Tag):
                    yield s

    def _get_units(self) -> None:
        """Build ``units`` mapping unit id to the text of its measure tag."""
        self.units = {}
        for s in self._get_unit_elements():
            s_id = s.get("id")
            if isinstance(s_id, str):
                self.units[s_id] = self._get_tag_text(s, ["xbrli:measure", "measure"])

    def _get_tag_continuation(self, s: Union[BeautifulSoup, Tag], start_str: str = "") -> str:
        """Return ``start_str`` plus the text of ``s`` and of its continuation chain.

        The chain is followed through ``continuedAt`` ids for as long as the
        referenced element is a ``continuation`` tag. An id that was already
        followed ends the walk, so a malformed cyclic chain cannot loop
        forever.
        """
        text = start_str
        followed_ids: list = []
        current = s
        while isinstance(current, Tag):
            text += current.text
            next_id = current.attrs.get("continuedAt")
            if not next_id or next_id in followed_ids:
                break
            followed_ids.append(next_id)
            candidate = self.soup.find(id=next_id)
            if not (isinstance(candidate, Tag) and candidate.name == "continuation"):
                break
            current = candidate
        return text

    def _get_nonnumeric(self) -> None:
        """Collect every ``nonNumeric`` tag into ``nonnumeric``.

        Note: an ``exclude`` child, when present, is removed from the soup
        before the element text is read.
        """
        self.nonnumeric = []
        for s in self.soup.find_all({"nonNumeric"}):
            try:
                # Fall back to the raw reference when the context is unknown.
                context = self.contexts.get(s["contextRef"], s["contextRef"])
                format_ = s.get("format")
                if not isinstance(format_, str):
                    format_ = None
                exclusion = s.find("exclude")
                if exclusion is not None:
                    exclusion.extract()

                text = s.text
                if s.attrs.get("continuedAt"):
                    text = self._get_tag_continuation(s)

                self.nonnumeric.append(
                    ixbrlNonNumeric(
                        context=context,
                        name=s["name"] if isinstance(s["name"], str) else "",
                        format_=format_,
                        value=text.strip().replace("\n", "") if isinstance(text, str) else "",
                        soup_tag=s,
                    )
                )
            except Exception as e:
                self.errors.append(
                    ixbrlError(
                        error=e,
                        element=s,
                    )
                )
                if self.raise_on_error:
                    raise

    def _get_numeric(self) -> None:
        """Collect every ``nonFraction`` tag into ``numeric``."""
        self.numeric = []
        for s in self.soup.find_all({"nonFraction"}):
            try:
                self.numeric.append(
                    ixbrlNumeric(
                        text=s.text,
                        context=self.contexts.get(s["contextRef"], s["contextRef"]),
                        unit=self.units.get(s["unitRef"], s["unitRef"]),
                        soup_tag=s,
                        **s.attrs,
                    )
                )
            except Exception as e:
                self.errors.append(
                    ixbrlError(
                        error=e,
                        element=s,
                    )
                )
                if self.raise_on_error:
                    raise


class XBRLParser(IXBRLParser):
    """Parser for plain XBRL documents (root element ``xbrl``)."""

    root_element = "xbrl"

    def _get_context_elements(self) -> Generator[Tag, None, None]:
        """Yield every context tag in the document."""
        for s in self.soup.find_all(["xbrli:context", "context"]):
            if isinstance(s, Tag):
                yield s

    def _get_unit_elements(self) -> Generator[Tag, None, None]:
        """Yield every unit tag in the document."""
        for s in self.soup.find_all(["xbrli:unit", "unit"]):
            if isinstance(s, Tag):
                yield s

    def _get_elements(self) -> Generator[Tag, None, None]:
        """Yield every tag below the root element."""
        resource = self.soup.find(self.root_element)
        if isinstance(resource, Tag):
            for s in resource.find_all(True):
                if isinstance(s, Tag):
                    yield s

    def _get_numeric(self) -> None:
        """Collect elements carrying both ``contextRef`` and ``unitRef`` into ``numeric``."""
        self.numeric = []
        for s in self._get_elements():
            if not s.get("contextRef") or not s.get("unitRef"):
                continue
            context_ref = s["contextRef"]
            unit_ref = s["unitRef"]
            if not isinstance(context_ref, str) or not isinstance(unit_ref, str):
                continue  # pragma: no cover
            try:
                self.numeric.append(
                    ixbrlNumeric(
                        name=s.name,
                        text=s.text,
                        context=self.contexts.get(context_ref, context_ref),
                        unit=self.units.get(unit_ref, unit_ref),
                        soup_tag=s,
                        **s.attrs,
                    )
                )
            except Exception as e:
                # Record an ixbrlError like every other stage does, so that
                # consumers of ``errors`` see one uniform entry type.
                self.errors.append(
                    ixbrlError(
                        error=e,
                        element=s,
                    )
                )
                if self.raise_on_error:
                    raise

    def _get_nonnumeric(self) -> None:
        """Collect elements with a ``contextRef`` but no ``unitRef`` into ``nonnumeric``."""
        self.nonnumeric = []
        for s in self._get_elements():
            try:
                if not s.get("contextRef") or s.get("unitRef"):
                    continue
                context_ref = s["contextRef"]
                if not isinstance(context_ref, str):
                    continue  # pragma: no cover
                context = self.contexts.get(context_ref, context_ref)
                format_ = s.get("format")
                if not isinstance(format_, str):
                    format_ = None

                text = s.text

                self.nonnumeric.append(
                    ixbrlNonNumeric(
                        context=context,
                        name=s.name if isinstance(s.name, str) else "",
                        format_=format_,
                        value=text.strip().replace("\n", "") if isinstance(text, str) else "",
                        soup_tag=s,
                    )
                )
            except Exception as e:
                self.errors.append(
                    ixbrlError(
                        error=e,
                        element=s,
                    )
                )
                if self.raise_on_error:
                    raise


class IXBRL:
    """
    Parse an iXBRL file.
    """

    def __init__(self, f: IO, raise_on_error: bool = True) -> None:  # noqa: FBT001, FBT002
        """Constructor for the IXBRL class.

        Parameters:
            f:  File-like object to parse.
            raise_on_error:  Whether to raise an exception on error
        """
        self.soup = BeautifulSoup(f.read(), "xml", multi_valued_attributes=None)
        self.raise_on_error = raise_on_error
        self._get_parser()
        # Contexts and units are parsed before the facts that reference them.
        self.parser._get_schema()
        self.parser._get_contexts()
        self.parser._get_units()
        self.parser._get_nonnumeric()
        self.parser._get_numeric()

    @classmethod
    def open(cls, filename: Union[str, Path], raise_on_error: bool = True):  # noqa: FBT001, FBT002
        """Open an iXBRL file.

        Parameters:
            filename:  Path to file to parse.
            raise_on_error:  Whether to raise an exception on error
        """
        with open(filename, "rb") as a:
            return cls(a, raise_on_error=raise_on_error)

    def _get_parser(self) -> None:
        """Pick the parser class from the document's root element.

        Raises:
            IXBRLParseError: If neither an ``html`` nor an ``xbrl`` element exists.
        """
        if self.soup.find("html"):
            self.filetype = FILETYPE_IXBRL
            parser = IXBRLParser
        elif self.soup.find("xbrl"):
            self.filetype = FILETYPE_XBRL
            parser = XBRLParser
        else:
            msg = "Filetype not recognised"
            raise IXBRLParseError(msg)
        self.parser: BaseParser = parser(self.soup, raise_on_error=self.raise_on_error)

    def __getattr__(self, name: str):
        """Delegate unknown attributes (``schema``, ``numeric``, ...) to the parser.

        Raises:
            AttributeError: If the parser has not been created yet, or the
                parser does not have the attribute either.
        """
        # __getattr__ only runs after normal lookup failed. Reading
        # ``self.parser`` while it is missing (e.g. during copy/unpickle, or
        # after _get_parser() raised) would re-enter this method without end.
        parser = self.__dict__.get("parser")
        if parser is None:
            raise AttributeError(name)
        return getattr(parser, name)

    def to_json(self) -> dict:
        """Return a JSON representation of the iXBRL file.

        Returns:
            A dictionary containing the following keys:

            - schema:  The schema used in the iXBRL file.
            - namespaces:  The namespaces used in the iXBRL file.
            - contexts:  The contexts used in the iXBRL file.
            - units:  The units used in the iXBRL file.
            - nonnumeric:  The non-numeric elements in the iXBRL file.
            - numeric:  The numeric elements in the iXBRL file.
            - errors:  The number of errors encountered when parsing the iXBRL file.
        """
        return {
            "schema": self.schema,
            "namespaces": self.namespaces,
            "contexts": {c: ct.to_json() for c, ct in self.contexts.items()},
            "units": self.units,
            "nonnumeric": [a.to_json() for a in self.nonnumeric],
            "numeric": [a.to_json() for a in self.numeric],
            "errors": len(self.errors),
        }

    def to_table(self, fields: str = "numeric") -> list[dict]:
        """Return a list of dictionaries representing the iXBRL file.

        This is suitable for passing to pandas.DataFrame.from_records().

        Parameters:
            fields:  Which fields to include in the output. Can be "numeric", "nonnumeric" or "all".

        Returns:
            A list of dictionaries representing the iXBRL file.

        The fields included are:

        - schema (str)
        - name (str) -- the name of the element
        - value -- the value of the element. Can be number, str, None, or boolean
        - unit (str) -- the unit of the element if present
        - instant (date) -- the instant date of the element context if present
        - startdate (date) -- the start date of the element context if present
        - enddate (date) -- the end date of the element context if present
        - segment:N (str) -- the Nth segment of the element context if present (can be repeated)

        Examples:
            >>> import pandas as pd
            >>> i = IXBRL.open("tests/fixtures/ixbrl/uk-gaap/2009-12-31/Company-Accounts-Data.xml")
            >>> df = pd.DataFrame.from_records(i.to_table(fields="numeric"))
            >>> df.head()
        """
        if fields == "nonnumeric":
            values = self.nonnumeric
        elif fields == "numeric":
            values = self.numeric
        else:
            # Any other value (including "all") returns both kinds.
            values = self.nonnumeric + self.numeric

        ret = []
        for v in values:
            # A context may be an unresolved id string; only real contexts
            # contribute dates and segments.
            context = v.context if isinstance(v.context, ixbrlContext) else None

            if context is not None and context.segments:
                segments = {
                    f"segment:{i}": "{} {} {}".format(s.get("tag", ""), s.get("dimension"), s.get("value")).strip()
                    for i, s in enumerate(context.segments)
                }
            else:
                segments = {"segment:0": ""}

            ret.append(
                {
                    "schema": " ".join(self.namespaces.get(f"xmlns:{v.schema}", [v.schema])),
                    "name": v.name,
                    "value": v.value,
                    "unit": v.unit if hasattr(v, "unit") else None,
                    "instant": str(context.instant) if context is not None and context.instant else None,
                    "startdate": str(context.startdate) if context is not None and context.startdate else None,
                    "enddate": str(context.enddate) if context is not None and context.enddate else None,
                    **segments,
                }
            )
        return ret
-------------------------------------------------------------------------------- /src/ixbrlparse/hookspecs.py: -------------------------------------------------------------------------------- 1 | import pluggy 2 | 3 | from ixbrlparse.components.transform import ixbrlFormat 4 | 5 | hookimpl = pluggy.HookimplMarker("ixbrlparse") 6 | hookspec = pluggy.HookspecMarker("ixbrlparse") 7 | 8 | 9 | @hookspec 10 | def ixbrl_add_formats() -> list[type[ixbrlFormat]]: # type: ignore 11 | """Add new formats to the ixbrlparse library. 12 | 13 | Returns: 14 | list[type[ixbrlFormat]]: A list of ixbrlFormat classes. 15 | """ 16 | -------------------------------------------------------------------------------- /src/ixbrlparse/plugins.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import pluggy 4 | 5 | from ixbrlparse import hookspecs 6 | 7 | DEFAULT_PLUGINS = ["ixbrlparse.components.formats"] 8 | 9 | pm = pluggy.PluginManager("ixbrlparse") 10 | pm.add_hookspecs(hookspecs) 11 | 12 | pm.load_setuptools_entrypoints("ixbrlparse") 13 | 14 | # Load default plugins 15 | for plugin in DEFAULT_PLUGINS: 16 | mod = importlib.import_module(plugin) 17 | pm.register(mod, plugin) 18 | -------------------------------------------------------------------------------- /stubs/word2number/__init__.pyi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanedata/ixbrl-parse/284e374012229539f8d90f2b95f9a12115d39777/stubs/word2number/__init__.pyi -------------------------------------------------------------------------------- /stubs/word2number/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kanedata/ixbrl-parse/284e374012229539f8d90f2b95f9a12115d39777/stubs/word2number/py.typed -------------------------------------------------------------------------------- /stubs/word2number/w2n.pyi: 
-------------------------------------------------------------------------------- 1 | # fmt: off 2 | american_number_system: dict[str, (int | str)] 3 | 4 | decimal_words: list[str] 5 | 6 | 7 | def number_formation(number_words: list[str]) -> int: ... 8 | 9 | 10 | def get_decimal_sum(decimal_digit_words: list[str]) -> float: ... 11 | 12 | 13 | def word_to_num(number_sentence: str) -> (int | float | None): ... 14 | -------------------------------------------------------------------------------- /tests/test_accounts/account_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 245 | Micro-entity Accounts - GREEN BARKINGSHIRE LIMITED 246 | 247 | 248 | 249 | 401 | 402 | 403 |
404 |

Registered Number 02345678 405 |

406 |

407 | GREEN BARKINGSHIRE LIMITED 408 |

409 |

Micro-entity Accounts

410 |

31 October 2017

411 |

412 | 413 |

414 | 415 |
416 |
417 |

418 | GREEN BARKINGSHIRE LIMITED 419 | Registered Number 02345678 420 |

421 |
422 | 423 |
424 |

Micro-entity Balance Sheet as at 31 October 2017 425 |

426 | 427 |

428 | 429 | 430 | 431 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 458 | 459 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 477 | 478 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 496 | 497 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 516 | 517 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 537 | 538 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 558 | 559 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 578 | 579 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 597 | 598 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 616 | 617 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 636 | 637 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 659 | 660 | 666 | 667 | 668 | 669 |
432 | Notes20172016
440 | 441 | ££
Fixed Assets 453 | 454 |
455 | - 456 |
457 |
460 |
461 | - 462 |
463 |
Current Assets 472 | 473 |
474 | 2,909 475 |
476 |
479 |
480 | 4,585 481 |
482 |
Prepayments and accrued income 491 | 492 |
493 | - 494 |
495 |
498 |
499 | - 500 |
501 |
Creditors: amounts falling due within one year 510 | 511 |
512 | (6,719 513 | ) 514 |
515 |
518 |
519 | (5,879 520 | ) 521 |
522 |
Net current assets (liabilities) 531 | 532 |
533 | (3,810 534 | ) 535 |
536 |
539 |
540 | (1,294 541 | ) 542 |
543 |
Total assets less current liabilities 552 | 553 |
554 | (3,810 555 | ) 556 |
557 |
560 |
561 | (1,294 562 | ) 563 |
564 |
Creditors: amounts falling due after more than one year 573 | 574 |
575 | 0 576 |
577 |
580 |
581 | 0 582 |
583 |
Provisions for liabilities 592 | 593 |
594 | 0 595 |
596 |
599 |
600 | 0 601 |
602 |
Accruals and deferred income 611 | 612 |
613 | 0 614 |
615 |
618 |
619 | 0 620 |
621 |
Total net assets (liabilities) 630 | 631 |
632 | (3,810 633 | ) 634 |
635 |
638 |
639 | (1,294 640 | ) 641 |
642 |
Reserves 653 | 654 |
655 | (3,810 656 | ) 657 |
658 |
661 |
662 | (1,294 663 | ) 664 |
665 |
670 |

671 | 672 | 673 |
674 |
675 |
    676 |
  • 677 | For the year ending 31 October 2017 the company was entitled to exemption under section 477 of the Companies Act 2006 relating to small companies. 678 |
  • 679 |
  • 680 | The members have not required the company to obtain an audit in accordance with section 476 of the Companies Act 2006. 681 |
  • 682 |
  • 683 | The directors acknowledge their responsibilities for complying with the requirements of the Companies Act 2006 with respect to accounting records and the preparation of accounts. 684 |
  • 685 |
  • 686 | The accounts have been prepared in accordance with the micro-entity provisions and delivered in accordance with the provisions applicable to companies subject to the small companies regime. 687 |
  • 688 |
689 |
690 | 691 |
692 |

Approved by the Board on 693 | 25 September 2018 694 |

695 |

And signed on their behalf by:
696 | 697 | Johnny Fakename, Director 698 |
699 |

700 |
701 |
702 | 703 | 704 | 705 |
706 | 707 | 708 | 709 | -------------------------------------------------------------------------------- /tests/test_accounts/account_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | true 7 | 8 | 9 | 01234567 10 | 11 | 12 | 13 | DEMO XML LIMITED 14 | 15 | 16 | 17 | 2020-12-31 18 | 19 | 20 | 1 21 | 22 | 23 | 1 24 | 25 | 26 | 1 27 | 28 | 29 | 1 30 | 31 | 32 | 33 | Ordinary A shares 34 | 35 | 36 | 1 37 | 38 | 39 | 1 40 | 41 | 42 | 43 | 44 | Ordinary A shares 45 | 46 | 47 | 1 48 | 49 | 50 | 1 51 | 52 | 53 | 1 54 | 55 | 56 | 1 57 | 58 | 59 | 1 60 | 61 | 62 | 63 | true 64 | 65 | 66 | true 67 | 68 | 69 | true 70 | 71 | 72 | true 73 | 74 | 75 | 76 | 77 | 2021-08-18 78 | 79 | 80 | 81 | JOAN IMAGINARYNAME 82 | 83 | 84 | 85 | 1000 86 | 87 | 88 | 1000 89 | 90 | 91 | 1000 92 | 93 | 94 | 95 | xbrli:shares 96 | 97 | 98 | 99 | 100 | xbrli:pure 101 | 102 | 103 | 104 | 105 | iso4217:GBP 106 | 107 | 108 | 109 | 110 | 111 | DEMO XML LIMITED 112 | 113 | 114 | 115 | 116 | 2020-01-01 117 | 118 | 119 | 2020-12-31 120 | 121 | 122 | 123 | 124 | 125 | 126 | DEMO XML LIMITED 127 | 128 | 129 | 130 | 131 | 2019-01-01 132 | 133 | 134 | 2019-12-31 135 | 136 | 137 | 138 | 139 | 140 | 141 | DEMO XML LIMITED 142 | 143 | 144 | 145 | 146 | 2020-01-01 147 | 148 | 149 | 150 | 151 | 152 | 153 | DEMO XML LIMITED 154 | 155 | 156 | 157 | 158 | 2020-12-31 159 | 160 | 161 | 162 | 163 | 164 | 165 | DEMO XML LIMITED 166 | 167 | 168 | 169 | 170 | 2019-01-01 171 | 172 | 173 | 174 | 175 | 176 | 177 | DEMO XML LIMITED 178 | 179 | 180 | 181 | 182 | 2019-12-31 183 | 184 | 185 | 186 | 187 | 2021-08-19 188 | 189 | 190 | -------------------------------------------------------------------------------- /tests/test_accounts/account_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 73 | Micro-entity Accounts - COMMA NAME, TESTAREA AND DISTRICT COMMUNITY ASSOCIATION LIMITED 74 | 75 | 76 | 169 | 170 |
171 |

Registered Number 12345678

172 |

COMMA NAME, TESTAREA AND DISTRICT COMMUNITY ASSOCIATION LIMITED

173 |

Micro-entity Accounts

174 |

31 March 2017

175 |

176 | 177 |

178 | 179 |
180 |
181 |

182 | COMMA NAME, TESTAREA AND DISTRICT COMMUNITY ASSOCIATION LIMITED 183 | Registered Number 12345678 184 |

185 |
186 | 187 |
188 |

Micro-entity Balance Sheet as at 31 March 2017

189 |

190 | 191 | 192 | 193 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 231 | 232 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 250 | 251 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 267 | 268 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 282 | 283 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 303 | 304 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 322 | 323 | 326 | 327 | 328 | 329 |
194 | Notes20172016
202 | 203 | ££
Current Assets 228 | 229 | 4,373 230 | 233 | 4,928 234 |
Creditors: amounts falling due within one year 246 | 247 | (1,154 248 | ) 249 | 252 | (495 253 | ) 254 |
Net current assets (liabilities) 264 | 265 | 3,219 266 | 269 | 4,433 270 |
Total assets less current liabilities 279 | 280 | 3,219 281 | 284 | 4,433 285 |
Total net assets (liabilities) 300 | 301 | 3,219 302 | 305 | 4,433 306 |
Reserves 319 | 320 | 3,219 321 | 324 | 4,433 325 |
330 |

331 | 332 | 333 |
334 |
335 |
    336 |
  • 337 | For the year ending 31 March 2017 the company was entitled to exemption under section 477 of the Companies Act 2006 relating to small companies. 338 |
  • 339 |
  • 340 | The members have not required the company to obtain an audit in accordance with section 476 of the Companies Act 2006. 341 |
  • 342 |
  • 343 | The directors acknowledge their responsibilities for complying with the requirements of the Companies Act 2006 with respect to accounting records and the preparation of accounts. 344 |
  • 345 |
  • 346 | The accounts have been prepared in accordance with the micro-entity provisions and delivered in accordance with the provisions applicable to companies subject to the small companies regime. 347 |
  • 348 |
349 |
350 | 351 |
352 |

Approved by the Board on 353 | 17 August 2017 354 | 355 |

356 |

And signed on their behalf by:
357 | 358 | 359 | F A Kename, Director 360 |
361 |

362 |
363 |
364 | 365 | 366 | 367 |
368 | 369 | 370 | 371 | 372 | 373 | 374 | -------------------------------------------------------------------------------- /tests/test_accounts/account_3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 72 | Micro-entity Accounts - FAKETEST TECHNOLOGIES LIMITED 73 | 74 | 75 | 168 | 169 |
170 |

Registered Number 03456789

171 |

FAKETEST TECHNOLOGIES LIMITED

172 |

Micro-entity Accounts

173 |

30 September 2016

174 |

175 | 176 |

177 | 178 |
179 |
180 |

181 | FAKETEST TECHNOLOGIES LIMITED 182 | Registered Number 03456789 183 |

184 |
185 | 186 |
187 |

Micro-entity Balance Sheet as at 30 September 2016

188 |

189 | 190 | 191 | 192 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 213 | 214 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 237 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 264 | 265 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 280 | 281 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 295 | 296 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 311 | 312 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 326 | 327 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 347 | 348 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 365 | 366 | 369 | 370 | 371 | 372 | 373 |
193 | Notes20162015
201 | 202 | ££
Called up share capital not paid 210 | 211 | 1,000 212 | 215 | 1,000 216 |
Fixed Assets 234 | 235 | - 236 | 238 | - 239 |
Current Assets 261 | 262 | - 263 | 266 | - 267 |
Prepayments and accrued income 277 | 278 | - 279 | 282 | - 283 |
Creditors: amounts falling due within one year 292 | 293 | 0 294 | 297 | 0 298 |
Net current assets (liabilities) 308 | 309 | 0 310 | 313 | 0 314 |
Total assets less current liabilities 323 | 324 | 1,000 325 | 328 | 1,000 329 |
Total net assets (liabilities) 344 | 345 | 1,000 346 | 349 | 1,000 350 |
Capital and reserves 362 | 363 | 1,000 364 | 367 | 1,000 368 |
374 |

375 | 376 | 377 |
378 |
379 |
    380 |
  • 381 | For the year ending 30 September 2016 the company was entitled to exemption under section 477 of the Companies Act 2006 relating to small companies. 382 |
  • 383 |
  • 384 | The members have not required the company to obtain an audit in accordance with section 476 of the Companies Act 2006. 385 |
  • 386 |
  • 387 | The directors acknowledge their responsibilities for complying with the requirements of the Companies Act 2006 with respect to accounting records and the preparation of accounts. 388 |
  • 389 |
  • 390 | The accounts have been prepared in accordance with the micro-entity provisions and delivered in accordance with the provisions applicable to companies subject to the small companies regime. 391 |
  • 392 |
393 |
394 | 395 |
396 |

Approved by the Board on 397 | 22 April 2017 398 | 399 |

400 |

And signed on their behalf by:
401 | 402 | 403 | Fakename McFakerson, Director 404 |
405 |

406 |
407 |
408 | 409 | 410 | 411 |
412 | 413 | 414 | 415 | 416 | 417 | 418 | -------------------------------------------------------------------------------- /tests/test_accounts/account_errors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | true 7 | 8 | 9 | 01234567 10 | 11 | 12 | 13 | DEMO XML LIMITED 14 | 15 | 16 | 17 | 2020-12-31 18 | 19 | 20 | 1 21 | 22 | 23 | 1 24 | 25 | 26 | 1 27 | 28 | 29 | 1 30 | 31 | 32 | 33 | Ordinary A shares 34 | 35 | 36 | 1 37 | 38 | 39 | 1 40 | 41 | 42 | 43 | 44 | Ordinary A shares 45 | 46 | 47 | 1 48 | 49 | 50 | 1 51 | 52 | 53 | 1 54 | 55 | 56 | 1 57 | 58 | 59 | 1 60 | 61 | 62 | 63 | true 64 | 65 | 66 | true 67 | 68 | 69 | true 70 | 71 | 72 | true 73 | 74 | 75 | 76 | 77 | 2021-08-18 78 | 79 | 80 | 81 | JOAN IMAGINARYNAME 82 | 83 | 84 | 85 | 1000 86 | 87 | 88 | 1000 89 | 90 | 91 | 1000 92 | 93 | 94 | 95 | xbrli:shares 96 | 97 | 98 | 99 | 100 | xbrli:pure 101 | 102 | 103 | 104 | 105 | iso4217:GBP 106 | 107 | 108 | 109 | 110 | 111 | DEMO XML LIMITED 112 | 113 | 114 | 115 | 116 | 2020-01-01 117 | 118 | 119 | 2020-12-31 120 | 121 | 122 | 123 | 124 | 125 | 126 | DEMO XML LIMITED 127 | 128 | 129 | 130 | 131 | 2019-01-01 132 | 133 | 134 | 2019-12-31 135 | 136 | 137 | 138 | 139 | 140 | 141 | DEMO XML LIMITED 142 | 143 | 144 | 145 | 146 | 2020-01-01 147 | 148 | 149 | 150 | 151 | 152 | 153 | DEMO XML LIMITED 154 | 155 | 156 | 157 | 158 | 2020-12-31 159 | 160 | 161 | 162 | 163 | 164 | 165 | DEMO XML LIMITED 166 | 167 | 168 | 169 | 170 | 2019-01-01 171 | 172 | 173 | 174 | 175 | 176 | 177 | DEMO XML LIMITED 178 | 179 | 180 | 181 | 182 | 2019-12-31 183 | 184 | 185 | 186 | 187 | 2021-08-19 188 | 189 | 190 | -------------------------------------------------------------------------------- /tests/test_accounts/account_errors_nonnumeric.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 245 | Micro-entity Accounts - GREEN BARKINGSHIRE LIMITED 246 | 247 | 248 | 249 | 
402 | 403 | 404 |
405 |

Registered Number 02345678 406 |

407 |

408 | GREEN BARKINGSHIRE LIMITED 409 |

410 |

Micro-entity Accounts

411 |

31 October 2017

412 |

413 | 414 |

415 | 416 |
417 |
418 |

419 | GREEN BARKINGSHIRE LIMITED 420 | Registered Number 02345678 421 |

422 |
423 | 424 |
425 |

Micro-entity Balance Sheet as at 31 October 2017 426 |

427 | 428 |

429 | 430 | 431 | 432 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 459 | 460 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 478 | 479 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 497 | 498 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 517 | 518 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 538 | 539 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 559 | 560 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 579 | 580 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 598 | 599 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 617 | 618 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 637 | 638 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 660 | 661 | 667 | 668 | 669 | 670 |
433 | Notes20172016
441 | 442 | ££
Fixed Assets 454 | 455 |
456 | - 457 |
458 |
461 |
462 | - 463 |
464 |
Current Assets 473 | 474 |
475 | 2,909 476 |
477 |
480 |
481 | 4,585 482 |
483 |
Prepayments and accrued income 492 | 493 |
494 | - 495 |
496 |
499 |
500 | - 501 |
502 |
Creditors: amounts falling due within one year 511 | 512 |
513 | (6,719 514 | ) 515 |
516 |
519 |
520 | (5,879 521 | ) 522 |
523 |
Net current assets (liabilities) 532 | 533 |
534 | (3,810 535 | ) 536 |
537 |
540 |
541 | (1,294 542 | ) 543 |
544 |
Total assets less current liabilities 553 | 554 |
555 | (3,810 556 | ) 557 |
558 |
561 |
562 | (1,294 563 | ) 564 |
565 |
Creditors: amounts falling due after more than one year 574 | 575 |
576 | 0 577 |
578 |
581 |
582 | 0 583 |
584 |
Provisions for liabilities 593 | 594 |
595 | 0 596 |
597 |
600 |
601 | 0 602 |
603 |
Accruals and deferred income 612 | 613 |
614 | 0 615 |
616 |
619 |
620 | 0 621 |
622 |
Total net assets (liabilities) 631 | 632 |
633 | (3,810 634 | ) 635 |
636 |
639 |
640 | (1,294 641 | ) 642 |
643 |
Reserves 654 | 655 |
656 | (3,810 657 | ) 658 |
659 |
662 |
663 | (1,294 664 | ) 665 |
666 |
671 |

672 | 673 | 674 |
675 |
676 |
    677 |
  • 678 | For the year ending 31 October 2017 the company was entitled to exemption under section 477 of the Companies Act 2006 relating to small companies. 679 |
  • 680 |
  • 681 | The members have not required the company to obtain an audit in accordance with section 476 of the Companies Act 2006. 682 |
  • 683 |
  • 684 | The directors acknowledge their responsibilities for complying with the requirements of the Companies Act 2006 with respect to accounting records and the preparation of accounts. 685 |
  • 686 |
  • 687 | The accounts have been prepared in accordance with the micro-entity provisions and delivered in accordance with the provisions applicable to companies subject to the small companies regime. 688 |
  • 689 |
690 |
691 | 692 |
693 |

Approved by the Board on 694 | 25 September 2018 695 |

696 |

And signed on their behalf by:
697 | 698 | Johnny Fakename, Director 699 |
700 |

701 |
702 |
703 | 704 | 705 | 706 |
707 | 708 | 709 | 710 | -------------------------------------------------------------------------------- /tests/test_classes.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pytest 4 | 5 | from ixbrlparse.core import ixbrlContext, ixbrlNonNumeric, ixbrlNumeric 6 | 7 | 8 | def test_context(): 9 | instant_context = ixbrlContext( 10 | **{ 11 | "_id": "123456", 12 | "entity": None, 13 | "segments": None, 14 | "instant": "2011-01-01", 15 | "startdate": None, 16 | "enddate": None, 17 | } 18 | ) 19 | assert isinstance(instant_context.instant, datetime.date) 20 | assert "2011-01-01" in str(instant_context) 21 | 22 | interval = ixbrlContext( 23 | **{ 24 | "_id": "123456", 25 | "entity": None, 26 | "segments": None, 27 | "instant": None, 28 | "startdate": "2011-01-01", 29 | "enddate": "2011-12-31", 30 | } 31 | ) 32 | assert isinstance(interval.startdate, datetime.date) 33 | assert isinstance(interval.enddate, datetime.date) 34 | assert interval.startdate.year == 2011 35 | assert interval.enddate.month == 12 36 | 37 | # @TODO: Validation of values - eg startdate before enddate 38 | 39 | 40 | def test_context_json(): 41 | instant_context = ixbrlContext( 42 | **{ 43 | "_id": "123456", 44 | "entity": None, 45 | "segments": None, 46 | "instant": "2011-01-01", 47 | "startdate": None, 48 | "enddate": None, 49 | } 50 | ).to_json() 51 | assert instant_context["instant"] == "2011-01-01" 52 | assert instant_context["startdate"] is None 53 | 54 | interval = ixbrlContext( 55 | **{ 56 | "_id": "123456", 57 | "entity": None, 58 | "segments": None, 59 | "instant": None, 60 | "startdate": "2011-01-01", 61 | "enddate": "2011-12-31", 62 | } 63 | ).to_json() 64 | assert interval["startdate"] == "2011-01-01" 65 | assert interval["instant"] is None 66 | 67 | 68 | def test_context_segments(): 69 | i = ixbrlContext( 70 | **{ 71 | "_id": "123456", 72 | "entity": None, 73 | "segments": [{"tag": "1", "value": "2", "dimension": 
"3"}], 74 | "instant": None, 75 | "startdate": "2011-01-01", 76 | "enddate": "2011-12-31", 77 | } 78 | ) 79 | assert isinstance(i.segments, list) and len(i.segments) == 1 80 | assert i.segments is not None and i.segments[0]["value"] == "2" 81 | assert "with segment" in str(i) 82 | 83 | 84 | def test_nonnumeric(): 85 | a = {"context": {}, "format_": "", "value": ""} 86 | 87 | x = ixbrlNonNumeric(name="value", **a) 88 | assert x.schema == "unknown" 89 | assert x.name == "value" 90 | 91 | 92 | def test_nonnumeric_json(): 93 | a = { 94 | "context": ixbrlContext( 95 | **{ 96 | "_id": "123456", 97 | "entity": None, 98 | "segments": None, 99 | "instant": "2011-01-01", 100 | "startdate": None, 101 | "enddate": None, 102 | } 103 | ), 104 | "format_": "", 105 | "value": "", 106 | } 107 | 108 | x = ixbrlNonNumeric(name="value", **a).to_json() 109 | assert "context" in x 110 | 111 | 112 | def test_nonnumeric_schema(): 113 | a = {"context": "", "format_": "", "value": "", "soup_tag": None} 114 | 115 | x = ixbrlNonNumeric(name="schema:value", **a) 116 | assert x.schema == "schema" 117 | assert x.name == "value" 118 | 119 | 120 | def test_numeric_value(): 121 | assert ixbrlNumeric(text="1234").value == 1234 122 | assert ixbrlNumeric(value="1234").value == 1234 123 | 124 | 125 | def test_numeric_value_error(): 126 | with pytest.raises(ValueError): 127 | ixbrlNumeric(text="1234blahblab") 128 | with pytest.raises(ValueError): 129 | ixbrlNumeric(value="1234blahblah") 130 | with pytest.raises(ValueError): 131 | ixbrlNumeric() 132 | 133 | 134 | def test_numeric_to_json(): 135 | assert ( 136 | ixbrlNumeric( 137 | **{ 138 | "context": ixbrlContext( 139 | **{ 140 | "_id": "123456", 141 | "entity": None, 142 | "segments": None, 143 | "instant": "2011-01-01", 144 | "startdate": None, 145 | "enddate": None, 146 | } 147 | ), 148 | "text": "1234", 149 | } 150 | ).to_json()["value"] 151 | == 1234 152 | ) 153 | assert ( 154 | ixbrlNumeric( 155 | **{ 156 | "context": ixbrlContext( 157 | **{ 158 | 
"_id": "123456", 159 | "entity": None, 160 | "segments": None, 161 | "instant": "2011-01-01", 162 | "startdate": None, 163 | "enddate": None, 164 | } 165 | ), 166 | "value": "1234", 167 | } 168 | ).to_json()["value"] 169 | == 1234 170 | ) 171 | 172 | 173 | def test_numeric_already_float(): 174 | assert ixbrlNumeric(value=1234).value == 1234 175 | assert ixbrlNumeric(value=1234.0).value == 1234 176 | 177 | 178 | def test_numeric_comma_replace(): 179 | assert ixbrlNumeric(text="1,234").value == 1234 180 | assert ixbrlNumeric(value="1,234").value == 1234 181 | 182 | 183 | def test_numeric_sign(): 184 | assert ixbrlNumeric(text="1,234", sign="-").value == -1234 185 | assert ixbrlNumeric(value="1,234", sign="-").value == -1234 186 | assert ixbrlNumeric(value="1,234", sign="").value == 1234 187 | 188 | 189 | def test_numeric_blank(): 190 | assert ixbrlNumeric(value="-").value == 0 191 | assert ixbrlNumeric(text="-").value == 0 192 | 193 | 194 | def test_numeric_scale(): 195 | assert ixbrlNumeric(value="1,234", scale="0").value == 1234 196 | assert ixbrlNumeric(value="1,234", scale="1").value == 12340 197 | assert ixbrlNumeric(text="1,234", scale="2").value == 123400 198 | 199 | 200 | def test_numeric_scale_sign(): 201 | assert ixbrlNumeric(value="1,234", scale="3", sign="-").value == -1234000 202 | assert ixbrlNumeric(text="1,234", scale="3", sign="-").value == -1234000 203 | 204 | 205 | def test_numeric_inf_format(): 206 | assert ixbrlNumeric(text="1234", decimals="INF").value == 1234 207 | 208 | 209 | def test_format_zerodash(): 210 | assert ixbrlNumeric(text="-", format="zerodash").value == 0 211 | assert ixbrlNumeric(text="-", format="numdash").value == 0 212 | assert ixbrlNumeric(text="-", format="numdotdecimal").value == 0 213 | 214 | 215 | def test_format_nocontent(): 216 | assert ixbrlNumeric(text="-", format="nocontent").value is None 217 | assert ixbrlNumeric(text="-", format="fixed-empty").value is None 218 | 219 | 220 | def test_format_fixed_true(): 221 | 
assert ixbrlNumeric(text="-", format="fixed-true").value is True 222 | assert ixbrlNumeric(text="-", format="booleantrue").value is True 223 | 224 | 225 | def test_format_fixed_false(): 226 | assert ixbrlNumeric(text="-", format="fixed-false").value is False 227 | assert ixbrlNumeric(text="-", format="booleanfalse").value is False 228 | 229 | 230 | def test_format_numdotdecimal(): 231 | assert ixbrlNumeric(text="1234.12", format="numdotdecimal").value == 1234.12 232 | assert ixbrlNumeric(text="1234", format="numdotdecimal").value == 1234 233 | assert ixbrlNumeric(text="1234.34", format="numcommadot").value == 1234.34 234 | assert ixbrlNumeric(text="1234.45", format="numspacedot").value == 1234.45 235 | assert ixbrlNumeric(text="1,234.45", format="numspacedot").value == 1234.45 236 | assert ixbrlNumeric(text="1234.12", format="num-dot-decimal").value == 1234.12 237 | 238 | 239 | def test_format_numcomma(): 240 | assert ixbrlNumeric(text="1234,12", format="numcomma").value == 1234.12 241 | assert ixbrlNumeric(text="1234", format="numcomma").value == 1234 242 | assert ixbrlNumeric(text="1234,34", format="numcomma").value == 1234.34 243 | assert ixbrlNumeric(text="1234,45", format="numcomma").value == 1234.45 244 | assert ixbrlNumeric(text="1.234,45", format="numcomma").value == 1234.45 245 | assert ixbrlNumeric(text="1234,12", format="numcomma").value == 1234.12 246 | 247 | 248 | def test_format_numwordsen(): 249 | assert ( 250 | ixbrlNumeric( 251 | text="one thousand two hundred and thirty four", 252 | format="numwordsen", 253 | ).value 254 | == 1234 255 | ) 256 | assert ixbrlNumeric(text="eight", format="numwordsen").value == 8 257 | assert ixbrlNumeric(text=8, format="numwordsen").value == 8 258 | assert ixbrlNumeric(text="Eight", format="numwordsen").value == 8 259 | assert ( 260 | ixbrlNumeric( 261 | text="one thousand two hundred and thirty four point four five", 262 | format="numwordsen", 263 | ).value 264 | == 1234.45 265 | ) 266 | assert 
ixbrlNumeric(text="no", format="numwordsen").value == 0 267 | assert ixbrlNumeric(text="None", format="numwordsen").value == 0 268 | assert ixbrlNumeric(text="none", format="numwordsen").value == 0 269 | 270 | 271 | def test_format_dates(): 272 | assert ixbrlNonNumeric(value="20 September 2020", format_="datelonguk").value == datetime.date(2020, 9, 20) 273 | assert ixbrlNonNumeric(value="20 September 2020", format_="datedaymonthyearen").value == datetime.date(2020, 9, 20) 274 | assert ixbrlNonNumeric(value="20th September 2020", format_="datedaymonthyearen").value == datetime.date( 275 | 2020, 9, 20 276 | ) 277 | assert ixbrlNonNumeric(value="20.9.20", format_="datedaymonthyear").value == datetime.date(2020, 9, 20) 278 | 279 | 280 | def test_format_notimplemented(): 281 | with pytest.warns(): 282 | assert ixbrlNonNumeric(value="blahdeblah", format_="blahdeblah").value == "blahdeblah" 283 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | 4 | from click.testing import CliRunner 5 | 6 | import ixbrlparse.__main__ as ixmain 7 | from ixbrlparse.cli import ixbrlparse_cli 8 | 9 | _ = ixmain 10 | 11 | 12 | def test_cli(): 13 | buffer = io.StringIO() 14 | runner = CliRunner() 15 | result = runner.invoke(ixbrlparse_cli, ["--outfile", buffer, "tests/test_accounts/account_1.html"]) # type: ignore 16 | assert result.exit_code == 0 17 | assert ",CurrentAssets,2909.0," in buffer.getvalue() 18 | 19 | 20 | def test_cli_json(): 21 | buffer = io.StringIO() 22 | runner = CliRunner() 23 | result = runner.invoke( 24 | ixbrlparse_cli, # type: ignore 25 | ["--outfile", buffer, "--format", "json", "tests/test_accounts/account_1.html"], # type: ignore 26 | ) 27 | assert result.exit_code == 0 28 | data = json.loads(buffer.getvalue()) 29 | assert data["numeric"][2]["name"] == "CurrentAssets" 30 | assert 
data["numeric"][2]["value"] == 2909.0 31 | 32 | 33 | def test_cli_unknown_format(): 34 | buffer = io.StringIO() 35 | runner = CliRunner() 36 | result = runner.invoke( 37 | ixbrlparse_cli, # type: ignore 38 | ["--outfile", buffer, "--format", "flurg", "tests/test_accounts/account_1.html"], # type: ignore 39 | ) 40 | assert result.exit_code != 0 41 | data = buffer.getvalue() 42 | assert not data 43 | 44 | 45 | def test_cli_jsonl(): 46 | buffer = io.StringIO() 47 | runner = CliRunner() 48 | result = runner.invoke( 49 | ixbrlparse_cli, # type: ignore 50 | ["--outfile", buffer, "--format", "jsonl", "tests/test_accounts/account_1.html"], # type: ignore 51 | ) 52 | assert result.exit_code == 0 53 | lines = buffer.getvalue().splitlines() 54 | for line in lines: 55 | data = json.loads(line) 56 | if data["name"] == "CurrentAssets": 57 | assert data["value"] == 2909.0 58 | break 59 | else: 60 | msg = "CurrentAssets not found" 61 | raise AssertionError(msg) 62 | -------------------------------------------------------------------------------- /tests/test_formats.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | 3 | import pytest 4 | 5 | from ixbrlparse.components.formats import ( 6 | ixbrlFormat, 7 | ixtDateDayMonthYear, 8 | ixtDateFormat, 9 | ixtDateLongUK, 10 | ixtDateLongUS, 11 | ixtDateShortUK, 12 | ixtDateShortUS, 13 | ixtDateSlashUS, 14 | ixtFixedFalse, 15 | ixtFixedTrue, 16 | ixtNoContent, 17 | ixtNumComma, 18 | ixtNumDotDecimal, 19 | ixtNumWordsEn, 20 | ixtZeroDash, 21 | ) 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "dateclass, datestring, expecteddate, errordate", 26 | ( 27 | (ixtDateFormat, "2019-01-05", date(2019, 1, 5), "0400502019"), 28 | (ixtDateLongUK, "05 January 2019", date(2019, 1, 5), "0400502019"), 29 | (ixtDateLongUK, "05 January 19", date(2019, 1, 5), "0400502019"), 30 | (ixtDateLongUK, "10 July 1023", date(1023, 7, 10), "2019005004"), 31 | (ixtDateLongUS, "January 05, 2019", date(2019, 
1, 5), "0400502019"), 32 | (ixtDateLongUS, "January 05, 19", date(2019, 1, 5), "0400502019"), 33 | (ixtDateShortUK, "05 Jan 2019", date(2019, 1, 5), "0400502019"), 34 | (ixtDateShortUK, "05 Jan 19", date(2019, 1, 5), "0400502019"), 35 | (ixtDateShortUS, "Jan 05, 2019", date(2019, 1, 5), "0400502019"), 36 | (ixtDateShortUS, "Jan 05, 19", date(2019, 1, 5), "0400502019"), 37 | (ixtDateDayMonthYear, "05/01/2019", date(2019, 1, 5), "0400502019"), 38 | (ixtDateDayMonthYear, "05.01.2019", date(2019, 1, 5), "0400502019"), 39 | (ixtDateDayMonthYear, "05.01.19", date(2019, 1, 5), "0400502019"), 40 | (ixtDateDayMonthYear, "05/01/2019", date(2019, 1, 5), "2019005004"), 41 | (ixtDateDayMonthYear, "05/01/19", date(2019, 1, 5), "2019005004"), 42 | (ixtDateDayMonthYear, "05.01.2019", date(2019, 1, 5), "0400502019"), 43 | (ixtDateDayMonthYear, "05.01.19", date(2019, 1, 5), "0400502019"), 44 | (ixtDateSlashUS, "01/05/2019", date(2019, 1, 5), "2019005004"), 45 | (ixtDateSlashUS, "01/05/19", date(2019, 1, 5), "2019005004"), 46 | (ixtDateSlashUS, "01.05.2019", date(2019, 1, 5), "0400502019"), 47 | (ixtDateSlashUS, "01.05.19", date(2019, 1, 5), "0400502019"), 48 | ), 49 | ) 50 | def test_date_formats(dateclass, datestring, expecteddate, errordate): 51 | f = dateclass("dateformat") 52 | assert f.parse_value(datestring) == expecteddate 53 | 54 | with pytest.warns(): 55 | assert f.parse_value(1234) is None 56 | 57 | with pytest.raises(ValueError): 58 | assert f.parse_value(errordate) is None 59 | 60 | 61 | def test_ixtnumwordsen(): 62 | f = ixtNumWordsEn("format") 63 | assert f.parse_value("no") == 0 64 | assert f.parse_value("none") == 0 65 | assert f.parse_value("none ") == 0 66 | assert f.parse_value("eighty-five") == 85.0 67 | assert f.parse_value("seven hundred and eighty-five") == 785.0 68 | 69 | with pytest.raises(ValueError): 70 | assert f.parse_value("blurdy-burg") is None 71 | 72 | 73 | def test_ixtnumcomma(): 74 | f = ixtNumComma("format") 75 | assert f.parse_value("0") == 0 76 
| assert f.parse_value("85") == 85.0 77 | assert f.parse_value("85.123") == 85123.0 78 | 79 | with pytest.raises(ValueError): 80 | assert f.parse_value("blurdy-burg") is None 81 | 82 | 83 | @pytest.mark.parametrize( 84 | "formatclass, valuestring, expectedvalue, errorvalue", 85 | ( 86 | (ixbrlFormat, None, None, None), 87 | (ixtFixedFalse, "hfkjdah", False, None), 88 | (ixtFixedTrue, "hfkjdah", True, None), 89 | (ixtNoContent, "hfkjdah", None, None), 90 | (ixtNumDotDecimal, "235,100,356.79", 235_100_356.79, None), 91 | (ixtNumDotDecimal, "100,356.79", 100_356.79, None), 92 | (ixtNumDotDecimal, "100", 100, None), 93 | (ixtNumDotDecimal, 100, 100, None), 94 | (ixtNumComma, "235.100.345,79", 235_100_345.79, None), 95 | (ixtNumComma, "100.345,79", 100_345.79, None), 96 | (ixtNumComma, "100", 100, None), 97 | (ixtNumComma, 100, 100, None), 98 | (ixtZeroDash, "cjsjdsf", 0, None), 99 | (ixtZeroDash, "-", 0, None), 100 | ), 101 | ) 102 | def test_formats(formatclass, valuestring, expectedvalue, errorvalue): 103 | f = formatclass("format") 104 | assert f.parse_value(valuestring) == expectedvalue 105 | 106 | if errorvalue is not None: 107 | with pytest.raises(ValueError): 108 | assert f.parse_value(errorvalue) is None 109 | -------------------------------------------------------------------------------- /tests/test_parse.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | from datetime import date 4 | 5 | import pytest 6 | from bs4 import BeautifulSoup, Tag 7 | 8 | from ixbrlparse import IXBRL 9 | from ixbrlparse.core import ( 10 | BaseParser, 11 | IXBRLParseError, 12 | ixbrlContext, 13 | ixbrlNonNumeric, 14 | ixbrlNumeric, 15 | ) 16 | 17 | TEST_ACCOUNTS = [ 18 | "tests/test_accounts/account_1.html", 19 | "tests/test_accounts/account_2.html", 20 | "tests/test_accounts/account_3.html", 21 | "tests/test_accounts/account_4.html", 22 | "tests/test_accounts/account_5.html", 23 | 
"tests/test_accounts/account_6.xhtml", 24 | "tests/test_accounts/account_errors.html", 25 | "tests/test_accounts/account_errors_nonnumeric.html", 26 | "tests/test_accounts/account_errors_date.html", 27 | ] 28 | TEST_XML_ACCOUNTS = [ 29 | "tests/test_accounts/account_1.xml", 30 | "tests/test_accounts/account_errors.xml", 31 | ] 32 | 33 | EXPECTED_TABLE_KEYS = [ 34 | "schema", 35 | "name", 36 | "value", 37 | "unit", 38 | "instant", 39 | "startdate", 40 | "enddate", 41 | ] 42 | 43 | 44 | def test_base_parser(): 45 | p = BaseParser() 46 | 47 | assert p._get_schema() is None 48 | assert p._get_contexts() is None 49 | assert p._get_units() is None 50 | assert p._get_nonnumeric() is None 51 | assert p._get_numeric() is None 52 | 53 | s = BeautifulSoup("<html attribute='hello'>World</html>", "html.parser") 54 | 55 | assert p._get_tag_attribute(s, "html", "attribute") == "hello" 56 | assert p._get_tag_attribute(s, "html", "missingattribute") is None 57 | assert p._get_tag_text(s, "html") == "World" 58 | 59 | 60 | def test_open(): 61 | with open(TEST_ACCOUNTS[0]) as a: 62 | x = IXBRL(a) 63 | assert x.filetype == "ixbrl" 64 | assert isinstance(x.soup, BeautifulSoup) 65 | 66 | x = IXBRL.open(TEST_ACCOUNTS[1]) 67 | assert x.filetype == "ixbrl" 68 | assert isinstance(x.soup, BeautifulSoup) 69 | 70 | 71 | def test_open_str(): 72 | with open(TEST_ACCOUNTS[0]) as a: 73 | content = a.read() 74 | x = IXBRL(io.StringIO(content)) 75 | assert x.filetype == "ixbrl" 76 | assert isinstance(x.soup, BeautifulSoup) 77 | 78 | 79 | def test_open_malformed_str(): 80 | content = "blahblah" 81 | with pytest.raises(IXBRLParseError): 82 | IXBRL(io.StringIO(content)) 83 | 84 | 85 | def test_open_xml(): 86 | with open(TEST_XML_ACCOUNTS[0]) as a: 87 | x = IXBRL(a) 88 | assert x.filetype == "xbrl" 89 | assert isinstance(x.soup, BeautifulSoup) 90 | 91 | x = IXBRL.open(TEST_XML_ACCOUNTS[0]) 92 | assert x.filetype == "xbrl" 93 | assert isinstance(x.soup, BeautifulSoup) 94 | 95 | 96 | def test_open_xml_str(): 97 | with
open(TEST_XML_ACCOUNTS[0]) as a: 98 | content = a.read() 99 | x = IXBRL(io.StringIO(content)) 100 | assert x.filetype == "xbrl" 101 | assert isinstance(x.soup, BeautifulSoup) 102 | 103 | 104 | @pytest.mark.parametrize( 105 | "account,schema,namespaces", 106 | zip( 107 | TEST_ACCOUNTS[0:5] + TEST_XML_ACCOUNTS[0:1], 108 | [ 109 | "https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd", 110 | "http://www.xbrl.org/uk/gaap/core/2009-09-01/uk-gaap-full-2009-09-01.xsd", 111 | "http://www.xbrl.org/uk/gaap/core/2009-09-01/uk-gaap-full-2009-09-01.xsd", 112 | "https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd", 113 | "https://xbrl.frc.org.uk/FRS-102/2014-09-01/FRS-102-2014-09-01.xsd", 114 | "http://www.companieshouse.gov.uk/ef/xbrl/uk/fr/gaap/ae/2009-06-21/uk-gaap-ae-2009-06-21.xsd", 115 | ], # type: ignore 116 | [ 117 | 11, 118 | 12, 119 | 12, 120 | 38, 121 | 19, 122 | 10, 123 | ], # type: ignore 124 | ), 125 | ) 126 | def test_schema(account, schema, namespaces): 127 | x = IXBRL.open(account) 128 | assert x.schema == schema 129 | assert len(x.namespaces) == namespaces 130 | 131 | 132 | @pytest.mark.parametrize( 133 | "account,contexts,expected_key", 134 | [(TEST_ACCOUNTS[0], 12, "dcur6"), (TEST_XML_ACCOUNTS[0], 6, "y2")], 135 | ) 136 | def test_contexts(account, contexts, expected_key): 137 | x = IXBRL.open(account) 138 | 139 | # test all the contexts have been found 140 | assert len(x.contexts) == contexts 141 | 142 | # test that the context is correct class 143 | assert isinstance(next(iter(x.contexts.values())), ixbrlContext) 144 | 145 | # test an expected key is in the contexts 146 | assert expected_key in x.contexts 147 | 148 | 149 | def test_contexts_values(): 150 | x = IXBRL.open(TEST_ACCOUNTS[0]) 151 | 152 | # test values have been correctly parsed 153 | assert x.contexts["icur1"].instant == date(2017, 10, 31) 154 | assert x.contexts["dcur1"].startdate == date(2016, 11, 1) 155 | assert x.contexts["dcur1"].enddate == date(2017, 10, 31) 156 
| assert x.contexts["dcur1"].entity["identifier"] == "02345678" 157 | assert x.contexts["dcur1"].entity["scheme"] == "http://www.companieshouse.gov.uk/" 158 | 159 | 160 | def test_contexts_segments(): 161 | x = IXBRL.open(TEST_ACCOUNTS[0]) 162 | 163 | assert len(x.contexts["dcur6"].segments) == 1 164 | assert x.contexts["dcur6"].segments[0]["tag"] in "xbrldi:explicitMember" 165 | assert x.contexts["dcur6"].segments[0]["value"] == "uk-bus:FullAccounts" 166 | assert x.contexts["dcur6"].segments[0].get("dimension") == "uk-bus:AccountsTypeDimension" 167 | 168 | 169 | def test_contexts_values_xml(): 170 | x = IXBRL.open(TEST_XML_ACCOUNTS[0]) 171 | 172 | # test values have been correctly parsed 173 | assert x.contexts["s1"].instant == date(2020, 1, 1) 174 | assert x.contexts["y1"].startdate == date(2020, 1, 1) 175 | assert x.contexts["y1"].enddate == date(2020, 12, 31) 176 | assert x.contexts["y1"].entity["identifier"].strip() == "DEMO XML LIMITED" 177 | assert x.contexts["y1"].entity["scheme"] == "gee-lawson-/results" 178 | 179 | 180 | # def test_contexts_segments_xml(): 181 | # x = IXBRL.open(TEST_XML_ACCOUNTS[0]) 182 | 183 | # assert len(x.contexts["dcur6"].segments) == 1 184 | # assert x.contexts["dcur6"].segments[0]["tag"] in "xbrldi:explicitMember" 185 | # assert x.contexts["dcur6"].segments[0]["value"] == "uk-bus:FullAccounts" 186 | # assert ( 187 | # x.contexts["dcur6"].segments[0].get("dimension") 188 | # == "uk-bus:AccountsTypeDimension" 189 | # ) 190 | 191 | 192 | def test_contexts_no_prefix(): 193 | # check an account with <context> elements (without prefix) 194 | x = IXBRL.open(TEST_ACCOUNTS[1]) 195 | 196 | # test values have been correctly parsed 197 | assert x.contexts["current-period-director2"].startdate == date(2016, 4, 1) 198 | assert x.contexts["current-period-director2"].enddate == date(2017, 3, 31) 199 | assert x.contexts["current-period-director2"].entity["identifier"] == "12345678" 200 | assert x.contexts["current-period-director2"].entity["scheme"] ==
"http://www.companieshouse.gov.uk/" 201 | 202 | 203 | @pytest.mark.parametrize("account", [a for a in TEST_ACCOUNTS if "error" not in a]) 204 | def test_json(account): 205 | x = IXBRL.open(account) 206 | x.to_json() 207 | 208 | assert json.dumps(x.to_json()) 209 | 210 | 211 | def test_units(): 212 | x = IXBRL.open(TEST_ACCOUNTS[0]) 213 | 214 | assert len(x.units) == 1 215 | assert x.units["GBP"] == "iso4217:GBP" 216 | 217 | 218 | def test_units_no_prefix(): 219 | x = IXBRL.open(TEST_ACCOUNTS[1]) 220 | 221 | assert len(x.units) == 2 222 | assert x.units["currencyUnit"] == "iso4217:GBP" 223 | assert x.units["shares"] == "shares" 224 | 225 | 226 | def test_nonnumeric(): 227 | x = IXBRL.open(TEST_ACCOUNTS[2]) 228 | 229 | assert len(x.nonnumeric) == 15 230 | assert isinstance(x.nonnumeric[0], ixbrlNonNumeric) 231 | assert "FAKETEST TECHNOLOGIES LIMITED" in [n.value for n in x.nonnumeric] 232 | value_seen = False 233 | for n in x.nonnumeric: 234 | if n.schema == "uk-gaap-cd-bus" and n.name == "UKCompaniesHouseRegisteredNumber": 235 | assert n.value == "03456789" 236 | assert isinstance(n.context, ixbrlContext) 237 | value_seen = True 238 | assert isinstance(x.nonnumeric[0].soup_tag, Tag) 239 | assert value_seen 240 | 241 | 242 | def test_nonnumeric_xml(): 243 | x = IXBRL.open(TEST_XML_ACCOUNTS[0]) 244 | 245 | assert len(x.nonnumeric) == 14 246 | assert isinstance(x.nonnumeric[0], ixbrlNonNumeric) 247 | assert "DEMO XML LIMITED" in [n.value for n in x.nonnumeric] 248 | value_seen = False 249 | for n in x.nonnumeric: 250 | if n.name == "NameApprovingDirector": 251 | assert n.value == "JOAN IMAGINARYNAME" 252 | assert isinstance(n.context, ixbrlContext) 253 | value_seen = True 254 | assert value_seen 255 | 256 | 257 | def test_numeric(): 258 | x = IXBRL.open(TEST_ACCOUNTS[3]) 259 | 260 | assert len(x.numeric) == 32 261 | value_seen = False 262 | for n in x.numeric: 263 | assert isinstance(n, ixbrlNumeric) 264 | 265 | if ( 266 | n.name == "NetCurrentAssetsLiabilities" 267 | 
and isinstance(n.context, ixbrlContext) 268 | and n.context.id == "cfwd_31_03_2017" 269 | ): 270 | assert n.format is not None and n.format.sign == "-" 271 | assert n.value == -17957 272 | value_seen = True 273 | 274 | if n.format is not None and n.format.sign == "-": 275 | assert n.value is not None and n.value < 0 276 | 277 | assert value_seen 278 | 279 | assert x.numeric[0].unit == "iso4217:GBP" 280 | assert x.numeric[0].value == 52982 281 | assert x.numeric[0].name == "PropertyPlantEquipment" 282 | assert x.numeric[0].schema == "ns5" 283 | assert isinstance(x.nonnumeric[0].soup_tag, Tag) 284 | 285 | 286 | def test_numeric_xml(): 287 | x = IXBRL.open(TEST_XML_ACCOUNTS[0]) 288 | 289 | assert len(x.numeric) == 14 290 | value_seen = False 291 | for n in x.numeric: 292 | assert isinstance(n, ixbrlNumeric) 293 | 294 | if n.name == "NumberOrdinarySharesAllotted" and isinstance(n.context, ixbrlContext) and n.context.id == "e2": 295 | assert n.format is not None and n.format.sign == "" 296 | assert n.value == 1 297 | value_seen = True 298 | 299 | if n.format is not None and n.format.sign == "-": 300 | assert n.value is not None and n.value < 0 301 | 302 | assert value_seen 303 | 304 | assert x.numeric[0].unit == "iso4217:GBP" 305 | assert x.numeric[0].value == 1 306 | assert x.numeric[0].name == "CashBankInHand" 307 | assert x.numeric[0].schema == "unknown" 308 | assert isinstance(x.nonnumeric[0].soup_tag, Tag) 309 | 310 | 311 | def test_exclude(): 312 | x = IXBRL.open(TEST_ACCOUNTS[5]) 313 | value_seen = False 314 | for n in x.nonnumeric: 315 | if n.name == "BalanceSheetDate": 316 | assert n.value == date(2022, 7, 31) 317 | value_seen = True 318 | 319 | assert value_seen 320 | 321 | 322 | def test_continuation(): 323 | x = IXBRL.open(TEST_ACCOUNTS[5]) 324 | value_seen = False 325 | for n in x.nonnumeric: 326 | if n.name == "AccountantsReportOnFinancialStatements": 327 | assert n.value == ( 328 | "This report is made solely to the board of directors of Test Exclude " 
329 | "Limited, as a body, in accordance with the terms of our engagement " 330 | "letter dated 18 November 2022. Our work has been undertaken solely " 331 | "to prepare for your approval the financial statements of Test Exclude " 332 | "Limited and state those matters that we have agreed to state to the " 333 | "board of directors of Test Exclude Limited, as a body, in this report " 334 | "in accordance with ICAEW Technical Release 07/16 AAF. To the fullest " 335 | "extent permitted by law, we do not accept or assume responsibility " 336 | "to anyone other than Test Exclude Limited and its board of directors " 337 | "as a body, for our work or for this report." 338 | ) 339 | value_seen = True 340 | 341 | assert value_seen 342 | 343 | 344 | def test_table_output(): 345 | x = IXBRL.open(TEST_ACCOUNTS[1]) 346 | table = x.to_table("all") 347 | 348 | assert len(table) == 27 349 | 350 | for row in table[0:5]: 351 | for col, value in row.items(): 352 | # output is two dimensional 353 | assert not isinstance(value, (list, dict, tuple)) 354 | 355 | # column is expected 356 | assert col in EXPECTED_TABLE_KEYS or col.startswith("segment") 357 | 358 | # needs either an instant or start & end dates 359 | assert row["instant"] or (row["startdate"] and row["enddate"]) 360 | 361 | 362 | def test_table_output_numeric(): 363 | x = IXBRL.open(TEST_ACCOUNTS[2]) 364 | table = x.to_table("numeric") 365 | 366 | assert len(table) == 18 367 | 368 | for row in table[0:5]: 369 | for col, value in row.items(): 370 | # output is two dimensional 371 | assert not isinstance(value, (list, dict, tuple)) 372 | 373 | # column is expected 374 | assert col in EXPECTED_TABLE_KEYS or col.startswith("segment") 375 | 376 | # value is numeric 377 | assert isinstance(row["value"], (int, float)) 378 | 379 | 380 | def test_table_output_nonnumeric(): 381 | x = IXBRL.open(TEST_ACCOUNTS[3]) 382 | table = x.to_table("nonnumeric") 383 | 384 | assert len(table) == 59 385 | 386 | for row in table[0:5]: 387 | for 
col, value in row.items(): 388 | # output is two dimensional 389 | assert not isinstance(value, (list, dict, tuple)) 390 | 391 | # column is expected 392 | assert col in EXPECTED_TABLE_KEYS or col.startswith("segment") 393 | 394 | # value is a string 395 | assert isinstance(row["value"], (str, type(None))) 396 | 397 | 398 | def test_errors_raised(): 399 | with open(TEST_ACCOUNTS[6]) as a: 400 | with pytest.raises(NotImplementedError): 401 | IXBRL(a) 402 | 403 | with open(TEST_ACCOUNTS[6]) as a: 404 | x = IXBRL(a, raise_on_error=False) 405 | assert isinstance(x.soup, BeautifulSoup) 406 | assert len(x.errors) == 1 407 | 408 | 409 | def test_errors_raised_nonnumeric(): 410 | with open(TEST_ACCOUNTS[7]) as a: 411 | with pytest.raises(KeyError): 412 | IXBRL(a) 413 | 414 | with open(TEST_ACCOUNTS[7]) as a: 415 | x = IXBRL(a, raise_on_error=False) 416 | assert isinstance(x.soup, BeautifulSoup) 417 | assert len(x.errors) == 2 418 | 419 | 420 | def test_errors_raised_date(): 421 | with open(TEST_ACCOUNTS[8]) as a: 422 | with pytest.raises((OSError, ValueError)): 423 | IXBRL(a) 424 | 425 | with open(TEST_ACCOUNTS[8]) as a: 426 | x = IXBRL(a, raise_on_error=False) 427 | assert isinstance(x.soup, BeautifulSoup) 428 | assert len(x.errors) == 2 429 | 430 | 431 | def test_errors_raised_open(): 432 | with pytest.raises(NotImplementedError): 433 | IXBRL.open(TEST_ACCOUNTS[6]) 434 | 435 | x = IXBRL.open(TEST_ACCOUNTS[6], raise_on_error=False) 436 | assert isinstance(x.soup, BeautifulSoup) 437 | assert len(x.errors) == 1 438 | 439 | 440 | def test_errors_raised_open_xml(): 441 | with pytest.raises(NotImplementedError): 442 | IXBRL.open(TEST_XML_ACCOUNTS[1]) 443 | 444 | x = IXBRL.open(TEST_XML_ACCOUNTS[1], raise_on_error=False) 445 | assert isinstance(x.soup, BeautifulSoup) 446 | assert len(x.errors) == 1 447 | 448 | 449 | def test_errors_raised_open_nonnumeric(): 450 | with pytest.raises(KeyError): 451 | IXBRL.open(TEST_ACCOUNTS[7]) 452 | 453 | x = IXBRL.open(TEST_ACCOUNTS[7], 
raise_on_error=False) 454 | assert isinstance(x.soup, BeautifulSoup) 455 | assert len(x.errors) == 2 456 | -------------------------------------------------------------------------------- /tests/test_plugins.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Union 3 | 4 | import pytest 5 | 6 | from ixbrlparse import hookimpl 7 | from ixbrlparse.components._base import ixbrlFormat 8 | from ixbrlparse.components.formats import ixtDateDayMonthYear, ixtZeroDash 9 | from ixbrlparse.components.transform import get_format 10 | from ixbrlparse.plugins import pm 11 | 12 | 13 | def test_using_test_plugin(): 14 | class FlurgFormat(ixbrlFormat): 15 | format_names = ("flurg",) 16 | 17 | def parse_value(self, value: Union[str, int, float]) -> str: # noqa: ARG002 18 | return "flurg" 19 | 20 | class TestPlugin: 21 | @hookimpl 22 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 23 | return [FlurgFormat] 24 | 25 | pm.register(TestPlugin(), name="flurg") 26 | try: 27 | # check new format is available 28 | assert get_format("flurg") == FlurgFormat 29 | 30 | # check existing formats are still available 31 | assert get_format("zerodash") == ixtZeroDash 32 | finally: 33 | pm.unregister(name="flurg") 34 | 35 | 36 | def test_using_test_plugin_alt_syntax(): 37 | class FlurgFormat(ixbrlFormat): 38 | format_names = ("flurg",) 39 | 40 | def parse_value(self, value: Union[str, int, float]) -> str: # noqa: ARG002 41 | return "flurg" 42 | 43 | class TestPlugin: 44 | @hookimpl(specname="ixbrl_add_formats") 45 | def add_flurg_format(self) -> list[type[ixbrlFormat]]: 46 | return [FlurgFormat] 47 | 48 | pm.register(TestPlugin(), name="flurg") 49 | try: 50 | # check new format is available 51 | assert get_format("flurg") == FlurgFormat 52 | 53 | # check existing formats are still available 54 | assert get_format("zerodash") == ixtZeroDash 55 | finally: 56 | pm.unregister(name="flurg") 57 | 58 | 59 | def 
test_registering_duplicate_plugin(): 60 | class FlurgFormat(ixbrlFormat): 61 | format_names = ("zerodash",) 62 | 63 | def parse_value(self, value: Union[str, int, float]) -> str: # noqa: ARG002 64 | return "flurg" 65 | 66 | class TestPlugin: 67 | @hookimpl() 68 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 69 | return [FlurgFormat] 70 | 71 | pm.register(TestPlugin(), name="flurg") 72 | try: 73 | assert get_format("zerodash") == FlurgFormat 74 | with pytest.raises(NotImplementedError): 75 | get_format("flurg") 76 | finally: 77 | pm.unregister(name="flurg") 78 | 79 | 80 | def test_registering_duplicate_plugin_last(): 81 | class FlurgFormat(ixbrlFormat): 82 | format_names = ("zerodash",) 83 | 84 | def parse_value(self, value: Union[str, int, float]) -> str: # noqa: ARG002 85 | return "flurg" 86 | 87 | class TestPlugin: 88 | @hookimpl(trylast=True) 89 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 90 | return [FlurgFormat] 91 | 92 | pm.register(TestPlugin(), name="flurg") 93 | try: 94 | assert get_format("zerodash") == FlurgFormat 95 | with pytest.raises(NotImplementedError): 96 | get_format("flurg") 97 | finally: 98 | pm.unregister(name="flurg") 99 | 100 | 101 | def test_registering_duplicate_plugin_first(): 102 | class FlurgFormat(ixbrlFormat): 103 | format_names = ("zerodash",) 104 | 105 | def parse_value(self, value: Union[str, int, float]) -> str: # noqa: ARG002 106 | return "flurg" 107 | 108 | class TestPlugin: 109 | @hookimpl(tryfirst=True) 110 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 111 | return [FlurgFormat] 112 | 113 | pm.register(TestPlugin(), name="flurg") 114 | try: 115 | assert get_format("zerodash") == ixtZeroDash 116 | with pytest.raises(NotImplementedError): 117 | get_format("flurg") 118 | finally: 119 | pm.unregister(name="flurg") 120 | 121 | 122 | @pytest.mark.parametrize( 123 | "datestring, expecteddate", 124 | ( 125 | ("05/01/2019", date(2019, 1, 5)), 126 | ("05.01.2019", date(2019, 1, 5)), 127 | 
("05.01.19", date(2019, 1, 5)), 128 | ("05/01/2019", date(2019, 1, 5)), 129 | ("05/01/19", date(2019, 1, 5)), 130 | ("05.01.2019", date(2019, 1, 5)), 131 | ("05.01.19", date(2019, 1, 5)), 132 | ("29 aug 2022", date(2022, 8, 29)), 133 | ), 134 | ) 135 | def test_plugin_override_date(datestring, expecteddate): 136 | class FlurgFormat(ixtDateDayMonthYear): 137 | date_format = (*ixtDateDayMonthYear.date_format, "%d-%b-%Y", "%d-%b-%y") 138 | 139 | class TestPlugin: 140 | @hookimpl 141 | def ixbrl_add_formats(self) -> list[type[ixbrlFormat]]: 142 | return [FlurgFormat] 143 | 144 | pm.register(TestPlugin(), name="flurg") 145 | format_class = get_format("datedaymonthyear") 146 | try: 147 | assert format_class == FlurgFormat 148 | assert format_class("datedaymonthyear").parse_value(datestring) == expecteddate 149 | finally: 150 | pm.unregister(name="flurg") 151 | --------------------------------------------------------------------------------