├── .cookiecutter.json ├── .editorconfig ├── .gitattributes ├── .github ├── dependabot.yml ├── labels.yml ├── release-drafter.yml └── workflows │ ├── constraints.txt │ ├── labeler.yml │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .safety-policy.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── codecov.yml ├── docs ├── _static │ ├── favicon.ico │ └── logo.png ├── codeofconduct.md ├── conf.py ├── contributing.md ├── faq.md ├── how-it-works.md ├── index.md ├── installation.md ├── license.md ├── recommended-template-fields.md ├── reference.md ├── requirements.txt ├── tutorial.md └── usage.md ├── noxfile.py ├── pyproject.toml ├── requirements.txt ├── src └── invoice2data │ ├── __init__.py │ ├── __main__.py │ ├── extract │ ├── __init__.py │ ├── invoice_template.py │ ├── loader.py │ ├── parsers │ │ ├── __init__.py │ │ ├── __interface__.py │ │ ├── lines.py │ │ ├── regex.py │ │ └── static.py │ ├── plugins │ │ ├── __init__.py │ │ ├── __interface__.py │ │ ├── lines.py │ │ └── tables.py │ ├── templates │ │ ├── au │ │ │ ├── au.com.opal.yml │ │ │ └── au.com.telstra.yml │ │ ├── be │ │ │ ├── be.accor.invest.ibis.yml │ │ │ ├── be.accor.invest.novotel.yml │ │ │ ├── be.boucherie.pochet.yml │ │ │ ├── be.cebeo.yml │ │ │ ├── be.eg_retail.yml │ │ │ ├── be.lampiris.facture-dacompte.yml │ │ │ ├── be.lampiris.factuur.yml │ │ │ ├── be.lampiris.regularisation.yml │ │ │ ├── be.melchior-vins.yml │ │ │ ├── be.proximus.yml │ │ │ ├── be.scarlet.yml │ │ │ └── be.securex.social.yml │ │ ├── ch │ │ │ └── ch.pcengines.yml │ │ ├── com │ │ │ ├── com.AzureInterior.yml │ │ │ ├── com.amazon.aws.yml │ │ │ ├── com.apple.yml │ │ │ ├── com.apps4rent.yml │ │ │ ├── com.binarylife.yml │ │ │ ├── com.bloomberg.yml │ │ │ ├── com.cloudflare.yml │ │ │ ├── com.cloudns.yml │ │ │ ├── com.datadoghq.yml │ │ │ ├── com.digitalocean.yml │ │ │ ├── com.envato.yml │ │ │ ├── com.eur.aliexpress.json │ │ │ ├── com.expressvpn.yml │ │ │ ├── 
com.expressvpn_prio6.yml │ │ │ ├── com.flipkart.WSRetail.json │ │ │ ├── com.ftserussell.yml │ │ │ ├── com.github.yml │ │ │ ├── com.globalsign.yml │ │ │ ├── com.google.adwords.hk.yml │ │ │ ├── com.hetzner.yml │ │ │ ├── com.hobohost.yml │ │ │ ├── com.jamiepro.yml │ │ │ ├── com.linode.yml │ │ │ ├── com.microsoftonline.hk-v2017.yml │ │ │ ├── com.microsoftonline.hk.yml │ │ │ ├── com.mongodb.yml │ │ │ ├── com.namecheap.yml │ │ │ ├── com.namesilo.yml │ │ │ ├── com.newrelic.yml │ │ │ ├── com.nl.lenovo.digitalriver.yml │ │ │ ├── com.nmmn.yml │ │ │ ├── com.nodisto.yml │ │ │ ├── com.nyse.yml │ │ │ ├── com.oyo.invoice.yml │ │ │ ├── com.packtpub.yml │ │ │ ├── com.pixartprinting.yml │ │ │ ├── com.runbox.yml │ │ │ ├── com.sammymaystone.yml │ │ │ ├── com.scaleway.yml │ │ │ ├── com.textmaster.yml │ │ │ ├── com.tmx.yml │ │ │ ├── com.travis-ci.yml │ │ │ ├── com.twitter.de.yml │ │ │ ├── com.twitter.uk.yml │ │ │ ├── com.twitter.yml │ │ │ ├── com.upwork.yml │ │ │ ├── com.usersnap.yml │ │ │ └── com.vultr.yml │ │ ├── de │ │ │ ├── de.amazon.yml │ │ │ ├── de.bettina-kast.yml │ │ │ ├── de.digikey.com.yml │ │ │ ├── de.hosteurope.yml │ │ │ ├── de.notebooksbilligerBillPay.yml │ │ │ ├── de.ovh.yml │ │ │ ├── de.qualityhosting.yml │ │ │ └── de.united-domains.yml │ │ ├── es │ │ │ ├── com.mob-barcelona.caterina.yml │ │ │ ├── com.pepephone.yml │ │ │ ├── es.amazon.yml │ │ │ ├── es.digimobile.yml │ │ │ └── es.supplies24.yml │ │ ├── fr │ │ │ ├── co.mooncard.yml │ │ │ ├── com.adobe.ie.yml │ │ │ ├── com.akretion.fr.yml │ │ │ ├── com.amazon.aws.yml │ │ │ ├── com.ateliercopieservice.yml │ │ │ ├── com.chauffeur-prive.yml │ │ │ ├── com.coriolis.yml │ │ │ ├── com.easyjet.fr.yml │ │ │ ├── com.eaudugrandlyon.yml │ │ │ ├── com.godaddy.yml │ │ │ ├── com.google.ie.yml │ │ │ ├── com.hootsuite.yml │ │ │ ├── com.jeanbesson.yml │ │ │ ├── com.ldlc.yml │ │ │ ├── com.linkedin.yml │ │ │ ├── com.mention.yml │ │ │ ├── com.microsoft.ie.yml │ │ │ ├── com.myflyingbox.yml │ │ │ ├── com.officetimeline.yml │ │ │ ├── 
com.orange-business.mobile.yml │ │ │ ├── com.ovh.fr.yml │ │ │ ├── com.rs-online.fr.yml │ │ │ ├── com.saur.yml │ │ │ ├── com.soyoustart.yml │ │ │ ├── com.vinci-autoroutes.yml │ │ │ ├── dolibarr.generique.yml │ │ │ ├── eu.trainline.yml │ │ │ ├── fr.actn.yml │ │ │ ├── fr.airfrance.yml │ │ │ ├── fr.also.yml │ │ │ ├── fr.amazon.yml │ │ │ ├── fr.assurance-epargne-pension.yml │ │ │ ├── fr.bouyguestelecom.adsl-fiber.yml │ │ │ ├── fr.bouyguestelecom.mobile.yml │ │ │ ├── fr.butagaz.yml │ │ │ ├── fr.chronopost.yml │ │ │ ├── fr.dirafi.yml │ │ │ ├── fr.domaine-achat.yml │ │ │ ├── fr.easytrip.yml │ │ │ ├── fr.edf.entreprises.yml │ │ │ ├── fr.edf.pme.yml │ │ │ ├── fr.finagaz.yml │ │ │ ├── fr.fountain.yml │ │ │ ├── fr.free.adsl-fiber.yml │ │ │ ├── fr.free.mobile.yml │ │ │ ├── fr.free.mobile2.yml │ │ │ ├── fr.futur.yml │ │ │ ├── fr.ge-iroise.yml │ │ │ ├── fr.google.yml │ │ │ ├── fr.greffe-tc-lyon.yml │ │ │ ├── fr.hiscox.yml │ │ │ ├── fr.internetsatellite.yml │ │ │ ├── fr.jpg.yml │ │ │ ├── fr.kubii.yml │ │ │ ├── fr.laposte.boutique.yml │ │ │ ├── fr.laposte.coliposte.yml │ │ │ ├── fr.lecab.yml │ │ │ ├── fr.leroymerlin.yml │ │ │ ├── fr.maaf.yml │ │ │ ├── fr.mediapart.yml │ │ │ ├── fr.moneo-resto.yml │ │ │ ├── fr.mouser.yml │ │ │ ├── fr.mycelium-roulement.yml │ │ │ ├── fr.napsis.yml │ │ │ ├── fr.nexity.yml │ │ │ ├── fr.orange.fibre.yml │ │ │ ├── fr.orange.fixedline.yml │ │ │ ├── fr.prestaclic.yml │ │ │ ├── fr.publicationannoncelegale.yml │ │ │ ├── fr.sfr.adsl-fiber.yml │ │ │ ├── fr.sfr.mobile.yml │ │ │ ├── fr.sosh.yml │ │ │ ├── fr.teledec.yml │ │ │ ├── fr.topoffice.yml │ │ │ ├── net.online.yml │ │ │ └── net.scaleway.yml │ │ ├── nl │ │ │ ├── nl.accor.rhine.opco hotels.json │ │ │ ├── nl.action.yml │ │ │ ├── nl.agrisneltank.json │ │ │ ├── nl.albron.yml │ │ │ ├── nl.anwb.yml │ │ │ ├── nl.argos.json │ │ │ ├── nl.be.coolblue.yml │ │ │ ├── nl.begra.yml │ │ │ ├── nl.blokker.yml │ │ │ ├── nl.bouwmans.yml │ │ │ ├── nl.bp.yml │ │ │ ├── nl.buijtendijk.yml │ │ │ ├── nl.bunq.yml │ │ │ ├── nl.cpe.yml 
│ │ │ ├── nl.esso_eg_services.yml │ │ │ ├── nl.esso_eg_services_v2.yml │ │ │ ├── nl.farnell.yml │ │ │ ├── nl.fedex.json │ │ │ ├── nl.ferbox.yml │ │ │ ├── nl.fletcher.yml │ │ │ ├── nl.gamma.yml │ │ │ ├── nl.goos.yml │ │ │ ├── nl.gulf.yml │ │ │ ├── nl.ipparking.paleiskwartier.yml │ │ │ ├── nl.karwei.yml │ │ │ ├── nl.kav.yml │ │ │ ├── nl.koffiehenk.yml │ │ │ ├── nl.kuwait-q8.json │ │ │ ├── nl.makro.json │ │ │ ├── nl.marktplaats.json │ │ │ ├── nl.megekko.json │ │ │ ├── nl.momentsenmore.yml │ │ │ ├── nl.ns.invoice.yml │ │ │ ├── nl.odido.json │ │ │ ├── nl.ok.yml │ │ │ ├── nl.parkmobile.yml │ │ │ ├── nl.praxis.yml │ │ │ ├── nl.reclameland.yml │ │ │ ├── nl.saeco.philips.eluscious.yml │ │ │ ├── nl.shell_nederland.yml │ │ │ ├── nl.shell_schellenkens.yml │ │ │ ├── nl.simpel.yml │ │ │ ├── nl.tango.json │ │ │ ├── nl.total_express.yml │ │ │ ├── nl.total_ototol.yml │ │ │ ├── nl.total_servauto_ned.json │ │ │ ├── nl.transip.yml │ │ │ ├── nl.tuynder.yml │ │ │ ├── nl.valk.exclusief.hotel.json │ │ │ ├── nl.valk.exclusief.restaurant.json │ │ │ ├── nl.vistaprint.yml │ │ │ ├── nl.vodafone.yml │ │ │ ├── nl.wasco.yml │ │ │ ├── nl.weid.yml │ │ │ ├── nl.yezzer.yml │ │ │ └── nl.zinkunie.yml │ │ └── pl │ │ │ ├── pl.bmw-fs.yml │ │ │ ├── pl.insert.subiekt-gt.yml │ │ │ ├── pl.insert.subiekt-nexo.yml │ │ │ ├── pl.orlen.yml │ │ │ ├── pl.p4.yml │ │ │ └── pl.paypro.yml │ └── utils.py │ ├── input │ ├── __init__.py │ ├── gvision.py │ ├── ocrmypdf.py │ ├── pdfminer_wrapper.py │ ├── pdfplumber.py │ ├── pdftotext.py │ ├── tesseract.py │ └── text.py │ ├── output │ ├── __init__.py │ ├── to_csv.py │ ├── to_json.py │ └── to_xml.py │ └── py.typed ├── tests ├── __init__.py ├── common.py ├── compare │ ├── AmazonWebServices.json │ ├── AmazonWebServices.pdf │ ├── AmazonWebServices.png │ ├── AzureInterior.json │ ├── AzureInterior.pdf │ ├── FlipkartInvoice.json │ ├── FlipkartInvoice.pdf │ ├── FlipkartInvoice.png │ ├── NetpresseInvoice.json │ ├── NetpresseInvoice.pdf │ ├── Orlen.json │ ├── Orlen.txt │ ├── 
# EditorConfig settings for this repository.
# This is the top-most EditorConfig file; editors stop searching parent
# directories once they read it.
root = true

# Defaults applied to every file.
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

# Python sources and TOML files are indented with 4 spaces.
[*.{py,toml}]
indent_style = space
indent_size = 4

# YAML and JSON files are indented with 2 spaces. The alternatives must be
# wrapped in braces: without them the commas are matched literally and this
# section would never apply to .yml, .yaml or .json files.
[*.{yml,yaml,json}]
indent_style = space
indent_size = 2
to decide 3 | # regarding where to record them in changelog or if to skip them. 4 | # 5 | # The repository labels will be automatically configured using this file and 6 | # the GitHub Action https://github.com/marketplace/actions/github-labeler. 7 | - name: breaking 8 | description: Breaking Changes 9 | color: bfd4f2 10 | - name: bug 11 | description: Something isn't working 12 | color: d73a4a 13 | - name: build 14 | description: Build System and Dependencies 15 | color: bfdadc 16 | - name: ci 17 | description: Continuous Integration 18 | color: 4a97d6 19 | - name: dependencies 20 | description: Pull requests that update a dependency file 21 | color: 0366d6 22 | - name: documentation 23 | description: Improvements or additions to documentation 24 | color: 0075ca 25 | - name: duplicate 26 | description: This issue or pull request already exists 27 | color: cfd3d7 28 | - name: enhancement 29 | description: New feature or request 30 | color: a2eeef 31 | - name: github_actions 32 | description: Pull requests that update Github_actions code 33 | color: "000000" 34 | - name: good first issue 35 | description: Good for newcomers 36 | color: 7057ff 37 | - name: help wanted 38 | description: Extra attention is needed 39 | color: 008672 40 | - name: invalid 41 | description: This doesn't seem right 42 | color: e4e669 43 | - name: performance 44 | description: Performance 45 | color: "016175" 46 | - name: python 47 | description: Pull requests that update Python code 48 | color: 2b67c6 49 | - name: question 50 | description: Further information is requested 51 | color: d876e3 52 | - name: refactoring 53 | description: Refactoring 54 | color: ef67c4 55 | - name: removal 56 | description: Removals and Deprecations 57 | color: 9ae7ea 58 | - name: style 59 | description: Style 60 | color: c120e5 61 | - name: testing 62 | description: Testing 63 | color: b1fc6f 64 | - name: wontfix 65 | description: This will not be worked on 66 | color: ffffff 67 | 
-------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | categories: 2 | - title: ":boom: Breaking Changes" 3 | label: "breaking" 4 | - title: ":rocket: Features" 5 | label: "enhancement" 6 | - title: ":fire: Removals and Deprecations" 7 | label: "removal" 8 | - title: ":beetle: Fixes" 9 | label: "bug" 10 | - title: ":racehorse: Performance" 11 | label: "performance" 12 | - title: ":rotating_light: Testing" 13 | label: "testing" 14 | - title: ":construction_worker: Continuous Integration" 15 | label: "ci" 16 | - title: ":books: Documentation" 17 | label: "documentation" 18 | - title: ":hammer: Refactoring" 19 | label: "refactoring" 20 | - title: ":lipstick: Style" 21 | label: "style" 22 | - title: ":package: Dependencies" 23 | labels: 24 | - "dependencies" 25 | - "build" 26 | template: | 27 | ## Changes 28 | 29 | $CHANGES 30 | -------------------------------------------------------------------------------- /.github/workflows/constraints.txt: -------------------------------------------------------------------------------- 1 | pip==25.2 2 | nox==2025.10.16 3 | virtualenv==20.35.3 4 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Labeler 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - '.github/labels.yml' 10 | pull_request: 11 | paths: 12 | - '.github/labels.yml' 13 | 14 | jobs: 15 | labeler: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | contents: read 19 | issues: write 20 | steps: 21 | - name: Check out the repository 22 | uses: actions/checkout@v5 23 | 24 | - name: Run Labeler 25 | uses: crazy-max/ghaction-github-labeler@v5.3.0 26 | with: 27 | github-token: ${{ secrets.GITHUB_TOKEN }} 28 | yaml-file: .github/labels.yml 29 | dry-run: ${{ 
github.event_name == 'pull_request' }} 30 | exclude: | 31 | help* 32 | *issue 33 | skip-delete: true 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # PyCharm 62 | .idea/ 63 | 64 | # PyTest 65 | .pytest_cache/ 66 | 67 | .mypy_cache/ 68 | /.coverage 69 | /.coverage.* 70 | /.nox/ 71 | /.python-version 72 | /.pytype/ 73 | /dist/ 74 | /docs/_build/ 75 | /src/*.egg-info/ 76 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: check-added-large-files 5 | name: Check for added large files 6 | entry: check-added-large-files 7 | language: system 8 | - id: check-toml 9 | name: Check Toml 10 | entry: check-toml 11 | language: system 12 | types: [toml] 13 | - id: check-yaml 14 | name: 
Check Yaml 15 | entry: check-yaml 16 | language: system 17 | types: [yaml] 18 | args: [--unsafe] # needed to allow duplicate keys 19 | - id: end-of-file-fixer 20 | name: Fix End of Files 21 | entry: end-of-file-fixer 22 | language: system 23 | types: [text] 24 | stages: [commit, push, manual] 25 | - id: trailing-whitespace 26 | name: Trim Trailing Whitespace 27 | entry: trailing-whitespace-fixer 28 | language: system 29 | types: [text] 30 | stages: [commit, push, manual] 31 | - id: pydoclint 32 | name: pydoclint 33 | entry: pydoclint 34 | language: system 35 | types: [python] 36 | - id: ruff 37 | name: ruff 38 | entry: ruff check 39 | args: [--fix] 40 | language: python 41 | types_or: [python, pyi] 42 | - id: ruff-format 43 | name: ruff-format 44 | entry: ruff format 45 | language: python 46 | types_or: [python, pyi] 47 | # - repo: https://github.com/pre-commit/mirrors-prettier 48 | # rev: v2.6.0 49 | # hooks: 50 | # - id: prettier 51 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-24.04 4 | tools: 5 | python: "3.13" 6 | sphinx: 7 | configuration: docs/conf.py 8 | formats: all 9 | python: 10 | install: 11 | - requirements: docs/requirements.txt 12 | - path: . 13 | -------------------------------------------------------------------------------- /.safety-policy.yml: -------------------------------------------------------------------------------- 1 | security: 2 | ignore-vulnerabilities: 3 | "70612": 4 | reason: Not a bug. This is the same as marking python as unsafe due to the existence of 'eval'. 
See https://bugzilla.redhat.com/show_bug.cgi?id=1677653 5 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Manuel Riel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
"""Sphinx configuration for the Invoice2data documentation."""

import os
import sys


# -- Import path --------------------------------------------------------------
# The package lives in the ``src`` directory next to ``docs``; put it at the
# front of ``sys.path`` so the modules documented with autodoc can be imported.
sys.path.insert(0, os.path.abspath("../src"))

# -- Project information ------------------------------------------------------
project = "Invoice2data"
author = "Manuel Riel"
copyright = "2024, Manuel Riel"

# Language of the content Sphinx generates on its own. It is also used for
# content translation via gettext catalogs, where it is usually set from the
# command line instead.
language = "en"

# -- Extensions and their options ---------------------------------------------
extensions = [
    "sphinxmermaid",
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx_click",
    "myst_parser",
]
autodoc_typehints = "description"
myst_fence_as_directive = ["mermaid"]

# -- HTML output --------------------------------------------------------------
html_theme = "furo"
html_logo = "_static/logo.png"

# Favicon of the docs, given relative to this directory. It should be a
# Windows icon file (.ico) of 16x16 or 32x32 pixels.
html_favicon = "_static/favicon.ico"
-------------------------------------------------------------------------------- 1 | # License 2 | 3 | ```{literalinclude} ../LICENSE.md 4 | --- 5 | language: none 6 | --- 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/reference.md: -------------------------------------------------------------------------------- 1 | # Reference 2 | 3 | ## invoice2data API 4 | 5 | 6 | ## Input modules 7 | 8 | ### pdftotext 9 | ```{eval-rst} 10 | .. automodule:: invoice2data.input.pdftotext 11 | :members: 12 | ``` 13 | 14 | ### text 15 | ```{eval-rst} 16 | .. automodule:: invoice2data.input.text 17 | :members: 18 | ``` 19 | 20 | ### pdfplumber 21 | ```{eval-rst} 22 | .. automodule:: invoice2data.input.pdfplumber 23 | :members: 24 | ``` 25 | 26 | ### pdfminer 27 | ```{eval-rst} 28 | .. automodule:: invoice2data.input.pdfminer_wrapper 29 | :members: 30 | ``` 31 | 32 | ### ocrmypdf 33 | ```{eval-rst} 34 | .. automodule:: invoice2data.input.ocrmypdf 35 | :members: 36 | ``` 37 | 38 | ### Google Vision 39 | ```{eval-rst} 40 | .. automodule:: invoice2data.input.gvision 41 | :members: 42 | ``` 43 | 44 | ## Output modules 45 | 46 | ### csv 47 | ```{eval-rst} 48 | .. automodule:: invoice2data.output.to_csv 49 | :members: 50 | ``` 51 | 52 | ### json 53 | ```{eval-rst} 54 | .. automodule:: invoice2data.output.to_json 55 | :members: 56 | ``` 57 | 58 | ### xml 59 | ```{eval-rst} 60 | .. automodule:: invoice2data.output.to_xml 61 | :members: 62 | ``` 63 | 64 | ## Extract 65 | 66 | ### loader 67 | ```{eval-rst} 68 | .. automodule:: invoice2data.extract.loader 69 | :members: 70 | ``` 71 | 72 | ### InvoiceTemplate 73 | ```{eval-rst} 74 | .. autoclass:: invoice2data.extract.invoice_template.InvoiceTemplate 75 | 76 | :members: 77 | 78 | ``` 79 | 80 | ### Plugins 81 | 82 | #### tables 83 | ```{eval-rst} 84 | .. automodule:: invoice2data.extract.plugins.tables 85 | :members: 86 | ``` 87 | 88 | #### lines 89 | ```{eval-rst} 90 | .. 
automodule:: invoice2data.extract.plugins.lines 91 | :members: 92 | ``` 93 | 94 | ### Parsers 95 | 96 | #### static 97 | ```{eval-rst} 98 | .. automodule:: invoice2data.extract.parsers.static 99 | :members: 100 | ``` 101 | 102 | #### lines 103 | ```{eval-rst} 104 | .. automodule:: invoice2data.extract.parsers.lines 105 | :members: 106 | ``` 107 | 108 | #### regex 109 | ```{eval-rst} 110 | .. automodule:: invoice2data.extract.parsers.regex 111 | :members: 112 | ``` 113 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo==2025.9.25 2 | sphinx==8.1.3 3 | sphinx-click==6.1.0 4 | myst_parser==4.0.1 5 | sphinx-mermaid==0.0.8 6 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | ```{eval-rst} 4 | .. click:: invoice2data.__main__:main 5 | :prog: invoice2data 6 | :nested: full 7 | ``` 8 | -------------------------------------------------------------------------------- /src/invoice2data/__init__.py: -------------------------------------------------------------------------------- 1 | """Invoice2Data.""" 2 | 3 | from .__main__ import extract_data # noqa: F401 4 | -------------------------------------------------------------------------------- /src/invoice2data/extract/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialize the parsers.""" 2 | -------------------------------------------------------------------------------- /src/invoice2data/extract/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialize the parsers.""" 2 | 3 | from . import lines # noqa: F401 4 | from . import regex # noqa: F401 5 | from . 
import static # noqa: F401 6 | -------------------------------------------------------------------------------- /src/invoice2data/extract/parsers/__interface__.py: -------------------------------------------------------------------------------- 1 | """Interface for fields parsers. 2 | 3 | Parsers are basic modules used for extracting data. They are responsible 4 | for parsing invoice text using specified settings. Depending on a parser 5 | and settings it may be e.g.: 6 | 1. Looking for a single value 7 | 2. Grouping multiple occurrences (e.g. summing up) 8 | 3. Finding repeating parts (e.g. multiple rows) 9 | 10 | Each parser is a module (file) in the package `parsers` and provides at 11 | a minimum the `parse` function with these arguments: 12 | 13 | def parse(template, field, settings, content) 14 | 15 | Parser has to return a single value (e.g. number, date, string, array) 16 | or None in case of error. Such a value will be included in the output. 17 | """ 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/parsers/static.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | """Pseudo-parser returning a static (predefined) value.""" 4 | 5 | from logging import getLogger 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Optional 9 | 10 | 11 | logger = getLogger(__name__) 12 | 13 | 14 | def parse( 15 | template: Any, 16 | field: str, 17 | settings: Dict[str, Any], 18 | content: str, 19 | legacy: bool = False, 20 | ) -> Optional[Any]: 21 | if "value" not in settings: 22 | logger.warning('Field "%s" doesn\'t have static value specified', field) 23 | return None 24 | 25 | logger.debug("field=%s | value=['%s']", field, settings["value"]) 26 | 27 | return settings["value"] 28 | -------------------------------------------------------------------------------- /src/invoice2data/extract/plugins/__init__.py:
-------------------------------------------------------------------------------- 1 | """Initialize the plugins.""" 2 | -------------------------------------------------------------------------------- /src/invoice2data/extract/plugins/__interface__.py: -------------------------------------------------------------------------------- 1 | """Interface for extraction plugins. 2 | 3 | Plugins are used for extracting more complex data and should be used 4 | only if they can't fit the parser design. They can't be used in the 5 | standard `fields` associative array. 6 | 7 | The main advantage of plugins (at the cost of clean template 8 | integration) is full access to the output. It allows plugins to e.g. 9 | set multiple output entries. 10 | 11 | Each plugin is a module (file) in package `plugins` that provides at a minimum the `extract` 12 | function with these arguments: 13 | 14 | def extract(settings, optimized_str, output) 15 | """ 16 | -------------------------------------------------------------------------------- /src/invoice2data/extract/plugins/lines.py: -------------------------------------------------------------------------------- 1 | """Plugin to extract individual lines from an invoice. 2 | 3 | This plugin has been replaced by the "lines" parser. All new templates 4 | should use the parser instead. It's provided for backward compatibility 5 | only. 6 | """ 7 | 8 | from collections import OrderedDict 9 | from typing import Any 10 | from typing import Dict 11 | 12 | from .. import parsers 13 | 14 | 15 | def extract( 16 | self: "OrderedDict[str, Any]", content: str, output: Dict[str, Any] 17 | ) -> None: 18 | """Extract individual lines from an invoice. 19 | 20 | This plugin has been replaced by the "lines" parser. All new templates 21 | should use the parser instead. It's provided for backward compatibility 22 | only. 23 | 24 | Args: 25 | self (OrderedDict[str, Any]): The current instance of the class. 26 | content (str): The text content to parse.
27 | output (Dict[str, Any]): A dictionary to store the extracted data. 28 | """ 29 | lines_data = parsers.lines.parse(self, "lines", self["lines"], content) 30 | if lines_data is not None: 31 | output["lines"] = lines_data 32 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/au/au.com.opal.yml: -------------------------------------------------------------------------------- 1 | issuer: Opal Customer Care 2 | keywords: 3 | - Opal Customer Care 4 | - Your activity statement 5 | - or visit opal.com.au 6 | fields: 7 | opal_card_number: 'Opal card number\s+(\d{4}\s\d{4}\s\d{4}\s\d{4})' 8 | opal_card_status: 'Opal card status\s+(\w+)' 9 | card_nickname: 'Card nickname\s+(\w+)' 10 | type_of_card: 'Type of card\s+(\w+)' 11 | amount: 'Total fares \(incl. GST\)\s+-\$(\d{1,10}.\d\d)' 12 | total_top_ups: 'Total top ups\s+(-?\$\d{1,10}.\d\d)' 13 | total_adjustments: 'Total adjustments\s+(-?\$\d{1,10}.\d\d)' 14 | activity_statement_balance: 'Activity statement\s+(-?\$\d{1,10}.\d\d)' 15 | date: 'Printed (\d\d:\d\d \d\d \w\w\w \d{4})' 16 | invoice_number: 'Printed (\d\d:\d\d \d\d \w\w\w \d{4})' 17 | from_date: 'top ups from (\d\d \w+ \d\d\d\d) to' 18 | to_date: 'to (\d\d \w+ \d\d\d\d)' 19 | lines: 20 | start: '\s*Always remember to tap on and tap off' 21 | end: "Understanding your activity statement" 22 | first_line: '^(?P\d{1,10})[ ]{10,22}(?P(Mon|Tue|Wed|Thu|Fri|Sat|Sun))[ ]{6}(?P\d\d:\d\d)[ ]{18}(?P
\S+( \S+)*)[ ]{6,32}(?P(\d{1,3})?)[ ]+(?P(Travel Reward|Off-peak|Default fare|Day Cap)?)[ ][ ]+(?P(\d{1,3}.\d\d)?)[ ]+(?P(\d{1,3}.\d\d)?)[ ]+(?P-?\d{1,3}.\d\d)$' 23 | line: '^[ ]{18,24}(?P(\d\d/\d\d/\d\d)?)[ ]{,32}(?P
\S+( \S+)*)?$' 24 | last_line: "^$" 25 | options: 26 | currency: AUD 27 | remove_whitespace: false 28 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/au/au.com.telstra.yml: -------------------------------------------------------------------------------- 1 | issuer: Telstra Corporation Limited 2 | keywords: 3 | - Telstra Corporation Limited 4 | - ABN 33 051 775 556 5 | fields: 6 | account_number: 'Account\s+(\d{3}\s+\d{4}\s+\d{3})' 7 | amount: 'Total\s+\$(\d+,\d+.\d{2})' 8 | date: 'Tax Invoice - issued\s+(\d{2}\s[a-zA-Z]{3}\D+\d{2})' 9 | invoice_number: 'Bill number\s+([A-Z]\s+\d{3}\s+\d{3}\s+\d{3}\S\d)' 10 | gst: 'GST\s+included\s+in\s+new\s+charges\s+\$(\d+,?\d+.\d{2})' 11 | options: 12 | currency: AUD 13 | remove_whitespace: false 14 | date_formats: 15 | - "%d/%m/%Y" 16 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.accor.invest.ibis.yml: -------------------------------------------------------------------------------- 1 | issuer: Accor Invest Belgium nv SA 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: Total to pay EUR\s+(\d?.?\d{1,4},\d{2}) 6 | type: float 7 | amount_tax: 8 | parser: regex 9 | regex: \d{2}[%]\s+\w\s+\d{1,4}[,]\d{2}\s+(\d{1,4}[,]\d{2}) 10 | type: float 11 | amount_untaxed: 12 | parser: regex 13 | regex: \d{2}[%]\s+\w\s+(\d{1,4}[,]\d{2})\s+\d{1,4}[,]\d{2} 14 | type: float 15 | date: 16 | parser: regex 17 | regex: '\w+\S\s(\d{1,2}[/]\d{1,2}[/]\d{4})\n' 18 | type: date 19 | date_due: 20 | parser: regex 21 | regex: '\w+\S\s(\d{1,2}[/]\d{1,2}[/]\d{4})\n' 22 | type: date 23 | invoice_number: 24 | parser: regex 25 | regex: Our ref[.]\s[:](.*)\s[(] 26 | iban: 27 | parser: regex 28 | regex: '[A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}' 29 | bic: 30 | parser: regex 31 | regex: '(?i)BIC[:]\s+(\w{8,11})' 32 | static_vat: BE0673923435 33 | partner_website: 34 | parser: static 35 | value: all.accor.com 36 | 
partner_name: 37 | parser: static 38 | value: Accor Invest Belgium nv SA 39 | country_code: 40 | parser: static 41 | value: be 42 | partner_zip: 43 | parser: static 44 | value: B-1000 45 | partner_city: 46 | parser: regex 47 | regex: Brussel 48 | partner_street: 49 | parser: static 50 | value: Square de Meeus 35 51 | narration: 52 | parser: regex 53 | regex: Ibis\s+\S+ 54 | lines: 55 | start: Datum\s\S\s+Omschrijving 56 | end: (?i)Totaal\ste 57 | line: (?P(\d+[-]\d+[-]\w+(?:\s+\S+){1,8}))\s+(?P\d+)\s+(?P\d+[,]\d{2}) 58 | types: 59 | qty: float 60 | price_unit: float 61 | price_subtotal: float 62 | taxpercent: float 63 | keywords: 64 | - (?i)Accor 65 | - 'BE\s0673\s923' 66 | - "Factuur" 67 | options: 68 | currency: EUR 69 | languages: 70 | - nl 71 | - be 72 | - en 73 | decimal_separator: "," 74 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.accor.invest.novotel.yml: -------------------------------------------------------------------------------- 1 | issuer: Accor Invest Belgium nv 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: Total to be paid\s+(\d?.?\d{1,4}\.\d{2}) 6 | type: float 7 | amount_tax: 8 | parser: regex 9 | regex: VAT\s+\d{2}[.]\d{2}\s+\d{1,4}[.,]\d{2}\s+(\d{1,4}[.,]\d{2}) 10 | type: float 11 | amount_untaxed: 12 | parser: regex 13 | regex: VAT\s+\d{2}[.]\d{2}\s+(\d{1,4}[.,]\d{2})\s+\d{1,4}[.,]\d{2} 14 | type: float 15 | date: 16 | parser: regex 17 | regex: ',\s(\d{1,2}[-]\S{1,4}[-]\d{2})\n' 18 | type: date 19 | date_due: 20 | parser: regex 21 | regex: ',\s(\d{1,2}[-]\S{1,4}[-]\d{2})\n' 22 | type: date 23 | invoice_number: 24 | parser: regex 25 | regex: Invoice Number\s+(\d+) 26 | iban: 27 | parser: regex 28 | regex: '[A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}' 29 | bic: 30 | parser: regex 31 | regex: '(?i)BIC[:]\s+(\w{8,11})' 32 | static_vat: BE0673923435 33 | partner_website: 34 | parser: static 35 | value: all.accor.com 36 | partner_name: 37 | parser: static 38 | value: 
Accor Invest Belgium nv 39 | country_code: 40 | parser: static 41 | value: be 42 | partner_zip: 43 | parser: static 44 | value: B-1000 45 | partner_city: 46 | parser: regex 47 | regex: Brussel 48 | partner_street: 49 | parser: static 50 | value: Square de Meeus 35 51 | narration: 52 | parser: regex 53 | regex: Ibis\s+\S+ 54 | lines: 55 | start: Date\s+Description 56 | end: (?i)Total 57 | line: (?P(\d+[-]\d+[-]\w+(?:\s+\S+){1,8}))\s+(?P\d+)\s+(?P-?\d+[.,]\d{2}) 58 | types: 59 | qty: float 60 | price_unit: float 61 | price_subtotal: float 62 | keywords: 63 | - (?i)Accor 64 | - 'BE\s0673\s923' 65 | - "Invoice" 66 | options: 67 | currency: EUR 68 | languages: 69 | - nl 70 | - be 71 | - en 72 | decimal_separator: "." 73 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.boucherie.pochet.yml: -------------------------------------------------------------------------------- 1 | issuer: S.P.R.L Pochet 2 | fields: 3 | amount_untaxed: \sHTVA\s+(\d?\.?\d+,\d+)EUR\s+ 4 | amount: NETAPAYER\s+(\d?\.?\d+,\d+)EUR\s+ 5 | date: DU(\d+/\d+/\d+) 6 | invoice_number: 'FACTUREN.{1}(\d+)' 7 | keywords: 8 | - IBANBE68370103949634 9 | - FACTURE 10 | options: 11 | currency: EUR 12 | remove_whitespace: true 13 | decimal_separator: "," 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.cebeo.yml: -------------------------------------------------------------------------------- 1 | issuer: CEBEO NV 2 | fields: 3 | amount_untaxed: \s+21,00%\s+(\d?\.?\d+,\d+)\s+ 4 | amount: (\d?\.?\d+,\d+)EUR\sDISPENSE 5 | date: DATEFA\s(\d+/\d+/\d+)\sECHEANCE 6 | invoice_number: 'FACTURE\s+N.{1}\s?(\d+)\s+' 7 | keywords: 8 | - www.cebeo.be 9 | - FACTURE 10 | options: 11 | currency: EUR 12 | remove_whitespace: true 13 | decimal_separator: "," 14 | -------------------------------------------------------------------------------- 
/src/invoice2data/extract/templates/be/be.lampiris.facture-dacompte.yml: -------------------------------------------------------------------------------- 1 | issuer: Lampiris S.A. 2 | fields: 3 | amount: 'Montant\s+total\s+a\s+payer\s+EUR\s+(\d+,\d{2})' 4 | date: 'Date\s+facture\s+:\s+(\d{2}\/\d{2}\/\d{4})' 5 | date_due: 'Date\sd.echeance\s:\s(\d{2}/\d{2}/\d{4})\sconformement' 6 | date_invoice_start: 'Periode\sde\sfacturation\s:\s(\d{2}/\d{2}/\d{4})\sau\s\d{2}/\d{2}/\d{4}' 7 | date_invoice_end: 'Periode\sde\sfacturation\s:\s\d{2}/\d{2}/\d{4}\sau\s(\d{2}/\d{2}/\d{4})' 8 | invoice_number: 'Reference\s+facture\s+:\s+(E\d{2}\/\d+)' 9 | ean: 'Code\sEAN\selectricite\s+Gestionnaire\sreseau\selectricite\s+(\d{8,})\s' 10 | keywords: 11 | - Lampiris 12 | - Facture 13 | - acompte 14 | - BE 0859 655 570 15 | - BE38 0015 0942 4272 16 | options: 17 | currency: EUR 18 | remove_whitespace: false 19 | remove_accents: true 20 | lowercase: false 21 | decimal_separator: "," 22 | date_formats: 23 | - "%d/%m/%Y" 24 | languages: 25 | - fr 26 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.lampiris.factuur.yml: -------------------------------------------------------------------------------- 1 | issuer: Lampiris S.A. 
2 | fields: 3 | amount: 'Bedrag+\s+incl.+\s+btw+\s+:+\s+(\d+,\d{2})' 4 | date: 'Factuurdatum+\s+(\d{2}\/\d{2}\/\d{4})' 5 | invoice_number: 'Factuurnummer+\s+(E\d{2}\/\d+)' 6 | keywords: 7 | - Lampiris 8 | - Factuur 9 | - Klantnummer 10 | - BE38 0015 0942 4272 11 | options: 12 | currency: EUR 13 | remove_whitespace: false 14 | remove_accents: true 15 | lowercase: false 16 | decimal_separator: "," 17 | date_formats: 18 | - "%d/%m/%Y" 19 | languages: 20 | - nl 21 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.lampiris.regularisation.yml: -------------------------------------------------------------------------------- 1 | issuer: Lampiris S.A. 2 | fields: 3 | amount: '=\s+EUR\s(\d+,\d{2})\s+TVAC' 4 | date: 'Date\s+facture\s+:\s+(\d{2}\/\d{2}\/\d{4})' 5 | date_due: 'Date\sd.echeance\s:\s(\d{2}/\d{2}/\d{4})\sconformement' 6 | date_invoice_start: 'de\scompteur\s+\d{8,}\s+du\s(\d{2}/\d{2}/\d{4})\sau\s\d{2}/\d{2}/\d{4}' 7 | date_invoice_end: 'de\scompteur\s+\d{8,}\s+du\s\d{2}/\d{2}/\d{4}\sau\s(\d{2}/\d{2}/\d{4})' 8 | invoice_number: 'Reference\s+facture\s+:\s+(E\d{2}\/\d+)' 9 | ean: 'de\scompteur\s+(\d{8,})\s+' 10 | keywords: 11 | - Lampiris 12 | - Regularisation 13 | - BE 0859 655 570 14 | - BE38 0015 0942 4272 15 | options: 16 | currency: EUR 17 | remove_whitespace: false 18 | remove_accents: true 19 | lowercase: false 20 | decimal_separator: "," 21 | date_formats: 22 | - "%d/%m/%Y" 23 | languages: 24 | - fr 25 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.melchior-vins.yml: -------------------------------------------------------------------------------- 1 | issuer: Melchior Vins 2 | fields: 3 | amount_untaxed: (\d?\.?\d+.\d+)\s+21% 4 | amount: APayerenEuro\s+(\d?,?\d+.\d+)\s+ 5 | date: Date:\s(\d+.\d+.\d+)\sFACTURE 6 | invoice_number: 'FACTURE\s+(\d+)' 7 | keywords: 8 | - www.melchior-vins.be 9 | - FACTURE 10 | 
options: 11 | currency: EUR 12 | remove_whitespace: true 13 | decimal_separator: "." 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.proximus.yml: -------------------------------------------------------------------------------- 1 | issuer: Proximus SA 2 | fields: 3 | amount: 'Montanttotalàpayer\s+€(\d+,\d+)\s+' 4 | amount_untaxed: TVA21%sur€(\d?\.?\d+,\d+) 5 | date: (\d{1,2}[a-z]{4,}\d{4}) 6 | invoice_number: '\d{1,2}[a-z]{4,}\d{4}\s+(\d+)\s+' 7 | keywords: 8 | - BE61000171003017 9 | - Facture 10 | options: 11 | currency: EUR 12 | remove_whitespace: true 13 | decimal_separator: "," 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.scarlet.yml: -------------------------------------------------------------------------------- 1 | issuer: Scarlet Belgium S.A. 2 | fields: 3 | amount: 'Total\sa\spayer\s+(\d+,\d{2})\s' 4 | date: 'Date\s+(\d{2}\/\d{2}\/\d{4})\s+Date\slimite\sde\spaiement ' 5 | date_due: 'Date\slimite\sde\spaiement.+\s+(\d{2}\/\d{2}\/\d{4})' 6 | date_invoice_start: 'Abonnements\s\((\d{2}\/\d{2}\/\d{2})-\d{2}\/\d{2}\/\d{2}\)' 7 | date_invoice_end: 'Abonnements\s\(\d{2}\/\d{2}\/\d{2}-(\d{2}\/\d{2}\/\d{2})\)' 8 | invoice_number: 'Numero\sde\sdocument\s+(\d{2}R-\d{7})' 9 | keywords: 10 | - Scarlet 11 | - Decompte 12 | - BE0447.976.484 13 | - BE43 0013 6602 2001 14 | options: 15 | currency: EUR 16 | remove_whitespace: false 17 | remove_accents: true 18 | lowercase: false 19 | decimal_separator: "," 20 | date_formats: 21 | - "%d/%m/%Y" 22 | - "%d/%m/%y" 23 | languages: 24 | - fr 25 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/be/be.securex.social.yml: -------------------------------------------------------------------------------- 1 | issuer: Secretar. 
Social Securex 2 | fields: 3 | amount_untaxed: 21,00%Base:\s?(\d?\.?\d+,\d+)\s+ 4 | amount: TOTALAPAYER\(EUR\):\s+[a-z]{0,}(\d?\.?\d+,\d+) 5 | date: FACTURE\d+/\d+du(\d+/\d+/\d+) 6 | invoice_number: 'FACTURE(\d+/\d+)du' 7 | keywords: 8 | - BBRUBEBBBE58390001370179 9 | - FACTURE 10 | options: 11 | currency: EUR 12 | remove_whitespace: true 13 | decimal_separator: "," 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/ch/ch.pcengines.yml: -------------------------------------------------------------------------------- 1 | issuer: PC Engines GmbH 2 | fields: 3 | amount: Total\s+EUR\s+([\d,]+.\d{2}) 4 | amount_untaxed: Subtotal\s+EUR\s+([\d,]+.\d{2}) 5 | date: Date:\s+(\d{1,2}\.\d{1,2}\.\d{4}) 6 | invoice_number: Invoice\s+(\w+) 7 | static_vat: CHE-109.825.964 8 | static_partner_email: orders@pcengines.ch 9 | keywords: 10 | - CHE-109.825.964 11 | - Invoice 12 | - EUR 13 | - pcengines.ch 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d.%m.%Y" 18 | languages: 19 | - en 20 | decimal_separator: "." 21 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.amazon.aws.yml: -------------------------------------------------------------------------------- 1 | issuer: Amazon Web Services 2 | fields: 3 | amount: TOTAL AMOUNT DUE ON.*\$(\d+\.\d+) 4 | amount_untaxed: TOTAL AMOUNT DUE ON.*\$(\d+\.\d+) 5 | date: Invoice Date:\s+([a-zA-Z]+ \d+ , \d+) 6 | invoice_number: Invoice Number:\s+(\d+) 7 | partner_name: (Amazon Web Services, Inc\.) 
8 | static_partner_website: aws.amazon.com 9 | keywords: 10 | - Amazon Web Services 11 | - $ 12 | - Invoice 13 | lines: 14 | start: Detail 15 | end: \* May include estimated US sales tax 16 | first_line: ^ (?P\w+.*)\$(?P\d+\.\d+) 17 | line: (.*)\$(\d+\.\d+) 18 | last_line: VAT \*\* 19 | options: 20 | currency: USD 21 | date_formats: 22 | - "%B %d, %Y" 23 | languages: 24 | - en 25 | decimal_separator: "." 26 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.apple.yml: -------------------------------------------------------------------------------- 1 | issuer: Apple Distribution International 2 | fields: 3 | amount: Total\s+Price\s+\(\w+.VAT\)\s+EUR\s+(\d+,\d{2}) 4 | date: Invoice\s+Date\s?:\s+(\d{2}.\d{2}.\d{4}) 5 | invoice_number: Invoice\s+Number\s?:\s+(\w+) 6 | static_vat: FR18539565218 7 | tables: 8 | - start: VAT Basis\s+VAT Amount\s+VAT Rate 9 | end: "Pour plus" 10 | body: '\s+(?P\d*,\d*)\s+(?P\d*,\d*)\s+(?P\d*,\d*)\s+%' 11 | keywords: 12 | - FR18539565218 13 | - Apple 14 | - EUR 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%d.%m.%Y" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.apps4rent.yml: -------------------------------------------------------------------------------- 1 | issuer: Apps4Rent LLC 2 | fields: 3 | amount: Total\$(\d+.\d+)USD 4 | date: InvoiceDate:(\d+/\d+/\d+) 5 | invoice_number: 'Invoice#(\d+)' 6 | keywords: 7 | - Apps4Rent 8 | - Maplewood 9 | options: 10 | currency: USD 11 | remove_whitespace: true 12 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.binarylife.yml: -------------------------------------------------------------------------------- 1 | issuer: BinaryLife Inc. 
2 | fields: 3 | amount: Total\s?:\s+\$(\d+\.\d{2}) 4 | amount_untaxed: Total\s?:\s+\$(\d+\.\d{2}) 5 | date: Dated\s?:\s+(\d{2}.+,\s\d{4}) 6 | invoice_number: Invoice\sID\s?:\s+(\d+) 7 | static_partner_name: BinaryLife 8 | static_partner_email: support@browserstack.com 9 | keywords: 10 | - support@browserstack.com 11 | - BinaryLife 12 | - $ 13 | - Invoice 14 | options: 15 | currency: USD 16 | date_formats: 17 | - "%d %B %Y" 18 | languages: 19 | - en 20 | replace: 21 | - ["t h", "th"] 22 | decimal_separator: "." 23 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.bloomberg.yml: -------------------------------------------------------------------------------- 1 | issuer: Bloomberg 2 | fields: 3 | amount: TOTAL \(USD \)\s+([\d,]+\.\d{2}) 4 | amount_untaxed: SUBTOTAL\s+([\d,]+\.\d{2}) 5 | date: INVOICE NUMBER\s+INVOICE DATE\s+ACCOUNT NUMBER\s+\d+\s+(\d{2}\/\d{2}\/\d{4}) 6 | invoice_number: INVOICE NUMBER\s+INVOICE DATE\s+ACCOUNT NUMBER\s+(\d+) 7 | static_partner_name: Bloomberg 8 | keywords: 9 | - INVOICE 10 | - USD 11 | - bloomberg.com 12 | options: 13 | currency: USD 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.cloudflare.yml: -------------------------------------------------------------------------------- 1 | issuer: Cloudflare, Inc 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: 6 | - Total\s+[$€](\d+.\d{2})\s 7 | type: float 8 | amount_untaxed: 9 | parser: regex 10 | regex: 11 | - Subtotal [(]USD[)]\s+[$€](\d+.\d{2})\s 12 | type: float 13 | amount_tax: 14 | parser: regex 15 | regex: 16 | - Tax Amount\s+[$€](\d+.\d{2})\s 17 | type: float 18 | date: 19 | parser: regex 20 | regex: 21 | - Date[:]\s+(\d{2}.\d{2}.\d{4})\s+ 22 | type: date 23 | invoice_number: 24 | parser: regex 25 | regex: 26 | - INVOICE.\s+(\w+) 27 | partner_website: 28 | parser: regex 29 | regex: 30 | - (cloudflare[.]com) 31 | group: first 32 | partner_name: 33 | parser: regex 34 | regex: 35 | - "(Cloudflare, Inc)" 36 | partner_email: 37 | parser: static 38 | value: billing@cloudflare.com 39 | partner_city: 40 | parser: regex 41 | regex: "San Francisco" 42 | group: first 43 | partner_zip: 44 | parser: regex 45 | regex: "CA 94107" 46 | country_code: 47 | parser: static 48 | value: US 49 | payment_method: 50 | - (?i)(AMEX) 51 | - (?i)(American express) 52 | - (?i)(VISA) 53 | - (?i)(Vpay) 54 | - (?i)(Mastercard) 55 | - (?i)(CONTANT) 56 | - (?i)(KAS):\s.\s\d+\.\d+ 57 | lines: 58 | parser: lines 59 | rules: 60 | - start: "Summary of Current Charges" 61 | end: '\s+Total\s+[$€](\d+.\d{2})\s' 62 | line: 63 | - '(?P(\w+(?:\s\S+)*))\s+(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s-\s(?P\d{2}[\/]\d{1,2}[\/]\d{4})\s+(?P\d)\s+[$€](?P\d+.\d{2})\s+[$€](?P\d+.\d{2})' 64 | types: 65 | qty: float 66 | price_unit: float 67 | line_tax_amount: float 68 | price_subtotal: float 69 | date_end: date 70 | date_start: date 71 | keywords: 72 | - "Cloudflare, Inc" 73 | - INVOICE 74 | options: 75 | date_formats: 76 | - "%d %m %Y" 77 | currency: USD 78 | languages: 79 | - en 80 | decimal_separator: "." 
81 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.cloudns.yml: -------------------------------------------------------------------------------- 1 | issuer: Cloud DNS Ltd 2 | fields: 3 | amount: Крайнасума:(\d+\.\d+)USD 4 | date: Date:(\d{1,2}\.\d{1,2}\.\d{4}) 5 | invoice_number: Number#(\d+) 6 | keywords: 7 | - BG202743734 8 | options: 9 | remove_whitespace: true 10 | currency: USD 11 | date_formats: 12 | - "%d.%m.%Y" 13 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.datadoghq.yml: -------------------------------------------------------------------------------- 1 | # The Datadog invoices are really not nice! 2 | # So this invoice template is pretty weak. I hope Datadog will improve their 3 | # invoices in the future... 4 | issuer: Datadog 5 | fields: 6 | amount: Charged\s\$(\d+)\sto 7 | amount_untaxed: Charged $(\d+)\sto 8 | date: \son\s+(.+,\s\d{4}) at 9 | invoice_number: (\w+)\s+Notes:\s 10 | static_partner_name: Datadog 11 | static_partner_email: billing@datadoghq.com 12 | keywords: 13 | - billing@datadoghq.com 14 | - $ 15 | - Invoice 16 | options: 17 | currency: USD 18 | date_formats: 19 | - "%B %d, %Y" 20 | languages: 21 | - en 22 | decimal_separator: "." 
23 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.digitalocean.yml: -------------------------------------------------------------------------------- 1 | issuer: Online SAS 2 | fields: 3 | amount: Total:\$(\d+\.\d+) 4 | date: DateIssued:(\w{3,}\d{1,2},\d{4}) 5 | invoice_number: InvoiceNumber:(\d+) 6 | keywords: 7 | - EU528002224 8 | options: 9 | remove_whitespace: true 10 | currency: USD 11 | date_formats: 12 | - "%B%d,%Y" 13 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.envato.yml: -------------------------------------------------------------------------------- 1 | issuer: Envato 2 | fields: 3 | amount: Invoice Total: \$(\d+.\d{2}) 4 | amount_untaxed: Invoice Total: \$(\d+.\d{2}) 5 | date: Order date: (\d+ \w+ \d+) 6 | invoice_number: Invoice No. (\d+) 7 | partner_name: (Envato) 8 | keywords: 9 | - Envato 10 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.eur.aliexpress.json: -------------------------------------------------------------------------------- 1 | { 2 | "issuer": "Alibaba", 3 | "fields": { 4 | "amount": "Amount paid\\s+\\d{1,4}.\\d{2}\\s+(\\d{1,4}.\\d{2})", 5 | "amount_untaxed": "\\s{4}Total\\s+(\\d+.\\d{2})", 6 | "date": { 7 | "parser": "regex", 8 | "regex": "Invoice Date . (\\d{4}-\\d.-\\d.)", 9 | "type": "date" 10 | }, 11 | "invoice_number": "Invoice No.? . 
([A-Z]{2}\\d+)", 12 | "static_vat": "IM5280002556" 13 | }, 14 | "lines": { 15 | "start": "Amount.In.+[)]", 16 | "end": "Grant Total", 17 | "first_line": [ 18 | "(?P(\\w+(?:\\S|[ ]\\w\\w+|\\n)*))\\s+(?P\\S)\\s+(?P\\d+.\\d{2})\\s+(?P\\d+.\\d{2})\\s+(?P\\d{2}).\\s+(?P\\d+.\\d{2})\\s+\\s+(?P\\d+.\\d{2})\\s+(?P\\d+.\\d{2})", 19 | "(?POrder Number.\\s+(\\d+))" 20 | ], 21 | "line": "^(?P\\w+(?:\\S|[ ]\\w\\w+|\\n)*)$", 22 | "types": { 23 | "qty": "float", 24 | "price_unit": "float", 25 | "discount": "float", 26 | "line_tax_percent": "float", 27 | "line_tax_amount": "float", 28 | "amounttxcurrency": "float", 29 | "amountcurrency": "float" 30 | } 31 | }, 32 | "keywords": ["Alibaba.com Singapore E-Commerce Private Limited"], 33 | "options": { 34 | "currency": "EUR", 35 | "languages": ["en"], 36 | "decimal_separator": ".", 37 | "replace": [ 38 | ["\\s-\\s", "1"], 39 | ["/", "_"], 40 | ["\\n\\n", "\\n"], 41 | ["\\n\\s\\s\\s", ""] 42 | ] 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.expressvpn.yml: -------------------------------------------------------------------------------- 1 | issuer: ExpressVPN 2 | fields: 3 | amount: Grand Total:\s+\$([\d,]+.\d{2}) 4 | amount_untaxed: Subtotal:\s+\$([\d,]+.\d{2}) 5 | date: Upon Payment\s+(\d{4}-\d{2}-\d{2}) 6 | invoice_number: Invoice No. \#(\d+) 7 | static_partner_name: ExpressVPN 8 | keywords: 9 | - ExpressVPN 10 | - Invoice 11 | - $ 12 | options: 13 | currency: USD 14 | date_formats: 15 | - "%Y-%m-%d" 16 | languages: 17 | - en 18 | decimal_separator: "." 
19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.expressvpn_prio6.yml: -------------------------------------------------------------------------------- 1 | issuer: ExpressVPN 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: '[$]\s\d{2}[.]\d{2}\s+[$]\s(\d{2}[.]\d{2})' 6 | type: float 7 | # amount_untaxed: '[$]\s\d{2}[.]\d{2}\s+[$]\s(\d{2}[.]\d{2})' 8 | invoice_number: 9 | parser: regex 10 | regex: Invoice No. \#(\d+) 11 | partner_name: 12 | parser: static 13 | value: ExpressVPN 14 | partner_website: 15 | parser: static 16 | value: expressvpn.com 17 | country_code: 18 | parser: static 19 | value: us 20 | lines: 21 | parser: lines 22 | start: 'Status\s+' 23 | end: '\Z' 24 | line: 25 | - '(?P\w+)\s[(](?P\w{3}\s\d{2},\s\d{4})\sto\s(?P\w{3}\s\d{2},\s\d{4})[)]\s+[$]\s(?P\d{2}[.]\d{2})\s+[$]\s(?P\d{2}[.]\d{0,2})' 26 | types: 27 | price_unit: float 28 | amount: float 29 | date_start: date 30 | date_end: date 31 | tables: 32 | - start: Date\s+ 33 | end: "Qty" 34 | body: '(?P\w{3}\s\d{2},\s\d{4})' 35 | - start: Status\s+ 36 | end: '\Z' 37 | body: '(?P\w+)\s[(](?P\w{3}\s\d{2},\s\d{4})\sto\s(?P\w{3}\s\d{2},\s\d{4})[)]\s+[$]\s(?P\d{2}[.]\d{2})\s+[$]\s(?P\d{2}[.]\d{0,2})' 38 | keywords: 39 | - ExpressVPN 40 | - Invoice 41 | - $ 42 | options: 43 | currency: USD 44 | date_formats: 45 | - "%Y-%m-%d" 46 | languages: 47 | - en 48 | decimal_separator: "." 
49 | priority: 6 50 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json: -------------------------------------------------------------------------------- 1 | { 2 | "issuer": "Flipkart", 3 | "fields": { 4 | "amount": "GrandTotal\\s*(\\d+\\.\\d+)", 5 | "date": "InvoiceDate:\\s*(\\d{1,4}\\-\\d{1,2}\\-\\d{1,4})", 6 | "invoice_number": "InvoiceNo:(\\S+)", 7 | "order_id": "OrderID:(\\w{2}\\d{16,18})" 8 | }, 9 | "keywords": ["flipkart", "WS Retail", "OD"], 10 | "options": { 11 | "currency": "INR", 12 | "remove_whitespace": true 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.ftserussell.yml: -------------------------------------------------------------------------------- 1 | issuer: FTSE Russel 2 | fields: 3 | amount_untaxed: Net Total\s+EUR\s+€([\d,]+\.\d{2}) 4 | amount: Amount\s+EUR\s+€([\d,]+\.\d{2}) 5 | date: INVOICE DATE\s+(\d{2}-\w+-\d{4}) 6 | invoice_number: INVOICE NO:\s+(\w+) 7 | static_vat: GB524922449 8 | keywords: 9 | - INVOICE 10 | - EUR 11 | - GB524922449 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d-%b-%Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.github.yml: -------------------------------------------------------------------------------- 1 | issuer: Github, Inc. 
2 | fields: 3 | amount: AmountUSD\$(\d+\.\d+) 4 | date: Date(\d{4}\-\d{1,2}\-\d{1,2}) 5 | invoice_number: TransactionID(\w+) 6 | keywords: 7 | - support@github.com 8 | options: 9 | remove_whitespace: true 10 | currency: USD 11 | date_formats: 12 | - "%Y-%m-%d" 13 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.globalsign.yml: -------------------------------------------------------------------------------- 1 | issuer: GlobalSign 2 | fields: 3 | amount: InvoiceTotalHKD\$(\d+.\d+) 4 | date: InvoiceDate:(\d{1,2}/\d{1,2}/\d{4}) 5 | invoice_number: InvoiceNo:(\w{2}\d+) 6 | keywords: 7 | - accounting-apac@globalsign.com 8 | options: 9 | remove_whitespace: true 10 | currency: HKD 11 | date_formats: 12 | - "%d/%m/%Y" 13 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.google.adwords.hk.yml: -------------------------------------------------------------------------------- 1 | issuer: Google Asia Pacific Pte. Ltd. 
2 | fields: 3 | amount: 4 | - PaymentamountHK\$(\d+?,?\d+\.\d+) 5 | - Payment\s*amountHK\$(\d+?,?\d+\.\d+) 6 | date: 7 | - Paymentdate(\w+\d{1,2},\d{4}) 8 | - Payment\s*date(\w+\s*\d+,\s*\d{4}) 9 | invoice_number: 10 | - BillingID([\d\-]+) 11 | - Billing\s*ID([\d\-]+) 12 | keywords: 13 | - 200817984R 14 | options: 15 | remove_whitespace: true 16 | currency: HKD 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.hobohost.yml: -------------------------------------------------------------------------------- 1 | issuer: HoboHost, LLC 2 | fields: 3 | amount: Total\$(\d+.\d+)USD 4 | date: InvoiceDate:(\d+/\d+/\d+) 5 | invoice_number: 'Invoice#(\d+)' 6 | keywords: 7 | - HoboHost 8 | options: 9 | currency: USD 10 | remove_whitespace: true 11 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.jamiepro.yml: -------------------------------------------------------------------------------- 1 | issuer: JamiePro 2 | fields: 3 | amount: Total.{1}(\d{1,10},\d{2}) 4 | date: VATNr:\s(\d{2}/\w{3}/\d{4}) 5 | invoice_number: Invoice:([\w-]{4,11}) 6 | IBAN: Account:(\w{10,30}) 7 | BIC: BIC:(\w{8,11}) 8 | static_payment: transfer 9 | keywords: 10 | - NL818362509B01 11 | - Invoice 12 | options: 13 | remove_whitespace: true 14 | date_formats: 15 | - "%d/%b/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.linode.yml: -------------------------------------------------------------------------------- 1 | issuer: Linode 2 | fields: 3 | amount: Invoice\s+Total:\$(\d+.\d{2}) 4 | amount_untaxed: Invoice\s+Total:\$(\d+.\d{2}) 5 | date: Invoice\s+Date:\s+(\d{4}-\d{2}-\d{2}) 6 | invoice_number: Invoice:\s+#(\d+) 7 | static_partner_website: www.linode.com 8 | keywords: 9 | - 855-454-6633 10 | - Linode 11 | - Invoice 12 | - $ 13 | options: 
14 | currency: USD 15 | date_formats: 16 | - "%Y/%m/%d" 17 | decimal_separator: "." 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.microsoftonline.hk-v2017.yml: -------------------------------------------------------------------------------- 1 | issuer: Microsoft Regional Sales Corporation 2 | fields: 3 | amount: Total(\d+\.\d+) 4 | date: InvoiceDate:(\d{1,2}\/\d{1,2}\/\d{4}) 5 | invoice_number: InvoiceNumber:(\w+) 6 | keywords: 7 | - Microsoft 8 | - M90002526N 9 | options: 10 | remove_whitespace: true 11 | currency: HKD 12 | date_formats: 13 | - "%d/%m/%Y" 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.microsoftonline.hk.yml: -------------------------------------------------------------------------------- 1 | issuer: Microsoft Regional Sales Corporation 2 | fields: 3 | amount: GrandTotal(\d+\.\d+)HKD 4 | date: DocumentDate:(\d{1,2}\/\d{1,2}\/\d{4}) 5 | invoice_number: InvoiceNo.:(\w+) 6 | keywords: 7 | - Microsoft 8 | - M9-0002526-N 9 | options: 10 | remove_whitespace: true 11 | currency: HKD 12 | date_formats: 13 | - "%d/%m/%Y" 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.mongodb.yml: -------------------------------------------------------------------------------- 1 | issuer: mongoDB Cloud 2 | fields: 3 | amount: PAID\s+\w+\s\d{1,2},\s\d{4}\s+\$[\d,]+\.\d{2}\s+\$[\d,]+\.\d{2}\s+\$[\d,]+\.\d{2}\s+\$([\d,]+\.\d{2}) 4 | amount_untaxed: PAID\s+\w+\s\d{1,2},\s\d{4}\s+\$([\d,]+\.\d{2}) 5 | date: PAID\s+(\w+\s\d{1,2},\s\d{4}) 6 | invoice_number: Invoice\s+Number:\s+(\w+) 7 | static_vat: IE9793087U 8 | keywords: 9 | - IE9793087U 10 | - Invoice Number 11 | - $ 12 | - cloud.mongodb.com 13 | - PAID 14 | options: 15 | currency: USD 16 | date_formats: 17 | - "%B %d, %Y" 18 | decimal_separator: "." 
19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.namecheap.yml: -------------------------------------------------------------------------------- 1 | issuer: Namecheap, Inc. 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: TOTAL\s+[$](\d+\.\d+) 6 | type: float 7 | date: 8 | parser: regex 9 | regex: Order\sDate\s+[:]\s+(\d+.\d+.\d{4}) 10 | type: date 11 | invoice_number: 12 | parser: regex 13 | regex: Order\sNumber\s+[:]\s(\d+) 14 | partner_website: 15 | parser: static 16 | value: namecheap.com 17 | partner_name: 18 | parser: static 19 | value: Namecheap, Inc. 20 | partner_city: 21 | parser: regex 22 | regex: Phoenix 23 | partner_street: 24 | parser: static 25 | value: East Washington Street 305 26 | partner_email: 27 | parser: static 28 | value: support@namecheap.com 29 | country_code: 30 | parser: static 31 | value: US 32 | keywords: 33 | - (?i)NameCheap 34 | - RECEIPT 35 | options: 36 | remove_whitespace: false 37 | currency: USD 38 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.namesilo.yml: -------------------------------------------------------------------------------- 1 | issuer: Namecheap, Inc. 2 | fields: 3 | amount: Total\$(\d+\.\d+) 4 | date: 5 | - OrderDate:(\d+/\d+/\d{4}) 6 | invoice_number: 7 | - OrderNumber:(\d+) 8 | keywords: 9 | - Namecheap 10 | - support@namecheap.com 11 | options: 12 | remove_whitespace: true 13 | currency: USD 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.newrelic.yml: -------------------------------------------------------------------------------- 1 | issuer: "New Relic, Inc." 2 | keywords: 3 | - "New Relic, Inc." 
4 | - "26-2017431" 5 | - "San Francisco, CA 94105" 6 | fields: 7 | amount: 'Invoice Total \(USD\):\s+\$([\d\,]+\.\d{1,2})' 8 | date: '\s\sDate:\s+(\d{2}/\d{2}/\d{4})' 9 | invoice_number: 'Invoice No:\s+(\w+)' 10 | tax_amount: 'Tax:\s+\$([\d\,]+\.\d{1,2})' 11 | tax_id: 12 | parser: static 13 | value: "26-2017431" 14 | payment_term_date: 'Due Date:\s+(\d{2}/\d{2}/\d{4})' 15 | options: 16 | currency: USD 17 | decimal_separator: "." 18 | languages: 19 | - en 20 | date_formats: 21 | - "%d/%m/%Y" 22 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.nl.lenovo.digitalriver.yml: -------------------------------------------------------------------------------- 1 | issuer: Digital River Ireland, Ltd. 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: Totaal [(]EUR[)]\s+(\d?.?\d{1,4}[,]\d{2}) 6 | type: float 7 | amount_tax: 8 | parser: regex 9 | regex: Totale btw-kosten\s+(\d?.?\d{1,4}[,]\d{2}) 10 | type: float 11 | date: 12 | parser: regex 13 | regex: 'Factuurdatum\s+(\d{1,2}[.]\d{1,2}[.]\d{4})' 14 | type: date 15 | invoice_number: 16 | parser: regex 17 | regex: Factuurnummer\s+(\d+) 18 | iban: 19 | parser: regex 20 | regex: '[A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}' 21 | bic: 22 | parser: regex 23 | regex: '(?i)BIC[:]\s+(\w{8,11})' 24 | static_vat: NL815471166B01 25 | partner_website: 26 | parser: static 27 | value: lenovo.com 28 | partner_name: 29 | parser: static 30 | value: Digital River Ireland, Ltd. 
31 | country_code: 32 | parser: static 33 | value: ie 34 | partner_city: 35 | parser: regex 36 | regex: Dublin 37 | partner_street: 38 | parser: static 39 | value: Park Lane 40 | narration: 41 | parser: regex 42 | regex: Volgnummer factuur:\s+\S+ 43 | lines: 44 | start: Productbeschrijving 45 | end: (?i)Producttotaal 46 | line: (?P(\w+(?:\s+\S+){1,8}))\s+(?P\d)\s+(?P\d?[.]?\d+[,]\d{2}) 47 | types: 48 | qty: float 49 | price_unit: float 50 | price_subtotal: float 51 | taxpercent: float 52 | keywords: 53 | - (?i)Digital\sRiver 54 | - "NL815471166B01" 55 | - "Factuur" 56 | options: 57 | currency: EUR 58 | languages: 59 | - nl 60 | decimal_separator: "," 61 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.nmmn.yml: -------------------------------------------------------------------------------- 1 | issuer: NMMN IT-Services 2 | fields: 3 | amount: \d{1,10},\d{2}€\d{1,10}%\d{2},\d{2}€(\d{1,10},\d{2})€ 4 | date: Datum(\d{2}.\d{2}.\d{4}) 5 | invoice_number: Rechnung(\d{4}-\d{2}-\d{5}) 6 | IBAN: Bankverbindung:[\w\s,:\.\-]+IBAN:(\w{10,30}) 7 | BIC: Bankverbindung:[\w\s,:\.\-]+BIC:(\w{8,11}) 8 | static_payment: directdebit 9 | keywords: 10 | - DE289115734 11 | - Rechnung 12 | options: 13 | remove_whitespace: true 14 | date_formats: 15 | - "%d.%m.%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.nodisto.yml: -------------------------------------------------------------------------------- 1 | issuer: Nodisto IT 2 | fields: 3 | amount: Total\$(\d+\.\d+)USD 4 | date: InvoiceDate:(\d+/\d+/\d+) 5 | invoice_number: 'Invoice#(\d+)' 6 | keywords: 7 | - Nodisto 8 | options: 9 | remove_whitespace: true 10 | currency: USD 11 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.nyse.yml: 
-------------------------------------------------------------------------------- 1 | issuer: NYSE Market (DE) Inc. 2 | fields: 3 | amount: Invoice\s+Amount:\s+\$([\d,]+\.\d{2}) 4 | amount_untaxed: Sub\s+Total\s+:\s+\$([\d,]+\.\d{2}) 5 | date: Invoice\s+Date:\s+(\d{2}\/\d{2}\/\d{4}) 6 | invoice_number: Invoice\s+No:\s+(\w+) 7 | static_vat: 20-3783731 8 | keywords: 9 | - 20-3783731 10 | - Invoice 11 | - $ 12 | - nyse.com 13 | options: 14 | currency: USD 15 | date_formats: 16 | - "%m/%d/%Y" 17 | decimal_separator: "." 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.oyo.invoice.yml: -------------------------------------------------------------------------------- 1 | issuer: OYO 2 | fields: 3 | amount: Grand Total\s+Rs (\d+) 4 | date: Date:\s(\d{1,2}\/\d{1,2}\/\d{1,4}) 5 | invoice_number: ([A-Z0-9]+)\s+Cash at Hotel 6 | keywords: 7 | - OYO 8 | - Oravel 9 | - Stays 10 | tables: 11 | - start: Hotel Details\s+Check In\s+Check Out\s+Rooms 12 | end: Booking ID 13 | body: (?P[\S ]+),\s+(?P(?:0[1-9]|[12][0-9]|3[01])\/(?:0[1-9]|1[012])\/(?:19\d{2}|20\d{2}))\s+(?P(?:0[1-9]|[12][0-9]|3[01])\/(?:0[1-9]|1[012])\/(?:19\d{2}|20\d{2}))\s+(?P\d+) 14 | - start: Booking ID\s+Payment Mode 15 | end: DESCRIPTION 16 | body: (?P\w+)\s+(?P(?:\w+ ?)*) 17 | - start: GSTIN\s+CIN 18 | end: Oravel Stays Private Limited 19 | body: (?P\w+)\s+(?P\w+) 20 | options: 21 | currency: INR 22 | decimal_separator: "." 
23 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.packtpub.yml: -------------------------------------------------------------------------------- 1 | issuer: Packt Publishing 2 | fields: 3 | amount: Total\s+\d+\s+€(\d+.\d{2}) 4 | amount_untaxed: Total\s+\d+\s+€(\d+.\d{2}) 5 | date: Invoice\s+Date\s+(\d{1,2}.+\s+\d{4}) 6 | invoice_number: Invoice\s+Ref\s+(\d+) 7 | static_vat: GB825446718 8 | keywords: 9 | - 825 44 6718 10 | - € 11 | - INVOICE 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d %B %Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.pixartprinting.yml: -------------------------------------------------------------------------------- 1 | issuer: Pixartprinting SpA 2 | fields: 3 | amount: \d+\.\d+\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+([\d,]+\.\d{2}) 4 | amount_untaxed: \d+\.\d+\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+([\d,]+\.\d{2})\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2}\s+[\d,]+\.\d{2} 5 | date: SALES INVOICE\s+N.\s*[\w/]+\s+OF\s+(\d{2}-\d{2}-\d{4}) 6 | invoice_number: SALES INVOICE\s+N.\s*([\w/]+) 7 | static_vat: IT04061550275 8 | keywords: 9 | - INVOICE 10 | - EUR 11 | - IT04061550275 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d-%m-%Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.runbox.yml: -------------------------------------------------------------------------------- 1 | issuer: Runbox Solutions AS 2 | fields: 3 | amount: 4 | parser: regex 5 | regex: \s+Total\s+EUR\s(d*[.,]?\d+[,.]\d+) 6 | type: float 7 | amount_untaxed: 8 | parser: regex 9 | regex: \s+Total\s+EUR\s(d*[.,]?\d+[,.]\d+) 10 | type: float 11 | invoice_number: 12 | parser: regex 13 | regex: Invoice no[.:]+\s+(\d+) 14 | partner_website: 15 | parser: static 16 | value: runbox.com 17 | partner_name: 18 | parser: regex 19 | regex: Runbox Solutions AS 20 | partner_city: 21 | parser: regex 22 | regex: Oslo 23 | country_code: 24 | parser: static 25 | value: "NO" 26 | partner_zip: 27 | parser: regex 28 | regex: '([,]\s\d{4})\s\w+' 29 | partner_email: 30 | parser: regex 31 | regex: '\w+[@]\w+[.]com' 32 | date: 33 | parser: regex 34 | regex: Invoice date[:]\s+(\d+-\d{2}-\d{2}) 35 | type: date 36 | date_due: 37 | parser: regex 38 | regex: 'Due date[:]\s+(\d{4}[-]\d{2}[-]\d{2})' 39 | type: date 40 | iban: 41 | parser: regex 42 | regex: (?:[A-Z]{2}[ \-]?[0-9]{2})(?:[ \-]?[A-Z0-9]{3,5}){2,7} 43 | bic: 44 | parser: regex 45 | regex: SWIFT code[:]\s+(\w{8,11}) 46 | lines: 47 | parser: lines 48 | start: "Description" 49 | end: "Total" 50 | line: 51 | - '(?P[\S ]+)\s+(?P\d+)\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})\s+(?P\d+[,.]\d{2})' 52 | types: 53 | qty: float 54 | price_subtotal: float 55 | line_amount_tax: float 56 | unit_price: float 57 | keywords: 58 | - "Runbox" 59 | - "Invoice" 60 | required_fields: 61 | - lines 62 | options: 63 | languages: 64 | - en 65 | currency: EUR 66 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.sammymaystone.yml: -------------------------------------------------------------------------------- 1 | issuer: Sammy Maystone - For Lines testing 2 | keywords: 3 | 
- "Sammy Maystone" 4 | required_fields: 5 | - invoice_number 6 | - date 7 | - line_items 8 | fields: 9 | invoice_number: '\s+\#\s+(\S+)' 10 | date: '\s{10,}Date:\s+(.*)[\r\n]*' 11 | line_items: 12 | parser: lines 13 | start: 'Item\s+Quantity\s+Rate\s+Amount' 14 | first_line: 'Service (?P\w)' 15 | line: "(?P.*)" 16 | skip_line: ["Description:", "Notes:"] 17 | last_line: "(?PParts:.*)" 18 | end: "Subtotal" 19 | options: 20 | remove_whitespace: false 21 | currency: USD 22 | languages: 23 | - en 24 | decimal_separator: "." 25 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.scaleway.yml: -------------------------------------------------------------------------------- 1 | issuer: Online SAS 2 | fields: 3 | amount: Totaldue€(\d+\.\d+) 4 | date: Issued:(\w{3,}\d{1,2},\d{4}) 5 | invoice_number: Invoice#(\d+) 6 | keywords: 7 | - FR35433115904 8 | - Invoice 9 | options: 10 | remove_whitespace: true 11 | currency: EUR 12 | date_formats: 13 | - "%B%d,%Y" 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.textmaster.yml: -------------------------------------------------------------------------------- 1 | issuer: Textmaster SA 2 | fields: 3 | amount: Total\$(\d+\.\d+) 4 | date: 5 | - DateofIssue:(\w{1,10}\d+,\d{4}) 6 | invoice_number: 7 | - InvoiceNumber:([\w\-]+) 8 | keywords: 9 | - BE0837387439 10 | - TextMaster 11 | options: 12 | remove_whitespace: true 13 | currency: USD 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.tmx.yml: -------------------------------------------------------------------------------- 1 | issuer: TSX Inc. 
2 | fields: 3 | amount: Total\s+Amount\s+Due\s+USD\s+\$([\d,]+\.\d{2}) 4 | amount_untaxed: Pre-Tax\s+Total\s+([\d,]+\.\d{2}) 5 | date: Invoice\s+Date\s+(\d{2}\s\w+\s\d{4}) 6 | invoice_number: Invoice\s+Number\s+(\w+) 7 | static_partner_name: TSX Inc. 8 | keywords: 9 | - Invoice 10 | - USD 11 | - payments@tmx.com 12 | options: 13 | currency: USD 14 | date_formats: 15 | - "%d-%b-%Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.travis-ci.yml: -------------------------------------------------------------------------------- 1 | issuer: Travis CI GmbH 2 | fields: 3 | amount: 'Total\$(\d+\.\d+)' 4 | amount_untaxed: 'Price\$(\d+\.\d+)' 5 | date: 'SubscriptionPeriod([\w]+[\d]{1,2}\,[\d]{4})' 6 | invoice_number: 'Invoice\#(\w+)' 7 | keywords: 8 | - support@travis-ci.com 9 | options: 10 | remove_whitespace: true 11 | currency: USD 12 | date_formats: 13 | - "%MMMM%d,%YYYY" 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.twitter.de.yml: -------------------------------------------------------------------------------- 1 | issuer: Twitter International Company 2 | fields: 3 | amount: GesamtsummemitUSt(\d+\,\d+) 4 | amount_untaxed: GesamtsummeohneUSt(\d+\,\d+) 5 | date: Ausstellungsdatum(\d{1,2}\.\w+\d{4}) 6 | invoice_number: Rechnungs-ID(\d+) 7 | keywords: 8 | - IE9803175Q 9 | - USt-Betrag 10 | options: 11 | remove_whitespace: true 12 | currency: EUR 13 | locale: de_DE.UTF-8 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.twitter.uk.yml: -------------------------------------------------------------------------------- 1 | issuer: Twitter International Company 2 | fields: 3 | amount: TotalincludingVAT£(\d+\.\d+) 4 | date: 5 | - Invoicedate(\d{1,2}\w+,\d{4}) 6 | - Issuedate(\d{1,2}\w+,\d{4}) 7 | invoice_number: 
8 | - Invoicenumber(\d+) 9 | - InvoiceId(\d+) 10 | keywords: 11 | - 9803175Q 12 | - £ 13 | options: 14 | remove_whitespace: true 15 | currency: GBP 16 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.twitter.yml: -------------------------------------------------------------------------------- 1 | issuer: Twitter International Company 2 | fields: 3 | amount: Total\$(\d+\.\d+) 4 | date: 5 | - Invoicedate(\d{1,2}\w+,\d{4}) 6 | - Issuedate(\d{1,2}\w+,\d{4}) 7 | invoice_number: 8 | - Invoicenumber(\d+) 9 | - InvoiceId(\d+) 10 | keywords: 11 | - 9803175Q 12 | - Twitter 13 | - USD 14 | options: 15 | remove_whitespace: true 16 | currency: USD 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.upwork.yml: -------------------------------------------------------------------------------- 1 | issuer: Upwork 2 | fields: 3 | amount: TOTALAMOUNT:\$(\d{1,2}?,?\d{2,3}.\d{2}) 4 | date: DATE(\w+,\d{4}) 5 | invoice_number: INVOICE#(\w+) 6 | keywords: 7 | - MountainView,CA94043 8 | - 441LogueAve. 
9 | options: 10 | currency: USD 11 | remove_whitespace: true 12 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/com/com.usersnap.yml: -------------------------------------------------------------------------------- 1 | issuer: Usersnap GmbH 2 | fields: 3 | amount: Totalamount:USD(\d+.\d+) 4 | date: Perg,(\d{4}-\d{2}-\d{2}) 5 | invoice_number: Invoice(\w+) 6 | keywords: 7 | - ATU67824857 8 | options: 9 | remove_whitespace: true 10 | currency: USD 11 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.amazon.yml: -------------------------------------------------------------------------------- 1 | # TODO: this template needs more keywords so that it is not matched 2 | # instead of the Amazon templates for other countries and/or languages 3 | fields: 4 | amount: EUR (\d+,\d+)\n\nMit dieser Warenlieferung 5 | date: Lieferdatum/Rechnungsdatum.*(\d{1,2}\. \w+ \d{4}) 6 | invoice_number: Rechnungsnr\.
([A-Z0-9\-]+) 7 | keywords: 8 | - Amazon EU 9 | - Rechnungsnr 10 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.bettina-kast.yml: -------------------------------------------------------------------------------- 1 | issuer: Bettina Kast 2 | fields: 3 | amount: Rechnungsbetrag(\d{1,10},\d{2}) 4 | date: Datum(\d{2}.\d{2}.\d{4}) 5 | invoice_number: Rechnung(\d{2}/\d{4}) 6 | IBAN: IBAN:(\w{10,30}) 7 | BIC: BIC\(SWIFT\):(\w{8,11}) 8 | static_payment: transfer 9 | keywords: 10 | - DE225792426 11 | - Rechnung 12 | options: 13 | remove_whitespace: true 14 | date_formats: 15 | - "%d.%m.%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.hosteurope.yml: -------------------------------------------------------------------------------- 1 | issuer: Host Europe GmbH 2 | fields: 3 | amount: EndpreisBrutto(\d{1,10},\d{2}) 4 | date: Datum(\d{2}.\d{2}.\d{4}) 5 | invoice_number: Rechnung(AR-\d{1,12}) 6 | static_payment: directdebit 7 | keywords: 8 | - HostEuropeGmbH 9 | - Rechnung 10 | options: 11 | remove_whitespace: true 12 | date_formats: 13 | - "%d.%m.%Y" 14 | decimal_separator: "," 15 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.notebooksbilligerBillPay.yml: -------------------------------------------------------------------------------- 1 | issuer: notebooksbilliger.de 2 | fields: 3 | amount: Endsumme:(\d{1,10}.\d{2}) 4 | date: Rechnungs\-\+Lieferdatum(\d{2}.\d{2}.\d{4}) 5 | invoice_number: RE-Nummer(\w{5,10}) 6 | IBAN: MartinSchwagerIBAN:(\w{10,22}) 7 | BIC: MartinSchwagerIBAN:\w{10,22}BIC:([A-Z0-9]{8,11}) 8 | static_payment: transfer 9 | static_recipient: BillPay 10 | reference: Verwendungszweck:(\w{9,12}/\w{4}) 11 | keywords: 12 | - DE175671991 13 | - BillPay 14 | - Rechnung 15 | options: 16 | remove_whitespace: 
true 17 | date_formats: 18 | - "%d.%m.%Y" 19 | decimal_separator: "." 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.ovh.yml: -------------------------------------------------------------------------------- 1 | issuer: OVH GmbH 2 | fields: 3 | amount: 4 | - Gesamtbetraginkl\.(\d+\.\d+) 5 | - Gesamtbetraginkl.MwSt(\d+\.\d+) 6 | date: Datum:(\d{2}-\d{2}-\d{4}) 7 | invoice_number: QuittierteRechnung:(\w+) 8 | keywords: 9 | - DE245768940 10 | options: 11 | remove_whitespace: true 12 | date_formats: 13 | - "%d-%m-%Y" 14 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.qualityhosting.yml: -------------------------------------------------------------------------------- 1 | issuer: QualityHosting AG 2 | fields: 3 | amount: Total EUR\s+(\d+,\d+) 4 | amount_untaxed: Total EUR\s+(\d+,\d+) 5 | date: 6 | - \s{2,}(\d+\. .+ \d{4})\s{2,} 7 | - Rechnungsdatum\s+(\w+ \d+, \d{4}) 8 | invoice_number: Rechnungsnr\.\s+(\d{8}) 9 | vat: DE 232 446 240 10 | lines: 11 | start: 'Contract No. 
\w+' 12 | end: "Total EUR" 13 | first_line: '\s+(?P\d+)\s+(?P\d+)\s+(?P.{,70})\s+(?P\d+,\d+)' 14 | line: '^\s+(?P.+)$' 15 | types: 16 | qty: float 17 | price: float 18 | keywords: 19 | - QualityHosting 20 | options: 21 | currency: EUR 22 | decimal_separator: "," 23 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/de/de.united-domains.yml: -------------------------------------------------------------------------------- 1 | issuer: united-domains AG 2 | fields: 3 | amount: GesamtBrutto(\d{1,10},\d{2}) 4 | date: Rechnungsdatum:(\d{2}.\d{2}.\d{4}) 5 | invoice_number: Rechnung:(\w{1,20}) 6 | static_payment: directdebit 7 | static_IBAN: 8 | static_BIC: 9 | keywords: 10 | - DE203066334 11 | - Rechnung 12 | options: 13 | remove_whitespace: true 14 | date_formats: 15 | - "%d.%m.%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/es/com.mob-barcelona.caterina.yml: -------------------------------------------------------------------------------- 1 | issuer: MOB Caterina (Coworking) 2 | fields: 3 | amount: IMPORTE\sTOTAL\s+(\d+,\d{2}) 4 | amount_untaxed: IMPORTE\sNETO\s+(\d+,\d{2}) 5 | date: Factura\s+MC\.(\d{4}\.\d{1,2}.\d{2}) 6 | invoice_number: Factura\s+(MC\.\d{4}\.\d{1,2}.\d{2}) 7 | static_vat: B67334847 8 | vat_rate: IVA\s+\((\d+\.\d+)%\) 9 | address: B67334847\s+(.+España) 10 | static_partner_name: MOB Caterina S.L. 
11 | keywords: 12 | - B67334847 13 | - MOB Caterina 14 | - Factura 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%Y.%m.%d" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/es/com.pepephone.yml: -------------------------------------------------------------------------------- 1 | issuer: PepePhone (Mobile & Internet) 2 | fields: 3 | amount: Total\sfactura\s+(\d+,\d{2}) 4 | amount_untaxed: Total\s\(base\simponible\)\s+(\d+,\d{2}) 5 | date: Fecha\sde\semisión:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Número\sde\sfactura:\s+(\d+) 7 | static_vat: B85033470 8 | vat_rate: Impuesto\s+\(IVA\s+(.+)%\) 9 | address: CIF:\s+[A-Z0-9]+\s+(.+España) 10 | static_partner_name: PEPEMOBILE S.L. 11 | keywords: 12 | - ESB85033470 13 | - € 14 | - factura 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%d/%m/%Y" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/es/es.amazon.yml: -------------------------------------------------------------------------------- 1 | issuer: amazon.es 2 | fields: 3 | amount: Total\s+(\d+,\d{2})\s€\s+IVA 4 | amount_untaxed: \(IVA\s+excluido\)\s+\d+%\s+(\d+,\d{2})\s€ 5 | date: Fecha\s+de\s+la\s+entrega\s+(\d+\s+[A-z]+\s+\d+)\s+ 6 | invoice_number: Número\s+de\s+la\s+factura\s+([A-Z0-9\-]+) 7 | vat: Vendido\s+por\s+.+\s+IVA\s+([A-Z0-9]+) 8 | vat_rate: \(IVA\s+excluido\)\s+(\d+)% 9 | partner_name: Vendido\s+por\s+(.+)\s+IVA\s+[A-Z0-9]+ 10 | keywords: 11 | - LU20260743 12 | - € 13 | - factura 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d %B %Y" 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/es/es.digimobile.yml: -------------------------------------------------------------------------------- 1 | issuer: DIGI (Mobile & 
Internet) 2 | fields: 3 | amount: TOTAL\s+FACTURA\s+\(imp\.\s+incl\.\)\s+(\d+,\d{2}) 4 | amount_untaxed: IMPORTE\s+\(base\s+imponible\)\s+(\d+,\d{2}) 5 | date: Fecha\s+de\s+emisión\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: FACTURA\s+Número:\s+([A-Z0-9]+) 7 | static_vat: B84919760 8 | vat_rate: IMPUESTOS\s+\((\d+\.\d{2})%\s+IVA\) 9 | address: Domicilio\s+Social\s+en\s+(.+\(Madrid\)) 10 | static_partner_name: DIGI Spain Telecom, S.L.U. 11 | keywords: 12 | - B-84919760 13 | - € 14 | - factura 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%d/%m/%Y" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/es/es.supplies24.yml: -------------------------------------------------------------------------------- 1 | issuer: supplies24.es 2 | fields: 3 | amount: Importe total:\s+(\d+,\d{2}) 4 | amount_untaxed: Suma sin IVA:\s+(\d+,\d{2}) 5 | date: Número de factura\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Número de factura\s+\d{2}/\d{2}/\d{4}\s+(\w+) 7 | static_vat: DE230466785 8 | keywords: 9 | - DE230466785 10 | - € 11 | - Factura 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/co.mooncard.yml: -------------------------------------------------------------------------------- 1 | issuer: MoonGroup (mooncard.co) 2 | fields: 3 | amount: Total\s+à\s+payer\s+(\d+,\d{2})\s+€ 4 | amount_untaxed: Total\s+HT\s+(\d+,\d{2})\s+€ 5 | date: Date\s+(\d{2}/\d{2}/\d{2}) 6 | date_start: (\d{2}/\d{2}/\d{2})\s+\-\s+\d{2}/\d{2}/\d{2} 7 | date_end: \d{2}/\d{2}/\d{2}\s+\-\s+(\d{2}/\d{2}/\d{2}) 8 | invoice_number: Facture\s+(MOON\d+) 9 | static_vat: FR65818620783 10 | keywords: 11 | - 818 620 783 12 | - € 13 | - Facture 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d/%m/%y" 18 | decimal_separator: "," 19 | 
-------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.adobe.ie.yml: -------------------------------------------------------------------------------- 1 | issuer: Adobe Systems Software 2 | fields: 3 | amount: Montant\s+TTC\s+(\d+.\d{2}) 4 | amount_tax: (\d+.\d{2})\s+TVA 5 | date: Date\s+de\s+la\s+facture\s?:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Numéro\s+de\s+facture\s?:\s+(\w+) 7 | static_vat: IE6364992H 8 | keywords: 9 | - IE6364992H 10 | - Facture 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.akretion.fr.yml: -------------------------------------------------------------------------------- 1 | issuer: Akretion France 2 | fields: 3 | amount: Total TTC :\s+(\d+,\d{2}) 4 | amount_untaxed: Total HT :\s+(\d+,\d{2}) 5 | date: (\d{2}/\d{2}/\d{4}) 6 | date_due: \d{2}/\d{2}/\d{4}.+(\d{2}/\d{2}/\d{4}) 7 | invoice_number: Facture (\w+) 8 | siren: (792 377 731) 9 | keywords: 10 | - 792 377 731 11 | - Facture 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.amazon.aws.yml: -------------------------------------------------------------------------------- 1 | issuer: Amazon Web Services EMEA SARL, succursale française 2 | fields: 3 | amount: TOTAL\s+AMOUNT\s+EUR\s+(\d+.\d{2}) 4 | amount_tax: TOTAL\s+VAT\s+EUR\s+(\d+.\d{2}) 5 | date: VAT\s+Invoice\s+Date:\s+(.+\s+\d{1,2},\s+\d{4}) 6 | invoice_number: VAT\s+Invoice\s+Number:\s+(\w+\-\w+) 7 | static_vat: FR30831001334 8 | keywords: 9 | - FR30831001334 10 | - EUR 11 | - Invoice 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%B %d, %Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.ateliercopieservice.yml: -------------------------------------------------------------------------------- 1 | issuer: Atelier Copy Service 2 | fields: 3 | amount: Total TTC\s+([\d ]+.\d{2}) 4 | amount_untaxed: Total HT\s+([\d ]+.\d{2}) 5 | invoice_number: FACTURE n° (\w+) 6 | date: Date : (\d{2}/\d{2}/\d{4}) 7 | static_vat: FR7442210554400015 8 | keywords: 9 | - FR 7442210554400015 10 | - FACTURE 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.chauffeur-prive.yml: -------------------------------------------------------------------------------- 1 | issuer: Chauffeur Privé (TRANSOPCO France SAS) 2 | fields: 3 | amount: Totalfacturé([\d ]+,\d{2}) 4 | sum_amount_tax: 5 | - TVA10%\*\*\*([\d ]+,\d{2}) 6 | - TVA20%\*\*([\d ]+,\d{2}) 7 | date: Datedefacture:(\d{2}/\d{2}/\d{4}) 8 | invoice_number: FactureNo(.+) 9 | date_start: Périodedu:(\d{2}/\d{2}/\d{4}) 10 | date_end: Périodedu:\d{2}/\d{2}/\d{4}au(\d{2}/\d{2}/\d{4}) 11 | static_vat: FR 11 807978119 12 | keywords: 13 | - Facture 14 | - € 15 | - "807978119" 16 | options: 17 | currency: EUR 18 | date_formats: 19 | - "%d/%m/%Y" 20 | decimal_separator: "," 21 | replace: 22 | - [" ", ""] 23 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.coriolis.yml: -------------------------------------------------------------------------------- 1 | # Designed to work on the 2 invoice models: mobile phone and Internet 2 | issuer: Coriolis Telecom 3 | fields: 4 | amount: TOTAL FACTURE TTC\s+(\d+,\d{2}) 5 | amount_untaxed: TOTAL HT\s+(\d+,\d{2}) 6 | date: Date facture\s+(\d{2}/\d{2}/\d{4}) 7 | invoice_number: Facture\s[nº\s]+(\d+) 8 | static_vat: 
FR86419735741 9 | keywords: 10 | - FR 86419735741 11 | - FACTURE 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.easyjet.fr.yml: -------------------------------------------------------------------------------- 1 | issuer: Easyjet 2 | fields: 3 | amount: MONTANT TOTAL\s+([\d ]+,\d{2}) 4 | amount_untaxed: MONTANT TOTAL\s+([\d ]+,\d{2}) 5 | date: Facture.+mission\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N° DE LA FACTURE\s+(\w+) 7 | static_vat: FR51453172470 8 | keywords: 9 | - FR 51453172470 10 | - FACTURE 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | languages: 17 | - fr 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.eaudugrandlyon.yml: -------------------------------------------------------------------------------- 1 | issuer: Eau du grand Lyon (eaudugrandlyon.com) 2 | fields: 3 | amount: Total\s+général\s+:\s+\d+,\d{2}\s+€\s+HT\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: Total\s+général\s+:\s+(\d+,\d{2})\s+€\s+HT 5 | date: FACTURE\s+D'ACCÈS\s+AU\s+SERVICE\s+du\s+(\d{1,2}.+\d{4}) 6 | date_start: du\s+(\d{2}/\d{2}/\d{4})\s+au\s+\d{2}/\d{2}/\d{4} 7 | date_end: du\s+\d{2}/\d{2}/\d{4}\s+au\s+(\d{2}/\d{2}/\d{4}) 8 | invoice_number: Facture\s+n°\s+(\d+) 9 | static_vat: FR41799365887 10 | keywords: 11 | - FR 41 799 365 887 12 | - € 13 | - Facture 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d %B %Y" 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.godaddy.yml: -------------------------------------------------------------------------------- 1 | issuer: Go Daddy 2 | fields: 3 | amount: Total\s?:\s+(\d+,\d{2}) 4 | 
amount_tax: Taxe\s?:\s+(\d+,\d{2}) 5 | date: (\d{2}/\d{2}/\d{4}) 6 | invoice_number: reçu\s?:\s+(\w+) 7 | static_vat: EU826010755 8 | keywords: 9 | - EU826010755 10 | - FACTURE 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.google.ie.yml: -------------------------------------------------------------------------------- 1 | issuer: Google Ireland Limited 2 | fields: 3 | amount: Montant dû en EUR\s+:\s+([\d ]+,\d{2}) 4 | amount_untaxed: Sous-total en EUR\s+:\s+([\d ]+,\d{2}) 5 | date: Date d'émission\s+:\s+(\d{1,2} .+ \d{4}) 6 | invoice_number: Numéro de la facture :\s+([\d-]+) 7 | static_vat: IE6388047V 8 | keywords: 9 | - Google Ireland 10 | - Facture 11 | - IE 6388047V 12 | - EUR 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %B %Y" 17 | languages: 18 | - fr 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.hootsuite.yml: -------------------------------------------------------------------------------- 1 | issuer: Hootsuite Media Inc. 
2 | fields: 3 | amount: Montant payé\s?:\s+€(\d+,\d{2}) 4 | amount_untaxed: Montant payé\s?:\s+€(\d+,\d{2}) 5 | date: Date\sde\sfacture\s?:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N°\sde\sfacture\s?:\s+(\w+) 7 | static_vat: EU826021784 8 | keywords: 9 | - EU826021784 10 | - facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%m/%d/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.jeanbesson.yml: -------------------------------------------------------------------------------- 1 | issuer: Transports Jean Besson 2 | fields: 3 | amount: MONTANT\s+TTC\s+A\s+REGLER\s+EN\s+EUROS\s+(\d+.\d{2}) 4 | amount_untaxed: MONTANT\s+HT\s+APRES\s+MAJORATION\s+:\s+(\d+.\d{2}) 5 | date: (\d{2}/\d{2}/\d{4}) 6 | invoice_number: FACTURE\s+N°\s?(\d+) 7 | static_vat: FR29775649965 8 | keywords: 9 | - FR 29775649965 10 | - FACTURE 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d/%m/%Y" 15 | decimal_separator: "." 
16 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.ldlc.yml: -------------------------------------------------------------------------------- 1 | issuer: LDLC.com 2 | fields: 3 | amount: Montant TTC\s+(\d+,\d{2}) 4 | amount_untaxed: Montant HT\s+(\d+,\d{2}) 5 | amount_tax: Montant TVA\s+(\d+,\d{2}) 6 | date: Date de facture\s+:\s+(\d{2}/\d{2}/\d{4}) 7 | invoice_number: N° de facture\s+:\s+(\w+) 8 | static_vat: FR26403554181 9 | keywords: 10 | - FR 26403554181 11 | - facture 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.linkedin.yml: -------------------------------------------------------------------------------- 1 | issuer: LinkedIn Ireland Unlimited 2 | fields: 3 | amount: Facture\s*:\s+([\d\s]+,\d{2}) 4 | amount_untaxed: Sous-total\s*:\s+([\d\s]+,\d{2}) 5 | date: Date\s*:\s+(\d+/\d+/\d{4}) 6 | invoice_number: N° de facture\s*:\s+(\w+) 7 | static_vat: IE9740425P 8 | keywords: 9 | - IE9740425P 10 | - EUR 11 | - Facture 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.mention.yml: -------------------------------------------------------------------------------- 1 | issuer: Mention Solutions SAS 2 | fields: 3 | amount: Total\s\(incl\.\sVAT\)\s+(\d+,\d{2}) 4 | amount_untaxed: Total\s\(excl\.\sVAT\)\s+(\d+,\d{2}) 5 | date: Date\s:\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: Facture\s+\#([\w-]+) 7 | static_vat: FR34790841266 8 | keywords: 9 | - FR 34790841266 10 | - Facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%y" 16 | decimal_separator: "," 17 | 
-------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.microsoft.ie.yml: -------------------------------------------------------------------------------- 1 | issuer: Microsoft Ireland Operations Ltd 2 | fields: 3 | amount: Total\sdes\sfrais\sactuels\s+(\d+,\d{2}) 4 | amount_untaxed: Total\sdes\sfrais\savant\simpôt\s+(\d+,\d{2}) 5 | date: Date\sdu\sdocument\s?:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N°\s?de\sfacture\s?:\s+(\w+) 7 | static_vat: IE8256796U 8 | keywords: 9 | - IE 8256796 U 10 | - FACTURE 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.myflyingbox.yml: -------------------------------------------------------------------------------- 1 | issuer: My Flying Box 2 | fields: 3 | amount: \d+,\d{2}\s+€\s+\d+,\d{2}\s+€\s+(\d+,\d{2})\s+€ 4 | amount_untaxed: (\d+,\d{2})\s+€\s+\d+,\d{2}\s+€\s+\d+,\d{2}\s+€ 5 | date: DATE\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: N° DE FACTURE\s+(\d+) 7 | vat: FR 83 538 645 227 8 | keywords: 9 | - FR 83 538 645 227 10 | - FACTURE 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.officetimeline.yml: -------------------------------------------------------------------------------- 1 | issuer: Office TIMELINE 2 | fields: 3 | amount: Total\spayé\s\(\w{3}\)\s+\$(\d+) 4 | amount_tax: TVA\s+\$(\d+) 5 | currency_iso: Total\spayé\s\((\w{3})\) 6 | date: Date\sd’achat\s?:\s+\w+\s(\d+\s.+\d{4}) 7 | invoice_number: Facture\s+(OTLP-\d+) 8 | static_partner_name: Office TIMELINE 9 | keywords: 10 | - Office TIMELINE 11 | - Facture 12 | - $ 13 | options: 14 | currency: USD 15 | date_formats: 16 | - "%d %B %Y" 17 
| languages: 18 | - fr 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.orange-business.mobile.yml: -------------------------------------------------------------------------------- 1 | issuer: Orange Business Services 2 | fields: 3 | amount: somme\sà\spayer\s\(EUR\sTTC\)\s+(\d+,\d{2}) 4 | amount_untaxed: total\sfacture\s\(EUR\sHT\)\s+(\d+,\d{2}) 5 | date: date\s+de\s+facture\s+:\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: n°\s+de\s+facture\s+:\s+(.+) 7 | static_siren: "380129866" 8 | static_vat: FR89380129866 9 | keywords: 10 | - FR 89 380 129 866 11 | - www.orange-business.com/ece 12 | - facture 13 | - EUR 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d/%m/%y" 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.ovh.fr.yml: -------------------------------------------------------------------------------- 1 | issuer: OVH.com 2 | fields: 3 | amount: Total\s+TTC\s+([\d ]+,\d{2}) 4 | amount_untaxed: Total de la facture HT\s+([\d ]+,\d{2}) 5 | date: Date d'émission\s+:\s+(\d{1,2}\/\d{1,2}\/\d{4}) 6 | invoice_number: Facture\s+.+(FR\w+) 7 | static_vat: FR22424761419 8 | keywords: 9 | - FR22424761419 10 | - Facture 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d %B %Y" 15 | languages: 16 | - fr 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.rs-online.fr.yml: -------------------------------------------------------------------------------- 1 | issuer: RadioSpares 2 | fields: 3 | amount: TOTAL TTC - EUR\s+([\d ]+,\d{2}) 4 | amount_untaxed: TOTAL NET H\.T\.\s+([\d ]+,\d{2}) 5 | date: Date de Facture\s+(\d{2}\.\d{2}\.\d{4}) 6 | date_due: échéance.+(\d{2}\.\d{2}\.\d{4}) 7 | invoice_number: Numero de Facture\s+(\w+) 8 | static_vat: FR 68 334 534 039 9 | keywords: 
10 | - FR 68 334 534 039 11 | - EUR 12 | - FACTURE 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d.%m.%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.saur.yml: -------------------------------------------------------------------------------- 1 | # Doesn't work with the new version of pdftotext : Copying of text from this document is not allowed. 2 | issuer: Saur 3 | fields: 4 | amount: Total facture TTC\s+([\d ]+,\d{2}) 5 | amount_untaxed: HT soumis à TVA\s+:\s+([\d ]+,\d{2}) 6 | date: FACTURE.+\n.+(\d{2}\s.+\s\d{4}) 7 | invoice_number: FACTURE N°\s+(\d+) 8 | vat: (FR 28 339 379 984) 9 | keywords: 10 | - FR 28 339 379 984 11 | - FACTURE 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.soyoustart.yml: -------------------------------------------------------------------------------- 1 | issuer: So you Start (OVH) 2 | fields: 3 | amount: TOTAL\s+TTC\s+(\d+.\d{2}) 4 | amount_untaxed: PRIX\s+HT\s+(\d+.\d{2}) 5 | date: Date\s+:\s+(\d+ .+ \d{4}) 6 | invoice_number: Facture\s+:\s+(\w+) 7 | static_vat: FR22424761419 8 | keywords: 9 | - FR22424761419 10 | - Facture 11 | - € 12 | - So you Start 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %B %Y" 17 | languages: 18 | - fr 19 | decimal_separator: "." 
20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/com.vinci-autoroutes.yml: -------------------------------------------------------------------------------- 1 | issuer: Vinci Autoroutes 2 | fields: 3 | amount: NET A PAYER TTC\s+(\d+,\d{2}) 4 | amount_untaxed: TVA \(code 1\)\s+(\d+,\d{2}) 5 | date: Emise le (\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture n°\s+(\w+) 7 | static_vat: FR53572139996 8 | keywords: 9 | - FR 53 572 139 996 10 | - Facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/eu.trainline.yml: -------------------------------------------------------------------------------- 1 | issuer: Trainline.eu 2 | fields: 3 | amount: Montant\s+total\s+:\s+€(\d+.\d{2}) 4 | amount_untaxed: Montant\s+total\s+:\s+€(\d+.\d{2}) 5 | date: (\d{1,2}\s.{3,5},\s\d{4})\s+\d{2}:\d{2} 6 | description: \s+(.+)\s:\s\d{1,2}\s.{3,5},\s\d{4} 7 | invoice_number: Identifiant\s+de\s+transaction\s+:\s+(\d+) 8 | static_vat: FR58512277450 9 | keywords: 10 | - TVA FR 58 512 277 450 11 | - Justificatif de paiement 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %B, %Y" 17 | decimal_separator: "." 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.actn.yml: -------------------------------------------------------------------------------- 1 | issuer: ACTN 2 | fields: 3 | date: Date\s+facture\s+:\s+(\d+/\d+/+\d{4}) 4 | invoice_number: FACTURE\s+n°\s+(\w+) 5 | static_vat: "FR54344350111" 6 | tables: 7 | - start: Total H.T.\s+Frais de port\s+Taux T.V.A.\s+Total T.V.A.\s+Total T.T.C. 
8 | end: "AGENCE COMMERCIALE TECHNOLOGIES NOUVELLES" 9 | body: '(?P<amount_untaxed>\d*,\d*)\s+€\s+20,00\s+(?P<amount_tax>\d*,\d*)\s+€\s+(?P<amount>\d*,\d*)\s€' 10 | keywords: 11 | - FR54344350111 12 | - AGENCE COMMERCIALE TECHNOLOGIES NOUVELLES 13 | - FACTURE 14 | - € 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%d %b %Y" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.airfrance.yml: -------------------------------------------------------------------------------- 1 | issuer: Air France 2 | fields: 3 | amount: NET A PAYER\s+([\d ]+.\d{2}) 4 | amount_untaxed: NET A PAYER\s+([\d ]+.\d{2}) 5 | date: FACTURE \w+ du (\d{2}/\d{2}/\d{4}) 6 | invoice_number: FACTURE (\w+) 7 | static_vat: FR61420495178 8 | keywords: 9 | - FR 61 420 495 178 10 | - FACTURE 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | languages: 17 | - fr 18 | decimal_separator: "." 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.also.yml: -------------------------------------------------------------------------------- 1 | issuer: ALSO France 2 | fields: 3 | date: Date\s+(\d+.\d+.+\d{4}) 4 | invoice_number: '.*Num.ro\sde\sdocument\s+(\w+)' 5 | static_vat: "FR 76 391 141 140" 6 | amount_untaxed: '.*Prix Total H.T.\s+(\d+,\d+)' 7 | amount: '.*Montant Total EUR\s+(\d+,\d+)' 8 | amount_tax: '.*TVA .*%\s+(\d+,\d+)' 9 | keywords: 10 | - "ALSO France 10 Avenue des Louvresses" 11 | - "Facture" 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d.%b.%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.amazon.yml: -------------------------------------------------------------------------------- 1 | issuer: Amazon France 2 | fields: 3 | amount: Facture Total\s+([\d ]+,\d{2}) 4 | amount_untaxed:
Total\s+([\d ]+,\d{2})\s€\s+[\d ]+,\d{2}\s€ 5 | date: Date de la commande\s+(\d{2}.\d{2}.\d{4}) 6 | invoice_number: Numéro de la facture\s+([\w-]+) 7 | static_vat: FR 12487773327 8 | keywords: 9 | - LU20260743 10 | options: 11 | currency: EUR 12 | date_formats: 13 | - "%d.%m.%Y" 14 | decimal_separator: "," 15 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.assurance-epargne-pension.yml: -------------------------------------------------------------------------------- 1 | issuer: AEP - ASSURANCE EPARGNE PENSION 2 | fields: 3 | amount: Soit un net à payer de :\s+([\d\s]+,\d{2}) 4 | amount_untaxed: Soit un net à payer de :\s+([\d\s]+,\d{2}) 5 | date: Paris\, le \w+ (\d{1,2}.+\d{4}) 6 | invoice_number: N°\s*(\w+) 7 | static_siren: 732028154 8 | keywords: 9 | - 732 028 154 RCS Paris 10 | - € 11 | - FACTURE 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d %B %Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.bouyguestelecom.adsl-fiber.yml: -------------------------------------------------------------------------------- 1 | issuer: Bouygues Telecom (ADSL/Fibre) 2 | fields: 3 | amount: Montant total de cette facture\s+(\d+.\d{2}) 4 | amount_untaxed: Montant hors TVA .+\s(\d+.\d{2}) 5 | date: votre facture du\s+(\d{1,2}\s.+\s+\d{4}) 6 | invoice_number: N°:\s+(\w+/\w+-\w+-\w+) 7 | static_vat: FR74397480930 8 | keywords: 9 | - € 10 | - voici votre facture du 11 | - Bbox 12 | - Vos abonnements forfaits et options 13 | # The SIREN and VAT are image-based in the PDF and there is not even the word "Bouygues" as text in the PDF... that's why I match on other things 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d %B %Y" 18 | decimal_separator: "." 
19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.bouyguestelecom.mobile.yml: -------------------------------------------------------------------------------- 1 | issuer: Bouygues Telecom (Mobile) 2 | fields: 3 | amount: Montant de la facture soumis à TVA\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: Montant de la facture soumis à TVA\s+(\d+,\d{2}) 5 | date: Date de facture\s+:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N° de facture\s+:\s+(\d+) 7 | static_vat: FR74397480930 8 | keywords: 9 | - FR 74 397 480 930 10 | - € 11 | - facture 12 | - mobile 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.butagaz.yml: -------------------------------------------------------------------------------- 1 | issuer: Gaz de Paris 2 | keywords: 3 | - Butagaz 4 | - 22 510 764 335 5 | fields: 6 | amount_electricity: Electricité \(abonnement et consommation\)\s*(\d+,\d{2}) 7 | amount_local_taxes: Taxes locales et contributions\s*(\d+,\d{2}) 8 | amount_no_TVA: Total Hors TVA\s*(\d+,\d{2}) 9 | amount_TVA: \s{2}TVA\s*(\d+,\d{2}) 10 | amount: TOTAL TTC\s*(\d+,\d{2}) 11 | date: Ma facture d’électricité du (\d{2}/\d{2}/\d{4}) 12 | invoice_number: (\d{12}) 13 | 14 | options: 15 | currency: "EUR" 16 | languages: 17 | - fr 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.chronopost.yml: -------------------------------------------------------------------------------- 1 | issuer: Chronopost 2 | fields: 3 | amount: TOTAL FACTURE\s+\d+.\d{2}\s+\d+.\d{2}\s+(\d+.\d{2}) 4 | amount_untaxed: TOTAL FACTURE\s+(\d+.\d{2}) 5 | date: Date\s+:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N° Facture :\s+(\w+) 7 | static_vat: FR82383960135 8 | keywords: 9 | - 
82 383 960 135 10 | - CHRONOPOST 11 | - € 12 | - Facture 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "." 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.dirafi.yml: -------------------------------------------------------------------------------- 1 | issuer: dirafi.fr 2 | fields: 3 | amount: Total TTC\s+([\d\s]+,\d{2}) 4 | amount_untaxed: Total HT\s+([\d\s]+,\d{2}) 5 | date: FA\d+\s+(\d{2}/\d{2}/\d{4})\s+\d{2}/\d{2}/\d{4} 6 | invoice_number: (FA\d+)\s+\d{2}/\d{2}/\d{4}\s+\d{2}/\d{2}/\d{4} 7 | static_vat: FR77449189885 8 | keywords: 9 | - FR77449189885 10 | - Euros 11 | - Facture 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.domaine-achat.yml: -------------------------------------------------------------------------------- 1 | issuer: PRIVIANET SARL 2 | fields: 3 | amount: Total TTC\s+(\d+.\d{2}) 4 | amount_untaxed: Total HT\s+(\d+.\d{2}) 5 | date: Nyons,\sle\s\w+\s(\d{2}\s.+\s\d{4}) 6 | invoice_number: Facture\sn°\s([\d-]+) 7 | static_vat: FR50440585297 8 | keywords: 9 | - FR-50440585297 10 | - Facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d %B %Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.easytrip.yml: -------------------------------------------------------------------------------- 1 | issuer: Easytrip France 2 | fields: 3 | amount: TOTAL\s+A\s+PAYER\s+[\d ]+,\d{2}\s+[\d ]+,\d{2}\s+([\d ]+,\d{2}) 4 | amount_untaxed: TOTAL\s+A\s+PAYER\s+([\d ]+,\d{2}) 5 | date: Votre\s+facture\s+du\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture\s+n°:\s+(\d+) 7 | static_vat: "FR12793875451" 8 | keywords: 9 | - FR 127 93 87 54 51 10 | - easytrip 11 | - Facture 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.edf.entreprises.yml: -------------------------------------------------------------------------------- 1 | issuer: EDF 2 | fields: 3 | amount: Total TTC en euros \(détails au verso\) :\s+([\d ]+,\d{2}) 4 | amount_untaxed: Montant Hors T.V.A. 
:\s+([\d ]+,\d{2}) 5 | date: Facture \d+ du (\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture (\d+) 7 | static_vat: FR 03 552 081 317 8 | keywords: 9 | - EDF Entreprises 10 | - Relation Client Grandes Entreprises 11 | - FR 03 552 081 317 12 | - Facture 13 | - € 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d/%m/%Y" 18 | languages: 19 | - fr 20 | decimal_separator: "," 21 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.finagaz.yml: -------------------------------------------------------------------------------- 1 | issuer: FINAGAZ 2 | fields: 3 | amount: Montant TTC en notre faveur\s+([\d ]+,\d{2}) 4 | amount_untaxed: Montant\sHors\sTaxe\s+([\d ]+,\d{2}) 5 | date: N°\s+\d+\s+du\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N°\s+(\d+)\s+du\s+\d{2}/\d{2}/\d{4} 7 | date_due: Date\sd'échéance\s+:\s+(\d{2}/\d{2}/\d{4}) 8 | static_vat: FR68582018966 9 | keywords: 10 | - FR68582018966 11 | - FACTURE 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.fountain.yml: -------------------------------------------------------------------------------- 1 | issuer: fountain.fr 2 | fields: 3 | amount: Montant total\s+([\d ]+,\d{2}) 4 | amount_untaxed: Montant total HTVA\s+([\d ]+,\d{2}) 5 | date: Date de facturation:\s+(\d{2}.\d{2}.\d{4}) 6 | date_due: Date d'échéance:\s+(\d{2}.\d{2}.\d{4}) 7 | invoice_number: N° de facture:\s+(\w+) 8 | static_vat: FR81411858046 9 | keywords: 10 | - FR 81 411 858 046 11 | - Facture 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d.%m.%Y" 17 | languages: 18 | - fr 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.free.adsl-fiber.yml: 
-------------------------------------------------------------------------------- 1 | issuer: Free 2 | fields: 3 | amount: Total facture\s+\d+.\d{2}\s+\d+.\d{2}\s+(\d+.\d{2}) 4 | amount_untaxed: Total facture\s+(\d+.\d{2}) 5 | date: Facture n°\d+ du (\d+ .+ \d{4}) 6 | date_due: Date limite de paiement le (\d+ .+ \d{4}) 7 | invoice_number: Facture n°(\d+) 8 | static_vat: FR60421938861 9 | date_start: (\d{1,2}\w*\s\w+\s\d{4})\sau 10 | date_end: au\s(\d{2}\s\w+\s\d{4})\s+ 11 | siren: 421 938 861 12 | keywords: 13 | - FR 604 219 388 61 14 | - Facture 15 | - EUR 16 | tables: 17 | - start: Numéro de ligne\s+Id\.client\s+Adresse de l’installation 18 | end: Facture n° 19 | body: (?P<line_number>\w+)\s+(?P<client_id>\d+)\s+[\w ]+ 20 | options: 21 | currency: EUR 22 | date_formats: 23 | - "%d %B %Y" 24 | languages: 25 | - fr 26 | decimal_separator: "." 27 | replace: 28 | # We have to rewrite the start and end date for dateparser to handle it. 29 | - [ 30 | '\sdu\s(?P<start_day>\d{1,2}\w*)(?P<end_day>\sau\s\d{2})(?P<month_year>\s\w+\s\d{4})\s+', 31 | ' du \g<start_day>\g<month_year>\g<end_day>\g<month_year> ', 32 | ] 33 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.free.mobile.yml: -------------------------------------------------------------------------------- 1 | issuer: Free Mobile 2 | fields: 3 | amount: \spayer TTC\*\s+(\d+.\d{2}) 4 | amount_untaxed: Total de la facture HT\s+(\d+.\d{2}) 5 | date: Facture no \d+ du (\d+ .+ \d{4}) 6 | invoice_number: Facture no (\d+) 7 | static_vat: FR25499247138 8 | keywords: 9 | - FR25499247138 10 | - Facture 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d %B %Y" 15 | languages: 16 | - fr 17 | decimal_separator: "."
18 | replace: 19 | - ["e´ ", "é"] 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.free.mobile2.yml: -------------------------------------------------------------------------------- 1 | issuer: Free Mobile 2 | fields: 3 | amount: \spayer TTC\*\s+(\d+.\d{2}) 4 | amount_untaxed: Total de la facture HT\s+(\d+.\d{2}) 5 | date: no \d+ du (\d+ .+ \d{4}) 6 | invoice_number: no (\d+) du 7 | static_vat: FR25499247138 8 | keywords: 9 | - FR25499247138 10 | options: 11 | currency: EUR 12 | date_formats: 13 | - "%d %B %Y" 14 | languages: 15 | - fr 16 | decimal_separator: "." 17 | replace: 18 | - ["e´ ", "é"] 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.futur.yml: -------------------------------------------------------------------------------- 1 | issuer: Futur Telecom SAS 2 | fields: 3 | amount: Total T.T.C.\s+\(€\)\s+(\d+,\d{2}) 4 | amount_untaxed: Total H.T.\s+(\d+,\d{2}) 5 | date: Date Facture\s+:\s+(\d{2}\s.+\s\d{4}) 6 | invoice_number: N° Facture\s+:\s+(\d+) 7 | static_vat: FR 92 444 172 274 8 | keywords: 9 | - Coordonnées bancaires Futur 10 | - Facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d %b %Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.ge-iroise.yml: -------------------------------------------------------------------------------- 1 | issuer: GE IROISE 2 | fields: 3 | amount: Total\s+T.T.C.\s+([\d\s]+,\d{2}) 4 | amount_untaxed: Total\s+H.T.\s+([\d\s]+,\d{2}) 5 | date: DATE\s+:\s+(\d{2}\/\d{2}\/\d{4}) 6 | invoice_number: FACTURE\s+N°\s+(\d+) 7 | static_vat: FR38422294488 8 | keywords: 9 | - FR38422294488 10 | - FACTURE 11 | - € 12 | - IROISE 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | 
-------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.google.yml: -------------------------------------------------------------------------------- 1 | issuer: Google Cloud France 2 | fields: 3 | amount: Total en EUR\s+([\d ]+,\d{2}) 4 | amount_untaxed: Sous-total en EUR\s+([\d ]+,\d{2}) 5 | date: \s{10}\s+(\d{1,2} [\w\.]+ \d{4}) 6 | invoice_number: (GCFR\w+) 7 | static_vat: IE6388047V 8 | keywords: 9 | - Google Cloud France 10 | - Facture 11 | - FR78881721583 12 | - EUR 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %B %Y" 17 | languages: 18 | - fr 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.greffe-tc-lyon.yml: -------------------------------------------------------------------------------- 1 | issuer: Greffe du tribunal de commerce de Lyon 2 | fields: 3 | amount: TOTAL\s+TTC\s?:\s+(\d+,\d{2}) 4 | amount_tax: Total\s+TVA\s+\d+,\d+%\s+\(1\)\s?:\s+(\d+,\d{2}) 5 | date: du\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture\s+N°\s+([\w-]+) 7 | static_vat: FR67482531142 8 | keywords: 9 | - FR67482531142 10 | - Facture 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.hiscox.yml: -------------------------------------------------------------------------------- 1 | issuer: Hiscox Europe 2 | fields: 3 | amount: Prime TTC\s+([\d ]+,\d{2}) 4 | date: Date\s+Prime TTC\s+(\d{2}/\d{2}/\d{4}) 5 | date_start: Pour la période du\s+(\d{1,2}\s[A-Za-zéû]+\s\d{4}) 6 | date_end: Pour la période du \d{1,2}\s[A-Za-zéû]+\s\d{4} au (\d{1,2}\s[A-Za-zéû]+\s\d{4}) 7 | invoice_number: APPEL DE PRIME N°\s*(\d+) 8 | static_vat: FR55524737681 9 | keywords: 10 | - Hiscox 11 | - FR55524737681 12 | - APPEL 13 | - Euros 14 | 
options: 15 | currency: EUR 16 | date_formats: 17 | - "%d/%m/%Y" 18 | languages: 19 | - fr 20 | decimal_separator: "," 21 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.internetsatellite.yml: -------------------------------------------------------------------------------- 1 | issuer: Sat2Way 2 | fields: 3 | amount: Total\s+:\s+(\d+.\d{2}) 4 | amount_untaxed: Sous-total\s+:\s+(\d+.\d{2}) 5 | date: Date\sde\sfacturation\s?:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture\snum\s?:\s+(\w+) 7 | static_vat: FR85444740336 8 | keywords: 9 | - FR85444740336 10 | - Facture 11 | - EURO 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.jpg.yml: -------------------------------------------------------------------------------- 1 | issuer: JPG 2 | fields: 3 | amount: \d+,\d{2}\s+\d+,\d{2}\s+\d+,\d{2}\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: \d+,\d{2}\s+\d+,\d{2}\s+(\d+,\d{2})\s+\d+,\d{2}\s+\d+,\d{2} 5 | date: (\d\d?/\d\d?/\d{4}) 6 | invoice_number: FACTURE N°\s+(\d+\.\s\d+\.\d+) 7 | static_vat: FR72997506407 8 | keywords: 9 | - FR 72 997 506 407 10 | - FACTURE 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.kubii.yml: -------------------------------------------------------------------------------- 1 | issuer: KUBII 2 | fields: 3 | amount: \sTotal\s+(\d+,\d{2})\s€ 4 | amount_untaxed: Total\s\(HT\)\s+(\d+,\d{2})\s€ 5 | amount_tax: Taxe totale\s+(\d+,\d{2})\s€ 6 | static_vat: FR41808140313 7 | tables: 8 | - start: '.*Num.ro de facture\s+Date de facturation\s+' 9 | end: 'R.f.rence\s+.*Produit' 10 | body: '(?P<invoice_number>\d+)\s+(?P<date>\d{2}.\d{2}.\d{4}).*' 11
| keywords: 12 | - FR41808140313 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%B/%Y" 17 | languages: 18 | - fr 19 | decimal_separator: "," 20 | replace: 21 | - ["e´ ", "é"] 22 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.laposte.boutique.yml: -------------------------------------------------------------------------------- 1 | issuer: La Poste (Coliposte et Solutions Business) 2 | fields: 3 | amount: Montant\s+à\s+payer\s+en\s+Euros\s+(\d+,\d{2}) 4 | amount_untaxed: Total\s+brut\s+facture\s+(\d+,\d{2}) 5 | date: votre\s+facture\s+N°\s+[A-Z0-9_]+\s+du\s+(\d{2}\/\d{2}\/\d{4}) 6 | invoice_number: votre\s+facture\s+N°\s+(\w+) 7 | static_vat: FR39356000000 8 | keywords: 9 | - FR 39 356 000 000 10 | - FACTURE 11 | - Euros 12 | - laposte.fr 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.laposte.coliposte.yml: -------------------------------------------------------------------------------- 1 | issuer: La Poste (Coliposte) 2 | fields: 3 | amount: Total TTC:\s+(\d+,\d{2}) 4 | amount_untaxed: Total HT:\s+(\d+,\d{2}) 5 | date: FACTURE\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: FACTURE\s+\d{2}/\d{2}/\d{2}\s+(\d+) 7 | static_vat: FR39356000000 8 | keywords: 9 | - FR 39 356 000 000 10 | - FACTURE 11 | - EUR 12 | - ColiPoste 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.lecab.yml: -------------------------------------------------------------------------------- 1 | issuer: leCab 2 | fields: 3 | amount: Montant\s+total\s+à\s+régler\s+([\d\s]+,\d{2}) 4 | amount_tax: TVA\s+à\s+20%\s+([\d\s]+,\d{2}) 5 | date: 
Date\s+de\s+facture\s+:\s(\d{2}.+\d{4}) 6 | invoice_number: Numéro\s+de\s+facture\s+:\s+(\d+) 7 | date_start: Période\s+de\s+facturation\s+du\s+(\d{2}\/\d{2}\/\d{4}) 8 | date_end: Période\s+de\s+facturation\s+du\s+\d{2}\/\d{2}\/\d{4}\s+au\s+(\d{2}\/\d{2}\/\d{4}) 9 | static_vat: FR81749816328 10 | keywords: 11 | - FR 81 749 816 328 12 | - FACTURE 13 | - € 14 | - lecab.fr 15 | options: 16 | currency: EUR 17 | date_formats: 18 | - "%d/%m/%Y" 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.leroymerlin.yml: -------------------------------------------------------------------------------- 1 | issuer: Leroy Merlin France 2 | fields: 3 | amount: Total TTC\s+(\d+.\d{2}) 4 | amount_untaxed: Total TVA\s+(\d+.\d{2})\s+€\s+ 5 | date: Date d.emission\s+:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: FACTURE N°\s+(\w+) 7 | static_vat: FR49 384 560 942 8 | keywords: 9 | - Leroy Merlin France 10 | - FR49 384 560 942 11 | - € 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.maaf.yml: -------------------------------------------------------------------------------- 1 | issuer: MAAF Assurances SA 2 | fields: 3 | amount: VOTRE\sCOTISATION\sANNUELLE\s+\d{4}\s+(\d+,\d{2}) 4 | date: à\sla\sdate\sdu\s+(\d{2}\s.+\s\d{4}) 5 | date_start: Période\sdu\s(\d{2}/\d{2}/\d{4})\sau\s\d{2}/\d{2}/\d{4} 6 | date_end: Période\sdu\s\d{2}/\d{2}/\d{4}\sau\s(\d{2}/\d{2}/\d{4}) 7 | invoice_number: Votre\sn°\sfacture\s+:\s+(\d+) 8 | static_vat: FR38542073580 9 | keywords: 10 | - FR 38 542 073 580 11 | - AVIS 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %B %Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.mediapart.yml: -------------------------------------------------------------------------------- 1 | issuer: Mediapart 2 | fields: 3 | amount: TOTAL\sTTC\s+(\d+.\d{2}) 4 | amount_untaxed: H\.T\.\s+:\s+(\d+.\d{2}) 5 | date: DATE\s+CLIENT\s+\d\/\d\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture\s+en\s+€\s+N°\s+:\s+(\w+) 7 | static_vat: FR91500631932 8 | keywords: 9 | - FR91500631932 10 | - Facture 11 | - EUROS 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "." 
17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.moneo-resto.yml: -------------------------------------------------------------------------------- 1 | issuer: Moneo Resto 2 | fields: 3 | amount: Total\sTTC\s+(\d+,\d{2}) 4 | amount_tax: Montant\sde\sla\sTVA\s\(20\s?%\)\s+(\d+,\d{2}) 5 | date: Date\sFacture\s?:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture\s+(.+) 7 | static_vat: FR93422721274 8 | keywords: 9 | - FR 93 422 721 274 10 | - Facture 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.mouser.yml: -------------------------------------------------------------------------------- 1 | # The PDF invoice has the 2 languages: French and English 2 | issuer: Mouser Electronic 3 | fields: 4 | amount: Montant à régler\s+EUR\s+([\d,]+.\d{2}) 5 | amount_tax: Montant à régler\s+EUR\s+[\d,]+.\d{2}\s+[\d,]+.\d{2}\s+[\d,]+.\d{2}\s+[\d,]+.\d{2}\s+([\d,]+.\d{2}\s+) 6 | date: Code SWIFT:\s+CHASGB2L\s+\d+\s+(\d{2}-\w+-\d{2}) 7 | invoice_number: Code SWIFT:\s+CHASGB2L\s+(\d+) 8 | static_vat: FR27512466913 9 | keywords: 10 | - FR 27512466913 11 | - Facture 12 | - EUR 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d-%b-%y" 17 | languages: 18 | - fr 19 | decimal_separator: "." 
20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.mycelium-roulement.yml: -------------------------------------------------------------------------------- 1 | issuer: Mycelium Roulement - 123roulement 2 | fields: 3 | amount: Total\sT\.T\.C\.\s+(\d+,\d{2})\s+EUR 4 | amount_untaxed: Total\sH\.T\.\s+(\d+,\d{2})\s+EUR 5 | date: Date\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Numero\s+F\s+(.*?)\s+ 7 | static_vat: FR 43503908576 8 | keywords: 9 | - FR 43503908576 10 | - FACTURE 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d/%m/%Y" 15 | decimal_separator: "," 16 | remove_accents: true 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.napsis.yml: -------------------------------------------------------------------------------- 1 | issuer: Napsis (Iperlink) 2 | fields: 3 | amount: TOTAL TTC À PAYER\s+(\d+,\d{2}) 4 | amount_untaxed: TOTAL HT\s+(\d+,\d{2}) 5 | date: Facture du\s+:\s+(\d{1,2}/\d{2}/\d{4}) 6 | date_due: Date d’échéance :\s+(\d{1,2}/\d{2}/\d{4}) 7 | invoice_number: Référence facture\s+:\s*(\w+) 8 | static_vat: FR75491282323 9 | keywords: 10 | - FR75 491 282 323 11 | - € 12 | - facture 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.nexity.yml: -------------------------------------------------------------------------------- 1 | issuer: Nexity 2 | fields: 3 | amount: SOLDE\sÀ\sPAYER\s+\(en\s€\)\s+([\d ]+,\d{2}) 4 | date: ',\sle\s(\d{2}/\d{2}/\d{4})' 5 | date_end: Période\sdu\s+\d{2}/\d{2}/\d{4}\sau\s(\d{2}/\d{2}/\d{4}) 6 | date_start: Période\sdu\s+(\d{2}/\d{2}/\d{4})\sau\s 7 | invoice_number: N°\sDE\sCOUPON\s:\s+(\w+) 8 | static_siren: "487530099" 9 | keywords: 10 | - "487530099" 11 | - NEXITY 12 | - APPEL DE LOYER 13 | - 
€ 14 | options: 15 | currency: EUR 16 | date_formats: 17 | - "%d/%m/%Y" 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.orange.fibre.yml: -------------------------------------------------------------------------------- 1 | issuer: Orange (Fibre optique) 2 | fields: 3 | amount: total auprès d\'Orange\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: total auprès d\'Orange\s+(\d+,\d{2}) 5 | date: date de facture\s+:\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: n° de facture\s+:\s+(.+) 7 | static_siren: "380129866" 8 | keywords: 9 | - 380 129 866 RCS Paris 10 | - service clients La fibre 11 | - facture 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.orange.fixedline.yml: -------------------------------------------------------------------------------- 1 | issuer: Orange (Fixed line) 2 | fields: 3 | amount: total des abonnements et achats\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: total des abonnements et achats\s+(\d+,\d{2}) 5 | date: date de facture\s+:\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: n° de facture\s+:\s+(.+) 7 | static_siren: "380129866" 8 | static_vat: FR89380129866 9 | keywords: 10 | - FR 89 380 129 866 11 | - "1015 : SAV en cas de panne de ligne fixe" 12 | - facture 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.prestaclic.yml: -------------------------------------------------------------------------------- 1 | issuer: Prestaclic 2 | fields: 3 | amount: TOTAL\s+TTC\s+([\d ]+,\d{2}) 4 | amount_untaxed: Total\s+HT\s+([\d ]+,\d{2}) 5 | date: Date\s+:\s+(\d+\s+.+\s+\d{4}) 6 | invoice_number: 
FACTURE\s+N°\s+(\w+) 7 | static_vat: "FR69803518919" 8 | keywords: 9 | - FR69803518919 10 | - PRESTACLIC 11 | - FACTURE 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d %b %Y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.publicationannoncelegale.yml: -------------------------------------------------------------------------------- 1 | issuer: NETPRESSE 2 | fields: 3 | amount: Total TTC\s+:\s+([\d ]+,\d{2}) 4 | amount_untaxed: Total HT\s+:\s+([\d ]+,\d{2}) 5 | date: 6 | parser: regex 7 | area: { f: 1, l: 1, x: 0, y: 155, r: 100, W: 825, H: 170 } 8 | regex: (\d{2}/\d{2}/\d{4}) 9 | type: date 10 | invoice_number: Facture\s+n°\s+(\d+) 11 | static_siren: 530848134 12 | keywords: 13 | - B 530 848 134 14 | - € 15 | - Facture 16 | - NETPRESSE 17 | options: 18 | currency: EUR 19 | date_formats: 20 | - "%d/%m/%Y" 21 | decimal_separator: "," 22 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.sfr.adsl-fiber.yml: -------------------------------------------------------------------------------- 1 | issuer: SFR 2 | fields: 3 | amount: TOTAL\s+prestations\s+facturées\s.+\d+,\d{2}\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: TOTAL\s+prestations\s+facturées\s.+\s(\d+,\d{2})\s+\d+,\d{2}\s+\d+,\d{2} 5 | date: Facture du (\d{2}/\d{2}/\d{4}) 6 | invoice_number: Facture N°\s(\d+) 7 | static_vat: FR71343059564 8 | keywords: 9 | - FR 71 343 059 564 10 | - facture 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.sfr.mobile.yml: -------------------------------------------------------------------------------- 1 | issuer: SFR 2 | fields: 3 | amount: (\d+,\d{2}) € TTC 4 | amount_untaxed: (\d+,\d{2}) € HT 
5 | date: facture\s+du\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: N°\sfacture\s:\s+([A-Z0-9\-]+) 7 | static_vat: FR71343059564 8 | keywords: 9 | - FR 71 343059564 10 | - facture 11 | - EUR 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.sosh.yml: -------------------------------------------------------------------------------- 1 | issuer: Sosh by Orange 2 | fields: 3 | amount: total auprès d\'Orange\s+\d+,\d{2}\s+(\d+,\d{2}) 4 | amount_untaxed: total auprès d\'Orange\s+(\d+,\d{2}) 5 | date: date de facture\s+\:\s+(\d{2}/\d{2}/\d{2}) 6 | invoice_number: n° de facture\s\:\s*(\d+) 7 | siren: (380\s?129\s?866) 8 | static_vat: FR89380129866 9 | keywords: 10 | - sosh.fr 11 | - € 12 | - facture 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d/%m/%y" 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.teledec.yml: -------------------------------------------------------------------------------- 1 | issuer: Teledec.fr - LPI Conseil SAS 2 | fields: 3 | amount: Total\s+TTC\s+:\s+(\d+,\d{2}) 4 | amount_untaxed: Total\s+HT\s+:\s+(\d+,\d{2}) 5 | date: mise\s+le\s+:\s+(\d{2}/\d{2}/\d{4}) 6 | invoice_number: Numéro\s+de\s+facture\s+:\s+(.+) 7 | static_vat: FR03527571939 8 | keywords: 9 | - FR03527571939 10 | - € 11 | - Facture 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d/%m/%Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/fr.topoffice.yml: -------------------------------------------------------------------------------- 1 | issuer: Top Office 2 | fields: 3 | amount: '.*TOTAL\sTTC\s+(\d+.\d{2})\sEUR' 4 | amount_untaxed: '.*TOTAL\sHT.\s+(\d+.\d{2})\sEUR' 5 | amount_tax: 
'.*TVA.*(\d+.\d{2})\sEUR' 6 | date: 'Date\sfacture\s(\d{2}-\d{2}-\d{4}).*' 7 | invoice_number: '.*FACTURE\sN.\s:\s(\d+)' 8 | static_vat: FR8144917559500031 9 | keywords: 10 | - FR8144917559500031 11 | - FACTURE 12 | - TOP OFFICE 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d-%m-%Y" 17 | decimal_separator: "." 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/net.online.yml: -------------------------------------------------------------------------------- 1 | issuer: Online SAS 2 | fields: 3 | amount: '[\d ]+,\d{2}\s+Euros\s+\d+,\d{2}\s\%\s+[\d ]+,\d{2}\s+Euros\s+([\d ]+,\d{2})\sEuros' 4 | amount_untaxed: ([\d ]+,\d{2})\s+Euros\s+\d+,\d{2}\s\%\s+[\d ]+,\d{2}\s+Euros 5 | date: Date de facturation\s+:\s+(\d{2}\s.+\s\d{4}) 6 | invoice_number: Facture n\W\s+(\d+) 7 | static_vat: FR35433115904 8 | keywords: 9 | - FR 35 433 115 904 10 | - Facture 11 | - Euros 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d %B %Y" 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/fr/net.scaleway.yml: -------------------------------------------------------------------------------- 1 | issuer: Online SAS / Scaleway 2 | fields: 3 | amount: '.*Total\s+due\s+€(\d+.\d{2})' 4 | amount_untaxed: '.*Untaxed\s+total\s+€(\d+.\d{2})' 5 | amount_tax: '.*Total\s+taxes\s+€(\d+.\d{2})' 6 | date: '.*Due date:\s+(\w+\s\d{1,2},\s\d{4}).*' 7 | invoice_number: 'Invoice #(\d+)' 8 | static_vat: FR35433115904 9 | keywords: 10 | - FR 35 433 115 904 11 | - Invoice 12 | - scaleway 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%B %d, %Y" 17 | decimal_separator: "." 
18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.action.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: action 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - 'TOTAAL\s+(\d{1,3}[,]\d{2})' 8 | - PIN\s+(\d{1,3}.\d{2}) 9 | type: float 10 | date: (\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 11 | invoice_number: (\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 12 | static_partner_name: action 13 | country_code: 14 | parser: static 15 | value: nl 16 | payment_method: 17 | - (AMEX) 18 | - (VISA) 19 | - (MCARD) 20 | - (CONTANT) 21 | - (KAS):\s.\s\d+\.\d+ 22 | lines: 23 | start: ARTIKELEN 24 | end: BTW SPECIFICATIE 25 | line: '.(?P\w+).?\s+.?(?P\d+.\d{1,2})' 26 | types: 27 | qty: float 28 | price_unit: float 29 | keywords: 30 | - action 31 | - ARTIKELEN 32 | - € 33 | options: 34 | currency: EUR 35 | date_formats: 36 | - "%d-%m-%Y" 37 | languages: 38 | - nl 39 | decimal_separator: "," 40 | replace: 41 | - ["é", "€"] 42 | - ["Contant", "CONTANT"] 43 | - ['\s[l|&|@]', " ℓ"] 44 | - ["B.W", "BTW"] 45 | - [", ", ","] 46 | - ["°", " "] 47 | - ["©", " "] 48 | - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr 49 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.agrisneltank.json: -------------------------------------------------------------------------------- 1 | { 2 | "issuer": "Agrisneltank B.V.", 3 | "fields": { 4 | "amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})", 5 | "amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})", 6 | "date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})", 7 | "invoice_number": "ketnummer\\s+(\\d+)", 8 | "static_vat": "NL862414489B01", 9 | "partner_name": "(?i)(Agrisneltank B.V.)", 10 | "country_code": { 11 | "parser": "regex", 12 | "regex": "(?i)nl", 13 | "group": "first" 14 | }, 15 | "partner_website": { 16 | "parser": "regex", 17 | 
"regex": "agrisneltank.nl", 18 | "group": "first" 19 | }, 20 | "payment_method": [ 21 | "(MASTERCARD)", 22 | "(VISA)", 23 | "(MAESTRO)", 24 | "(CONTANT)", 25 | "(KAS):\\s.\\s\\d+\\.\\d+" 26 | ] 27 | }, 28 | "lines": { 29 | "start": "(?i)BON", 30 | "end": "(Netto\\s|\\Z)", 31 | "first_line": "(?i)(?PPOMP\\s+\\d+)\\s+(?P\\w+)", 32 | "line": [ 33 | "(?i)Volume.*\\s+(?P\\d+[,.]\\d+)\\s?(?P[l|ℓ|L])?", 34 | "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/ ]?(?P[l|ℓ|L])?", 35 | "(?i)B.W\\s+(?P\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P\\d+[,.]\\d{2})?" 36 | ], 37 | "types": { 38 | "qty": "float", 39 | "price_unit": "float", 40 | "line_tax_percent": "float", 41 | "line_tax_amount": "float" 42 | } 43 | }, 44 | "keywords": ["(?i)Agri", "(?i)nl", "€", "NL862414489B01"], 45 | "options": { 46 | "currency": "EUR", 47 | "languages": ["nl"], 48 | "decimal_separator": ",", 49 | "replace": [ 50 | ["é", "€"], 51 | ["L.p.9.", "LPG"], 52 | ["L.P.G.", "LPG"], 53 | ["L.P.9.", "LPG"], 54 | ["LPLG", "LPG"], 55 | ["Contant", "CONTANT"], 56 | ["\\s[l|&|@]", " ℓ"], 57 | ["([0-9]{2,3})[ /][2]", "\\1 /ℓ"], 58 | ["B.W", "BTW"], 59 | [" - ", "-"], 60 | ["agrisneltank\\s+[.]?nl", "agrisneltank.nl"], 61 | ["Kuwait\\s+Petroleum\\s+Ned", "Kuwait Petroleum Nederland B.V."], 62 | ["€ ([0-9]+) ([0-9]{2})(\\s)", "€ \\1,\\2\\3"], 63 | ["(\\s)([0-9]+)\\.([0-9]{2,3})", "\\1\\2,\\3"] 64 | ] 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.albron.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Albron 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal\s+[:]\s+(\d{1,3}[.|,]\d{2}) 8 | - '\d{1,2}[,|.]\d{1,2}\S?\s+(\d{1,3}[,|.]\d{2})\s+\d{1,3}[,]\d{2}' 9 | type: float 10 | amount_tax: 11 | parser: regex 12 | regex: 13 | - '\d{1,2}[,|.]\d{1,2}\S?\s+\d{1,3}[,|.]\d{2}\s+\d{1,3}[,|.]\d{2}\s+(\d{1,3}[,]\d{2})' 14 | type: float 15 | 
amount_untaxed: 16 | parser: regex 17 | regex: 18 | - '\d{1,2}[,|.]\d{1,2}\S?\s+\d{1,3}[,|.]\d{2}\s+(\d{1,3}[,]\d{2})' 19 | type: float 20 | date: (\d{1,2}[-|\/]\d{2}[-|\/]\d{2}) 21 | invoice_number: (\d{1,2}[-|\/]\d{2}[-|\/]\d{2}) 22 | static_partner_name: Albron 23 | country_code: 24 | parser: static 25 | value: nl 26 | payment_method: 27 | - (AMEX) 28 | - (VISA) 29 | - (MCARD) 30 | - (CONTANT) 31 | - (CHIP) 32 | - (KAS):\s.\s\d+\.\d+ 33 | lines: 34 | start: antal 35 | end: Subtotaal 36 | line: '(?P\d+)(?P\w+)\s+(?P\d{1,4}[,|.]\d{2})' 37 | types: 38 | qty: float 39 | price_unit: float 40 | keywords: 41 | - Albron 42 | - EUR 43 | - eten 44 | options: 45 | currency: EUR 46 | date_formats: 47 | - "%d-%m-%Y" 48 | languages: 49 | - nl 50 | decimal_separator: "," 51 | replace: 52 | - ["é", "€"] 53 | - ["Contant", "CONTANT"] 54 | - ['\s[l|&|@]', " ℓ"] 55 | - ["B.W", "BTW"] 56 | - [", ", ","] 57 | - ["°", " "] 58 | - ["©", " "] 59 | - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr 60 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.anwb.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: ANWB B.V. 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - bedrag te voldoen:\s+[€]\s+(\d{1,4}[,]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Door u te betalen\s+(\d{1,4}.\d{2}) 13 | type: float 14 | group: sum 15 | bic: BIC.(\w{8,11}) 16 | iban: ([A-Z]{2}[ \-]?[0-9]{2})(?=(?:[ \-]?[A-Z0-9]){9,30}$)((?:[ \-]?[A-Z0-9]{3,5}){2,7})([ \-]?[A-Z0-9]{1,3})? 
17 | date: datum\s+(\d{1,2} .+ \d{4}) 18 | invoice_number: kenmerk.?\s+(\d{5,17}) 19 | vat: 20 | parser: static 21 | value: NL005637727B01 22 | country_code: 23 | parser: static 24 | value: NL 25 | payment_method: 26 | - (iDEAL) 27 | - (AMEX) 28 | - (VISA) 29 | - (MCARD) 30 | - (CONTANT) 31 | lines: 32 | start: Bedrag 33 | end: Factuurbedrag\s 34 | line: (?P\w+.*)\s+(?P(\d{2}-\d{2}-\d{2})) \S+\s(?P(\d{2}-\d{2}-\d{2}))\s+(?P\d+,\d{2}) 35 | types: 36 | qty: float 37 | price_unit: float 38 | keywords: 39 | - ANWB 40 | - NL0056.37.727B01 41 | - € 42 | options: 43 | currency: EUR 44 | languages: 45 | - nl 46 | decimal_separator: "," 47 | replace: 48 | - ["é", "€"] 49 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.argos.json: -------------------------------------------------------------------------------- 1 | { 2 | "issuer": "Kuwait Petroleum Ned. B.V.", 3 | "fields": { 4 | "amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})", 5 | "amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})", 6 | "date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})", 7 | "invoice_number": "Ticketnummer\\s+(\\d+)", 8 | "static_vat": "NL006816721B01", 9 | "partner_name": "(?i)(Kuwait Petroleum Nederland B.V.)", 10 | "country_code": { 11 | "parser": "regex", 12 | "regex": "(?i)nl", 13 | "group": "last" 14 | }, 15 | "partner_website": { 16 | "parser": "regex", 17 | "regex": "argos.nl" 18 | }, 19 | "payment_method": [ 20 | "(AMERICAN EXPRESS)", 21 | "(VISA)", 22 | "(MAESTRO)", 23 | "(CONTANT)", 24 | "(KAS):\\s.\\s\\d+\\.\\d+" 25 | ] 26 | }, 27 | "lines": { 28 | "start": "(?i)BON", 29 | "end": "(Netto\\s|\\Z)", 30 | "first_line": "(?i)(?PPOMP\\s+\\d+)\\s+(?P\\w+)", 31 | "line": [ 32 | "(?i)Volume.*\\s+(?P\\d+[,.]\\d+)\\s?(?P[l|ℓ|L])?", 33 | "(?i)Prijs\\s+[E|€] (?P\\d.\\d{2,3})\\s*[/ ]?(?P[l|ℓ|L])?", 34 | "(?i)B.W\\s+(?P\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P\\d+[,.]\\d{2})?" 
35 | ], 36 | "types": { 37 | "qty": "float", 38 | "price_unit": "float", 39 | "line_tax_percent": "float", 40 | "line_tax_amount": "float" 41 | } 42 | }, 43 | "keywords": ["(?i)Argos", "(?i)nl", "€"], 44 | "options": { 45 | "currency": "EUR", 46 | "languages": ["nl"], 47 | "decimal_separator": ",", 48 | "replace": [ 49 | ["é", "€"], 50 | ["L.p.9.", "LPG"], 51 | ["L.P.G.", "LPG"], 52 | ["L.P.9.", "LPG"], 53 | ["LPLG", "LPG"], 54 | ["Contant", "CONTANT"], 55 | ["\\s[l|&|@]", " ℓ"], 56 | ["([0-9]{2,3})[ /][2]", "\\1 /ℓ"], 57 | ["B.W", "BTW"], 58 | [" - ", "-"], 59 | ["argos\\s[.]?nl", "argos.nl"], 60 | ["Kuwait\\s+Petroleum\\s+Ned", "Kuwait Petroleum Nederland B.V."], 61 | ["€ ([0-9]+) ([0-9]{2})(\\s)", "€ \\1,\\2\\3"], 62 | ["(\\s)([0-9]+)\\.([0-9]{2,3})", "\\1\\2,\\3"] 63 | ] 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.begra.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Begra Magazijninrichting B.V. 3 | fields: 4 | amount: 'Totaal EUR incl[.] btw\s+(\d{1,4}[,]\d{2})' 5 | amount_tax: BTW\s+(\d{1,4}[,]\d{2}) 6 | amount_untaxed: Totaal EUR excl[.] btw\s+(\d{1,4}[,]\d{2}) 7 | date: Factuurdatum\s+(\d{1,2}[.]\s\w+\s\d{4}) 8 | invoice_number: Factuurnr[.]\s+(\S+) 9 | static_vat: NL818038524B01 10 | bic: SWIFT[\/]BIC[:]\s+(\w{8,11}) 11 | iban: '[A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}' 12 | telephone: 13 | parser: regex 14 | regex: '[+]\d{11}' 15 | partner_website: 16 | parser: static 17 | value: begra.nl 18 | partner_email: 19 | parser: static 20 | value: info@begra.nl 21 | partner_name: 22 | parser: static 23 | value: Begra Magazijninrichting B.V. 
24 | country_code: 25 | parser: static 26 | value: nl 27 | partner_zip: 28 | parser: regex 29 | regex: 5347\sKG 30 | partner_city: 31 | parser: regex 32 | regex: Oss 33 | partner_street: 34 | parser: static 35 | value: Ijsselstraat 9 36 | partner_coc: 37 | parser: regex 38 | regex: KvK[:]\s\d{8} 39 | narration: 40 | parser: regex 41 | regex: Ordernr[.]\s+(\w+) 42 | lines: 43 | start: Product 44 | end: Totaal[:] 45 | first_line: (?P\S{8,12})\s+(?P(\w+(?:\s\S+){1,2}))\s+(?P\d+)Stuks\s+(?P\d+[,]\d{2})\s+(?P\d+[,]\d{2}) 46 | line: '(?P(\w+(?:\s\S+){1,2}))' 47 | types: 48 | qty: float 49 | price_unit: float 50 | price_subtotal: float 51 | keywords: 52 | - Begra 53 | - "Factuur" 54 | - NL818038524B01 55 | options: 56 | currency: EUR 57 | languages: 58 | - nl 59 | decimal_separator: "," 60 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.blokker.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: blokker 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal [(]incl. BTW[)]\s+[€]\s=(\d{1,3}[.]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Subtotaal\s+[€]\s+(\d{1,3}[.]\d{2}) 13 | type: float 14 | group: sum 15 | partner_coc: 16 | parser: regex 17 | regex: 18 | - KvK\s+(\d{8}) 19 | telephone: 20 | parser: regex 21 | regex: 22 | - WhatsApp. \s+(\d{2}[-]\d{8}) 23 | partner_website: 24 | parser: regex 25 | regex: 26 | - (blokker.nl) 27 | partner_name: 28 | parser: static 29 | value: Blokker B.V. 
30 | country_code: 31 | parser: static 32 | value: nl 33 | partner_zip: 34 | parser: static 35 | value: 1114 AM 36 | partner_city: 37 | parser: static 38 | value: Amsterdam-Duivendrecht 39 | partner_street: 40 | parser: static 41 | value: Van der Madeweg 13 42 | bic: BIC.(\w{8,11}) 43 | iban: IBAN.(\w{10,30}) 44 | date: Besteldatum\s+.?\s+[:]\s+(\d{1,2}[-]\d{1,2}[-]\d{2}) 45 | invoice_number: Factuurnummer\s+.?\s+(\d{7,10}) 46 | static_vat: NL854525154B01 47 | payment_method: 48 | - (iDEAL) 49 | - (AMEX) 50 | - (AMERICAN EXPRESS) 51 | - (VISA) 52 | - (MCARD) 53 | - (CONTANT) 54 | - (?i)(Paypal) 55 | lines: 56 | - start: Artikel 57 | end: Totaal [(]incl. BTW[)]\s+[€] 58 | first_line: (?P\w+.*)\s+(?P\w+.*)\s+(?P\d+)\s+(?P[€]\s+(\d+.\d{2}))\s+(?P[€]\s+(\d+.\d{2})) 59 | line: (?P\w+.*) # needs check 60 | types: 61 | qty: float 62 | price_unit: float 63 | price_subtotal: float 64 | - start: Subtotaal 65 | end: Totaal [(]incl. BTW[)]\s+[€] 66 | line: (?P(\w+(?:\s\S+)*))\s+(?P(\d+.\d+))? 67 | keywords: 68 | - blokker 69 | - NL854525154B01 70 | - € 71 | options: 72 | currency: EUR 73 | languages: 74 | - nl 75 | decimal_separator: "." 
76 | replace: 77 | - ["é", "€"] 78 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.bp.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Bp Europa SE 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal\s+.?\s+(\d{1,3}[,]\d{2}) 8 | - Bedrag\s+(\d{1,3}.\d{2}) EUR 9 | - (?i)American\sExpress\s+€.?\s+(\d{1,3}.\d{2}) 10 | type: float 11 | amount_untaxed: 12 | parser: regex 13 | regex: 14 | - Totaal netto\s+.\s+(\d{1,3}.\d{2}) 15 | type: float 16 | group: sum 17 | date: 18 | parser: regex 19 | regex: 20 | - (\d{1,2}[.|-|\/]\d{2}[.|-|\/]\d{4})\s 21 | - (\d{1,2}-\d{2}-\d{4})\s+\d{1,2}.\d{2}.\d{2} 22 | type: date 23 | invoice_number: (\d{1,2}[.|-|\/]\d{2}[.|-|\/]\d{4})\s 24 | vat: 25 | parser: static 26 | value: NL801014682B01 27 | country_code: 28 | parser: static 29 | value: NL 30 | payment_method: 31 | - (?i)(American Express) 32 | - (VISA) 33 | - (MCARD) 34 | - (CONTANT) 35 | - (KAS):\s.\s\d+\.\d+ 36 | keywords: 37 | - BP 38 | - NL8010 # or [+]31 39 | - € 40 | options: 41 | currency: EUR 42 | languages: 43 | - nl 44 | decimal_separator: ',' 45 | replace: 46 | - ['é' ,'€'] 47 | - ['L.p.9.' ,'LPG'] 48 | - ['L.P.G.' ,'LPG'] 49 | - ['L.P.9.' 
,'LPG'] 50 | - ['L.P.\s' ,'LPG'] 51 | - ['LPLG' ,'LPG'] 52 | - ['Contant' ,'CONTANT'] 53 | - ['\s[l|&|@]' ,' ℓ'] 54 | - ['B.W', 'BTW'] 55 | - [', ', ','] 56 | - ['°', ' '] 57 | - ['©', ' '] 58 | - ['95\s+\w+' ,'E10 (Euro 95)'] 59 | - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr 60 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.bunq.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: bunq 3 | fields: 4 | amount: Totaal\s+[€]\s+(\d+.\d{2}) 5 | date: Datum.\s+(\d{4}-\d{2}-\d{2}) 6 | invoice_number: Nummer.\s+(\d{7}) 7 | static_vat: NL851519945B01 8 | lines: 9 | start: Bedrag 10 | end: Totaal 11 | last_line: '(?P(\w+(?:\S|[ ]\w\w+)*))\s+(?P\d+)\s+(?P€\s\d+.\d{2})\s+(?P€\s\d+.\d{2})' 12 | line: '^(?P\w+(?:\S|[ ]\w\w+){1})$' # te veel hits 13 | first_line: "^(?P(Subscription))$" 14 | types: 15 | qty: float 16 | price_unit: float 17 | keywords: 18 | - bunq 19 | - NL851519945B01 20 | options: 21 | date_formats: 22 | - "%Y-%m-%d" 23 | currency: EUR 24 | languages: 25 | - nl 26 | decimal_separator: "," 27 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.cpe.yml: -------------------------------------------------------------------------------- 1 | issuer: CPE B.V. 
2 | fields: 3 | amount: Te\s+betalen\s+in\s+EUR:\s+(\d+,\d{2}) 4 | amount_tax: BTW\s+over\s+\d+,\d{2}\s+(\d+,\d{2}) 5 | amount_untaxed: Totaal\s+exclusief\s+BTW:\s+(\d+,\d{2}) 6 | date: Factuurdatum:\s+(\d{1,2}-\d{1,2}-\d{4}) 7 | invoice_number: Factuurnummer:\s+(\d+) 8 | keywords: 9 | - www.cpe.nl/algemene-voorwaarden 10 | - FACTUUR 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d-%m-%Y" 15 | languages: 16 | - nl 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.esso_eg_services.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: EG Services (Netherlands) B.V. 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - TOTAAL\s+€.?\s+(\d+.\d{2}) 8 | - (?i)Amex\s+€.?\s+(\d{1,3}.\d{2}) 9 | type: float 10 | amount_untaxed: 11 | parser: regex 12 | regex: 13 | - \d{2}.\d{2}\s+. (\d+.\d{2})\s+. \d+.\d{2} 14 | type: float 15 | group: sum 16 | date: 17 | parser: regex 18 | regex: 19 | - (\d{1,2}-\d{2}-\d{4}\s+\d{1,2}[:]\d{2}[:]\d{2}) 20 | - (\d{2}-\d{2}-\d{4})\s+\d{2}.\d{2}.\d{2} 21 | type: date 22 | group: first 23 | invoice_number: (\d{1,2}-\d{2}-\d{4})\s+\d{1,2}.\d{2}.\d{2} 24 | vat: 25 | parser: static 26 | value: NL001784249B01 27 | payment_method: 28 | - (?i)(AMEX) 29 | - (VISA) 30 | - (MCARD) 31 | - (CONTANT) 32 | - (KAS):\s.\s\d+\.\d+ 33 | lines: 34 | start: BTW type 35 | end: (\d{1,2}-\d{2}-\d{4})\s+\d{2}.\d{2}.\d{2} 36 | line: (?P<btwtype>\d)\s(?P<line_tax_percent>\d{2}.\d{2})\s+. (?P<amount_untaxed>\d+.\d{2})\s+. (?P<line_tax_amount>\d+.\d{2}) 37 | start: BEDRAG.? 38 | end: TOTAAL\s 39 | line: (?P<name>POMP.*) (?P<qty>\d+.\d{2})\s. .\s+€ (?P<price_unit>\d.\d{3}) 40 | first_line: '(?P<code>\w+).?\s+.\s+[E|€]\s*\d+.\d{0,2}' 41 | # line: .* (?P<qty>\d+.\d{2})\s. 
.\s+€ (?P<price_unit>\d.\d{3}) 42 | types: 43 | qty: float 44 | price_unit: float 45 | line_tax_percent: float 46 | line_tax_amount: float 47 | keywords: 48 | - eur 49 | - EG GROUP 50 | options: 51 | currency: EUR 52 | languages: 53 | - nl 54 | decimal_separator: ',' 55 | date_formats: 56 | - '%d-%m-%Y %H:%M:%S' 57 | # Using replace to correct tesseract ocr 58 | # in Odoo use the following product names 'LPG','E10 (Euro 95)' 59 | replace: 60 | - ['L.p.9.' ,'LPG'] 61 | - ['L.P.G.' ,'LPG'] 62 | - ['L.P.9.' ,'LPG'] 63 | - ['L.P.\s' ,'LPG'] 64 | - ['Euro 95 E10' ,'E10 (Euro 95)'] 65 | - ['Diese.\s+', 'B7 (Diesel) '] 66 | - ['LPLG' ,'LPG'] 67 | - ['Contant' ,'CONTANT'] 68 | - ['\s[l|&|@]' ,' ℓ'] 69 | - ['B.W', 'BTW'] 70 | - [', ', ','] 71 | - ['°', ' '] 72 | - ['©', ' '] 73 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.fedex.json: -------------------------------------------------------------------------------- 1 | { 2 | "issuer": "FedEx Express Netherlands BV", 3 | "fields": { 4 | "amount": { 5 | "parser": "regex", 6 | "regex": "(?i)Totaal\\s+EUR\\s+(\\d*[.,]?\\d+[,.]\\d+)", 7 | "type": "float" 8 | }, 9 | "amount_tax": { 10 | "parser": "regex", 11 | "regex": "BTW\\s+(\\d*[.,]?\\d+[,.]\\d+)", 12 | "group": "sum", 13 | "type": "float" 14 | }, 15 | "date": "(?i)FactuurDatum[:]\\s+(\\d{1,2}[-/]\\d{2}[-/]\\d{4})", 16 | "invoice_number": { 17 | "parser": "regex", 18 | "regex": "Factuurnummer[:]\\s+(\\w+)\\s+" 19 | }, 20 | "static_vat": "NL857768578B01", 21 | "partner_name": "(?i)(FedEx Express Netherlands BV)", 22 | "country_code": { 23 | "parser": "regex", 24 | "regex": "(?i)nl", 25 | "group": "first" 26 | }, 27 | "partner_website": { 28 | "parser": "regex", 29 | "regex": "fedex.\\w+", 30 | "group": "first" 31 | }, 32 | "partner_coc": { 33 | "parser": "regex", 34 | "regex": "K[.]v[.]K[.] 
Nr\\s(\\d{8})" 35 | }, 36 | "iban": { 37 | "parser": "regex", 38 | "regex": "[A-Z]{2}\\d{2}?\\w{4}?\\d{4}?\\d{4}?\\d{0,2}" 39 | }, 40 | "bic": { 41 | "parser": "regex", 42 | "regex": "BIC[:]\\s+(\\w{8,11})" 43 | } 44 | }, 45 | "keywords": ["(?i)fedex", "(?i)NL857768578B01", "(?i)nl", "(?i)factuur"], 46 | "options": { 47 | "currency": "EUR", 48 | "languages": ["nl"], 49 | "decimal_separator": "," 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.ferbox.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: FERBOX 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal\s+[€](\d?.?\d{1,3}[.]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Subtotaal\s+[€](\d?[,]?\d{1,4}.\d{2}) 13 | type: float 14 | group: sum 15 | bic: BIC.(\w{8,11}) 16 | iban: IBAN.(\w{10,30}) 17 | date: Besteldatum.?\s+(\d{1,2} \w+ \d{4}) 18 | invoice_number: Bestelnummer.?\s+(\d{4}) 19 | static_vat: NL861141350B01 20 | payment_method: 21 | - (iDEAL) 22 | - (AMEX) 23 | - (VISA) 24 | - (MCARD) 25 | - (CONTANT) 26 | - (?i)(Paypal) 27 | lines: 28 | start: Product 29 | end: Subtotaal 30 | first_line: (?P<product>\w+.*)\s+(?P<qty>\d+)\s+(?P<price_unit>[€]\s+(\d+[.]\d{2})) 31 | line: (?P<product>\w+.*) 32 | types: 33 | qty: float 34 | price_unit: float 35 | keywords: 36 | - FERBOX 37 | - NL861141350B01 38 | - € 39 | options: 40 | currency: EUR 41 | languages: 42 | - nl 43 | decimal_separator: "." 
44 | replace: 45 | - ["é", "€"] 46 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.gamma.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Gamma 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - 'TOTAAL\s+\d{1,3}[,]\d{2}\s+\d{1,3}[,]\d{2}\s+(\d{1,3}[,]\d{2})' 8 | - Totaal\s+(\d{1,3}[.|,]\d{2}) 9 | type: float 10 | amount_tax: 11 | parser: regex 12 | regex: 13 | - 'BTW\s+\d{1,2}[,|.]\d{2}\s+[%]\s+\w+\s+\d{1,3}[,|.]\d{2}\s+(\d{1,3}[,]\d{2})' 14 | type: float 15 | telephone: 16 | parser: regex 17 | regex: 18 | - '([+]\d{2}\s+\d{1,4}\s+\d{3,7})' 19 | country_code: 20 | parser: static 21 | value: nl 22 | date: Datum[:]\s+(\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 23 | invoice_number: (\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 24 | payment_method: 25 | - (AMEX) 26 | - (VISA) 27 | - (MCARD) 28 | - (CONTANT) 29 | - (CHIP) 30 | - (KAS):\s.\s\d+\.\d+ 31 | lines: 32 | start: AANT 33 | end: Totaal 34 | line: '(?P<qty>\d+.\d{1,2})\s+(?P<product>\w+).?\s+.?(?P<price_unit>\d{1,4}[,|.]\d{2})' 35 | types: 36 | qty: float 37 | price_unit: float 38 | keywords: 39 | - Gamma 40 | - bouwmarkt 41 | options: 42 | currency: EUR 43 | date_formats: 44 | - "%d-%m-%Y" 45 | languages: 46 | - nl 47 | decimal_separator: "," 48 | replace: 49 | - ["é", "€"] 50 | - ["Contant", "CONTANT"] 51 | - ['\s[l|&|@]', " ℓ"] 52 | - ["B.W", "BTW"] 53 | - [", ", ","] 54 | - ["°", " "] 55 | - ["©", " "] 56 | - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr 57 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.goos.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Goos van Pelt B.V. 
3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - TOTAAL\s[:] Eur\s+(\d{1,4}[,]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Totaal goederenbedrag\s+[:]\sEur\s+(\d{1,4}[,]\d{2}) 13 | type: float 14 | amount_tax: 15 | parser: regex 16 | regex: 17 | - \d[%]\sover\sEur\s+(\d{1,3}[,]\d{2})\s+\S\s+Eur\s+(\d{1,3}[,]\d{2}) 18 | type: float 19 | partner_coc: 20 | parser: regex 21 | regex: 22 | - K[.]v[.]K[.] Tilburg nr[.|,][:|;]\s+(\d{8}) 23 | telephone: 24 | parser: static 25 | value: "0416351166" 26 | partner_website: 27 | parser: static 28 | value: goosvanpelt.com 29 | partner_name: 30 | parser: static 31 | value: Goos van Pelt B.V. 32 | country_code: 33 | parser: static 34 | value: nl 35 | partner_zip: 36 | parser: static 37 | value: 4264 AZ 38 | partner_city: 39 | parser: static 40 | value: Veen 41 | partner_street: 42 | parser: static 43 | value: Bagijnhof 31 44 | bic: SWIFT\s+(\w{8,11}) 45 | iban: IBAN[:]\s+(\w{10,30}) 46 | date: \s+(\d{2}[-]\d{2}[-]\d{4}) 47 | invoice_number: Faktuurnummer\s\S\s+(\w?\d{3,10}) 48 | static_vat: NL818356157B01 49 | payment_method: 50 | - (iDEAL) 51 | - (AMEX) 52 | - (AMERICAN EXPRESS) 53 | - (VISA) 54 | - (MCARD) 55 | - (CONTANT) 56 | - (?i)(Paypal) 57 | lines: 58 | start: "Artikelnr" 59 | end: "Betaling" 60 | line: (?P<barcode>(\w+(?:\S|[.]\w\w+|\n)*))\s+(?P<grp>\d{3})\s+(?P<product>\w+.*)\s+(?P<qty>\d+[.|,]?\d+?)\s+\w{3}\s+(?P<price_unit>(\d+[,]\d{2}))\s+(?P<discount>\d+[.|,]?\d+?[%]?)\s+\w{3}\s+(?P<price_subtotal>\d+?[,]?\d{0,2}) 61 | types: 62 | qty: float 63 | price_unit: float 64 | price_subtotal: float 65 | keywords: 66 | - NL818356157B01 67 | - FACTUUR 68 | options: 69 | currency: EUR 70 | languages: 71 | - nl 72 | decimal_separator: "," 73 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.ipparking.paleiskwartier.yml: -------------------------------------------------------------------------------- 1 | #
SPDX-License-Identifier: MIT 2 | issuer: Ipparking 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Betaald[:]\s+(\d{1,3}[,]\d{2}) 8 | type: float 9 | amount_tax: 10 | parser: regex 11 | regex: 12 | - BTW[:]\s+(\d{1,3}[,]\d{2}) 13 | type: float 14 | group: sum 15 | partner_coc: 16 | parser: static 17 | value: 17180180 18 | telephone: 19 | parser: static 20 | value: +31492521133 21 | partner_website: 22 | parser: regex 23 | regex: 24 | - ipparking.nl 25 | partner_name: 26 | parser: static 27 | value: IP Parking Europe B.V. 28 | country_code: 29 | parser: static 30 | value: nl 31 | partner_zip: 32 | parser: static 33 | value: 5753 PB 34 | partner_city: 35 | parser: static 36 | value: Deurne 37 | partner_street: 38 | parser: static 39 | value: Industrieweg 19 40 | bic: 41 | parser: static 42 | value: INGBNL2A 43 | date: 44 | parser: regex 45 | regex: 46 | - Betaald[:]\s+(\d{1,2}[-]\d{1,2}[-]\d{4}) 47 | type: date 48 | invoice_number: Kwitantienummer[:]\s+(\d{3,10}) 49 | vat: 50 | parser: static 51 | value: NL815432811B01 52 | payment_method: 53 | - (iDEAL) 54 | - (AMEX) 55 | - (AMERICAN EXPRESS) 56 | - (VISA) 57 | - (MASTERCARD) 58 | - (CONTANT) 59 | - (?i)(Paypal) 60 | lines: 61 | start: "Parkeren" 62 | end: "Terminal" 63 | line: (?P<line_note>Parkeerduur[:]\s+\S+) 64 | keywords: 65 | - Parkeren 66 | - Paleiskwartier 67 | - EUR 68 | options: 69 | currency: EUR 70 | languages: 71 | - nl 72 | decimal_separator: "," 73 | replace: 74 | - ["é", "€"] 75 | - ["~", "-"] 76 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.karwei.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Karwei 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - 'TOTAAL\s+\d{1,3}[,]\d{2}\s+\d{1,3}[,]\d{2}\s+(\d{1,3}[,]\d{2})' 8 | - Totaal\s+(\d{1,3}[.|,]\d{2}) 9 | type: float 10 | amount_tax: 11 | parser: regex 12 | regex: 13 | - 
'BTW\s+\d{1,2}[,|.]\d{2}\s+[%]\s+\w+\s+\d{1,3}[,|.]\d{2}\s+(\d{1,3}[,]\d{2})' 14 | type: float 15 | telephone: 16 | parser: regex 17 | regex: 18 | - '([+]\d{2}\s+\d{1,4}\s+\d{3,7})' 19 | country_code: 20 | parser: static 21 | value: nl 22 | date: Datum[:]\s+(\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 23 | invoice_number: (\d{1,2}[-|\/]\d{2}[-|\/]\d{4}) 24 | payment_method: 25 | - (AMEX) 26 | - (VISA) 27 | - (MCARD) 28 | - (CONTANT) 29 | - (CHIP) 30 | - (KAS):\s.\s\d+\.\d+ 31 | lines: 32 | start: AANT 33 | end: Totaal 34 | line: '(?P<qty>\d+.\d{1,2})\s+(?P<product>\w+).?\s+.?(?P<price_unit>\d{1,4}[,|.]\d{2})' 35 | types: 36 | qty: float 37 | price_unit: float 38 | keywords: 39 | - karwei 40 | - EUR 41 | options: 42 | currency: EUR 43 | date_formats: 44 | - "%d-%m-%Y" 45 | languages: 46 | - nl 47 | decimal_separator: "," 48 | replace: 49 | - ["é", "€"] 50 | - ["Contant", "CONTANT"] 51 | - ['\s[l|&|@]', " ℓ"] 52 | - ["B.W", "BTW"] 53 | - [", ", ","] 54 | - ["°", " "] 55 | - ["©", " "] 56 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.kav.yml: -------------------------------------------------------------------------------- 1 | issuer: kavautoverhuur 2 | fields: 3 | amount: Totaal\s+[€]\s+-?(\d+.\d{2}) 4 | amount_untaxed: Subtotaal Ex[.] 
BTW\s+[€]\s+(\d+[,]\d{2}) 5 | date: Factuurdatum[:]\s+(\d{1,2}-\d{2}-\d{4}) 6 | invoice_number: Factuur[:]\s+(\d+) 7 | static_vat: NL008074720B01 8 | lines: 9 | start: Totaal[(] 10 | end: Subtotaal 11 | line: (?P<product>(\w+(?:\s\S+)*))\s+(?P<qty>(\d{0,4}))\w?\s+[€]\s+(?P<price_unit>\d+.\d{2})\s+[€]\s+(?P<unittotal>\d+.\d{2})?\s+[€]\s+(?P<line_total>(\d+.\d{2})) 12 | types: 13 | qty: float 14 | price_unit: float 15 | unittotal: float 16 | line_total: float 17 | keywords: 18 | - KAV Den Bosch 19 | - vestiging 20 | options: 21 | date_formats: 22 | - "%d %m %Y" 23 | currency: EUR 24 | languages: 25 | - nl 26 | decimal_separator: "," 27 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.koffiehenk.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: KoffieHenk 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Eindtotaal [(]incl. BTW[)][:]\s+[€]\s+(\d{1,4}[,]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Eindtotaal [(]excl. BTW[)][:]\s+[€]\s+(\d{1,4}.\d{2}) 13 | type: float 14 | group: sum 15 | bic: BIC.(\w{8,11}) 16 | iban: IBAN.(\w{10,30}) 17 | date: 18 | parser: regex 19 | regex: Factuurdatum[:]\s+(\d{1,2} .+[.] \d{4}) 20 | type: date 21 | invoice_number: Factuurnummer.?\s+(\d{5,17}) 22 | static_vat: NL001409338B42 23 | country_code: 24 | parser: static 25 | value: nl 26 | payment_method: 27 | - (iDEAL) 28 | - (AMEX) 29 | - (VISA) 30 | - (MCARD) 31 | - (CONTANT) 32 | - (?i)(Paypal) 33 | lines: 34 | start: Art.nr. 35 | end: Subtotaal [(]excl. 
BTW[)] 36 | line: (?P<barcode>\d+)\s+(?P<product>\w+.*)\s+[€]\s+(?P<price_unit>(\d+.\d{2}))\s+(?P<qty>\d+) 37 | types: 38 | qty: float 39 | price_unit: float 40 | keywords: 41 | - Koffie Henk 42 | - NL001409338B42 43 | - € 44 | options: 45 | currency: EUR 46 | languages: 47 | - nl 48 | decimal_separator: "," 49 | replace: 50 | - ["é", "€"] 51 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.ns.invoice.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: NS Reizigers 3 | fields: 4 | amount: Totaalbedrag\s+[€]\s+(\d+.\d{2}) 5 | amount_untaxed: Totaal exclusief BTW\s+[€]\s+(\d+.\d{2}) 6 | sum_amount_tax: 7 | - Totaal BTW\s9[%]\s+€\s+(\d+,\d{2}) 8 | - Totaal BTW\s21[%]\s+€\s+(\d+,\d{2}) 9 | date: Factuurdatum\s+(\d{2} .+ \d{4}) 10 | invoice_number: Factuurnummer\s+(\d{2} \d{3} \d{2} \d{3} \d{2}) 11 | date_start: 12 | parser: regex 13 | regex: (\W+\s\d{4})\s+Factuurdatum 14 | date_end: 15 | parser: regex 16 | regex: (\W+\s\d{4})\s+Factuurdatum 17 | purchase_order_id: 18 | parser: regex 19 | regex: Overeenkomstnummer\s((?:\d+\s)*) 20 | partner_ref: 21 | parser: regex 22 | regex: Debiteurnummer\s((?:\d+\s)*) 23 | customer_order_number: 24 | parser: regex 25 | regex: Uw kenmerk\s(\d+) 26 | vat: 27 | parser: static 28 | value: NL803882804B01 29 | mandate_id: machtiging is (\d{12}). 30 | Incassant-ID: is (NL\d{2}\s+ZZZ\s+\d{4}\s+\d{4}\s+\d{4}). 31 | payment_unece_code: 32 | parser: static 33 | value: 49 34 | partner_name: 35 | parser: static 36 | value: NS Reizigers B.V. 
37 | partner_street: 38 | parser: static 39 | value: Laan van Puntenburg 100 40 | partner_zip: 41 | parser: regex 42 | regex: 3511\sER 43 | partner_city: 44 | parser: regex 45 | regex: Utrecht 46 | country_code: 47 | parser: static 48 | value: nl 49 | state_code: 50 | parser: static 51 | value: UT 52 | telephone: 53 | parser: regex 54 | regex: '\d{3}\s-\s\d{3}\s\d{4}' 55 | partner_coc: 56 | parser: static 57 | value: "30124362" 58 | partner_website: 59 | parser: static 60 | value: ns.nl 61 | lines: 62 | start: Omschrijving 63 | end: Subtotaal 64 | line: '^\s+(?P<name>\w+(?:\S|[ ]\w\w+){1}).+(?P<line_tax_percent>\d{1,2})[%]\s[€]\s+(?P<price_unit>\d{1,}.\d{2})' 65 | types: 66 | line_tax_percent: float 67 | price_unit: float 68 | keywords: 69 | - NS Reizigers 70 | - NL803882804B01 71 | options: 72 | date_formats: 73 | - "%Y %B %d" 74 | currency: EUR 75 | languages: 76 | - nl 77 | decimal_separator: "," 78 | replace: 79 | - ['\n\s+0000', " 0000"] 80 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.parkmobile.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Parkmobile 3 | fields: 4 | amount: TOTAAL\s+[€] (\d+.\d{2}) #legacy layout 5 | amount: TOTAAL TE VOLDOEN\s+\d+[,]\d{2}\s+\d+[,]\d{2}\s+(\d+[,]\d{2}) 6 | amount_untaxed: TOTAAL TE VOLDOEN\s+(\d+[,]\d{2})\s+ 7 | invoice_number: 8 | parser: regex 9 | regex: FACTUUR [#]\s+(\d+) 10 | group: first 11 | vat: 12 | parser: static 13 | value: NL813399051B01 14 | country_code: 15 | parser: static 16 | value: NL 17 | date: 18 | parser: regex 19 | regex: DATUM\s+(\d{1,2}-\d{2}-\d{4}) 20 | type: date 21 | group: first 22 | date_start: 23 | parser: regex 24 | regex: PERIODE \s+(\w+\s\d{4}) 25 | type: date 26 | group: first 27 | lines: 28 | start: OMSCHRIJVING 29 | end: VERSCHULDIGD BEDRAG 30 | line: 
(?P<name>(\w+(?:\s\S+)*))\s+(?P<qty>(\d{0,4}))\s+(?P<price_unit>\d+.\d{2})\s+(?P<line_tax_amount>\d+.\d{2})?\s+(?P<line_tax_percent>(\d{2}))?[%]?\s+(?P<line_total>(\d+.\d{2})) 31 | types: 32 | qty: float 33 | price_unit: float 34 | line_tax_percent: float 35 | line_tax_amount: float 36 | unittax: float 37 | line_total: float 38 | keywords: 39 | - Parkmobile 40 | - NL813399051B01 41 | options: 42 | date_formats: 43 | - '%d %m %Y' 44 | currency: EUR 45 | languages: 46 | - nl 47 | decimal_separator: ',' 48 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.praxis.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Praxis 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - 'TOTAAL\s+\d{1,3}[,]\d{2}\s+\d{1,3}[,]\d{2}\s+(\d{1,3}[,]\d{2})' 8 | - Totaal\s+[:]\s+(\d{1,3}.\d{2}) 9 | type: float 10 | amount_tax: 11 | parser: regex 12 | regex: 13 | - 'TOTAAL\s+\d{1,3}[,]\d{2}\s+(\d{1,3}[,]\d{2})\s+\d{1,3}[,]\d{2}' 14 | type: float 15 | telephone: 16 | parser: regex 17 | regex: 18 | - '(\d{1,3}[-]\d{5,6})' 19 | date: 20 | parser: regex 21 | regex: 22 | - \s(\d{1,2}.\d{2}.\d{4})\s 23 | - \s(\d{4}-\w{3}-\d{2})\s 24 | type: date 25 | invoice_number: 26 | parser: regex 27 | regex: \s(\d{4}-\w{3}-\d{2})\s 28 | static_vat: NL004371021B01 29 | country_code: 30 | parser: static 31 | value: nl 32 | partner_name: 33 | parser: regex 34 | regex: 35 | - '(Praxis\s\S+\s?\S+?)' 36 | partner_coc: 37 | parser: regex 38 | regex: 39 | - 'KVK[:]\s+(\d{8})' 40 | payment_method: 41 | - (AMEX) 42 | - (VISA) 43 | - (MCARD) 44 | - (CONTANT) 45 | - (CHIP) 46 | - (KAS):\s.\s\d+\.\d+ 47 | lines: 48 | start: ARTIKELEN 49 | end: BTW SPECIFICATIE 50 | line: '.(?P<product>\w+).?\s+.?\d+.\d{1,2}' 51 | keywords: 52 | - (?i)Praxis 53 | - (?i)Doe\sHet\s 54 | options: 55 | currency: EUR 56 | date_formats: 57 | - "%d-%m-%Y" 58 | languages: 59 | - nl 60 |
decimal_separator: "," 61 | replace: 62 | - ["é", "€"] 63 | - ["Contant", "CONTANT"] 64 | - ['\s[l|&|@]', " ℓ"] 65 | - ["B.W", "BTW"] 66 | - [", ", ","] 67 | - ["°", " "] 68 | - ["©", " "] 69 | - ['(\s)([0-9]+)\.([0-9]{2})(\s)', '\1\2,\3\4'] # workaround for ocr 70 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.reclameland.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Reclameland BV 3 | fields: 4 | amount: Totaal\sinclusief\sbtw\s+€\s+(\d+,\d{2}) 5 | amount_untaxed: Totaal\s+exclusief\sbtw\s+€\s+(\d+,\d{2}) 6 | date: Factuurdatum:\s+(\d{1,2}-\d{1,2}-\d{4}) 7 | invoice_number: Factuurnummer:\s+(.+) 8 | static_vat: NL855104946B01 9 | keywords: 10 | - NL855104946B01 11 | - Factuurnummer 12 | - € 13 | options: 14 | currency: EUR 15 | date_formats: 16 | - "%d-%m-%Y" 17 | languages: 18 | - nl 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.simpel.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Simpel 3 | fields: 4 | amount: inclusief Btw\s+[€|E]\s+(\d+.\d{2}) 5 | amount_tax: Btw\s21\s[%]\s+€\s+(\d+,\d{2}) 6 | date: Factuurdatum.\s+(\d{2}-\d{2}-\d{4}) 7 | invoice_number: Factuurnummer.\s+(F\d{10}) 8 | static_vat: NL853855249B01 9 | partner_coc: KvK.\s+(\d+) 10 | keywords: 11 | - NL853855249B01 12 | - KvK. 
60316306 13 | options: 14 | currency: EUR 15 | languages: 16 | - nl 17 | decimal_separator: "," 18 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.transip.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Transip 3 | fields: 4 | amount: Totaal.\s+[€]\s+(\d+.\d{2}) 5 | amount_untaxed: Subtotaal\s+[€]\s+(\d+.\d{2}) 6 | sum_amount_tax: 7 | - BTW [(] 21[%] [)]\s+€\s+(\d+,\d{2}) 8 | - BTW [(] 9[%] [)]\s+€\s+(\d+,\d{2}) 9 | date: Factuurdatum.\s+(\d{2}-\d{2}-\d{4}) 10 | invoice_number: Factuur.\s+(F\d+[.]\d+[.]\d+[.]\d+) 11 | static_vat: NL812334966B01 12 | partner_coc: KvK\s(\d{8}) 13 | country_code: 14 | parser: static 15 | value: nl 16 | lines: 17 | start: Aantal 18 | end: Subtotaal\s 19 | line: (?P<description>\w+.*)\s+(?P<period>\w+.*)\s+(?P<date_start>\d{2}-\d{2}-\d{4})\s-\s(?P<date_end>\d{2}-\d{2}-\d{4})\s+(?P<qty>\d+)\s+[€]\s+(?P<price_unit>(\d+,\d{2})) 20 | types: 21 | qty: float 22 | vat_percent: float 23 | price_unit: float 24 | keywords: 25 | - NL812334966B01 26 | - TransIP 27 | options: 28 | currency: EUR 29 | languages: 30 | - nl 31 | decimal_separator: "," 32 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.tuynder.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Tuynder B.V. 3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal EUR incl[.] btw\s+(\d{1,3}[,]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Totaal EUR excl[.] 
btw\s+(\d{1,3}[,]\d{2}) 13 | type: float 14 | group: sum 15 | partner_coc: 16 | parser: regex 17 | regex: 18 | - KvK-nummer\s+(\d{8}) 19 | telephone: 20 | parser: regex 21 | regex: 22 | - T\s+([+]31\s\S+\d{2}\s+\d{3,8}) 23 | partner_website: 24 | parser: regex 25 | regex: 26 | - (tuynder.nl) 27 | partner_name: 28 | parser: static 29 | value: Tuynder B.V. 30 | country_code: 31 | parser: static 32 | value: nl 33 | partner_zip: 34 | parser: static 35 | value: 2632 BA 36 | partner_city: 37 | parser: static 38 | value: Nootdorp 39 | partner_street: 40 | parser: static 41 | value: Gildeweg 8 42 | bic: SWIFT\s+(\w{8,11}) 43 | iban: IBAN\s+(\w{10,30}) 44 | date: Factuur\sdatum\s+(\d{1,2}[-]\d{1,2}[-]\d{2}) 45 | invoice_number: Factuur\snummer\s+(\w+\d{3,10}) 46 | static_vat: NL820781812B01 47 | payment_method: 48 | - (iDEAL) 49 | - (AMEX) 50 | - (AMERICAN EXPRESS) 51 | - (VISA) 52 | - (MCARD) 53 | - (CONTANT) 54 | - (?i)(Paypal) 55 | lines: 56 | start: "ARTIKELNR" 57 | end: "Totaal EUR excl" 58 | line: (?P<code>(\w+(?:\S|[.]\w\w+|\n)*))\s+(?P<qty>\d+)\s+(?P<product>\w+.*)\s+(?P<price_unit>(\d+[,]\d{2}))\s+(?P<tax_percent>\d+?[,]?\d{0,2})\s+(?P<price_subtotal>(\d+[,]\d{2}))\s+(?P<line_subtotal>\d+[,]\d{2}) 59 | # line: (?P<product>\w+.*) # needs check 60 | types: 61 | qty: float 62 | price_unit: float 63 | price_subtotal: float 64 | line_subtotal: float 65 | keywords: 66 | - Tuynder 67 | - NL820781812B01 68 | - EUR 69 | options: 70 | currency: EUR 71 | languages: 72 | - nl 73 | decimal_separator: "," 74 | replace: 75 | - ["é", "€"] 76 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.vistaprint.yml: -------------------------------------------------------------------------------- 1 | issuer: Vistaprint 2 | fields: 3 | amount: '.*Total\s+(\d+,\d{2})\s+' 4 | date: 'Date\sde\sla\sfacture\s:\s+(\d{2}/\d{2}/\d{4}).*' 5 | invoice_number: '.*N.\sde\sla\sfacture\s:\s+(\d+)' 6 | static_vat: NL812139513B01 7 | 
keywords: 8 | - NL812139513B01 9 | - facture 10 | - Vistaprint 11 | options: 12 | currency: EUR 13 | date_formats: 14 | - "%d/%m/%Y" 15 | decimal_separator: "," 16 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.vodafone.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Vodafone Libertel B.V. 3 | fields: 4 | amount: Totaal te betalen\s+.\s(\d+,\d{2}) 5 | amount_untaxed: Totaal exclusief btw\s+(\d+.\d{2}) 6 | date: Datum\s+(\d{1,2} \w{3,9}\s\d{4}) 7 | date_due: Dit bedrag wordt omstreeks (\d{1,2}\s\w{3,8}\s\d{4}) 8 | invoice_number: Rekeningnummer.\s+(\d+) 9 | static_vat: NL800755133B01 10 | country_code: 11 | parser: static 12 | value: nl 13 | keywords: 14 | - NL800755133B01 15 | options: 16 | currency: EUR 17 | languages: 18 | - nl 19 | decimal_separator: "," 20 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.wasco.yml: -------------------------------------------------------------------------------- 1 | issuer: Wasco Groothandelsgroep B.V. 2 | fields: 3 | amount: Factuurbedrag\s+EUR\s+\(Incl\.\s+BTW\)\s+(\d+,\d{2}) 4 | amount_tax: BTW\s+(\d+,\d{2}) 5 | amount_untaxed: Subtotaal\(Excl.\s+BTW\)\s+(\d+,\d{2}) 6 | date: Datum\s+(\d{1,2}-\d{1,2}-\d{4}) 7 | invoice_number: Factuur\s+(\d+) 8 | keywords: 9 | - Wasco Groothandelsgroep B.V. 10 | - NL 8016 03 729 B01 11 | - Factuur 12 | options: 13 | currency: EUR 14 | date_formats: 15 | - "%d-%m-%Y" 16 | languages: 17 | - nl 18 | decimal_separator: "," 19 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.weid.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: connected information systems B.V. 
3 | fields: 4 | amount: 5 | parser: regex 6 | regex: 7 | - Totaal te incasseren\s+(\d{1,3}[,]\d{2}) 8 | type: float 9 | amount_untaxed: 10 | parser: regex 11 | regex: 12 | - Totaalbedrag excl[.] BTW\s+(\d{1,3}[,]\d{2}) 13 | type: float 14 | amount_tax: 15 | parser: regex 16 | regex: 17 | - 'Totaal BTW-bedrag\s+(\d{1,3}[,]\d{2})' 18 | type: float 19 | partner_coc: 20 | parser: regex 21 | regex: 22 | - KvK\s+(\d{8}) 23 | telephone: 24 | parser: regex 25 | regex: 26 | - (\d{3}-\d{7}) 27 | partner_website: 28 | parser: regex 29 | regex: 30 | - (we-id.nl) 31 | partner_name: 32 | parser: static 33 | value: connected information systems B.V. 34 | country_code: 35 | parser: static 36 | value: nl 37 | partner_zip: 38 | parser: static 39 | value: 8200 AA 40 | bic: BIC\s+(\w{8,11}) 41 | iban: IBAN\s+([A-Z]{2}\d{2}?\w{4}?\d{4}?\d{4}?\d{0,2}) 42 | date: Factuurdatum\s+[:|;]\s+(\d{1,2}[\/]\d{1,2}[\/]\d{4}) 43 | invoice_number: Factuurnummer\s+[:|;]\s+(\d{3,10}) 44 | static_vat: NL819935177B01 45 | payment_method: 46 | - (iDEAL) 47 | - (AMEX) 48 | - (AMERICAN EXPRESS) 49 | - (VISA) 50 | - (MCARD) 51 | - (CONTANT) 52 | - (?i)(Paypal) 53 | lines: 54 | start: "Aantal" 55 | end: "Totaalbedrag" 56 | line: (?P<qty>\d+[,|.]\d{2})\s+(?P<product>\w+.*)\s+(?P<price_unit>(\d+[,]\d{2}))\s+(?P<tax_percent>\d+?[,]?\d{0,2})\s+(?P<price_subtotal>(\d+[,]\d{2})) 57 | types: 58 | qty: float 59 | price_unit: float 60 | price_subtotal: float 61 | tax_percent: float 62 | keywords: 63 | - we-id 64 | - We-ID 65 | - EUR 66 | options: 67 | currency: EUR 68 | languages: 69 | - nl 70 | decimal_separator: "," 71 | replace: 72 | - ["é", "€"] 73 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.yezzer.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Yezzer 3 | fields: 4 | amount: Totaal\s+(\d+.\d{2}) 5 | date: Datum\s+(\d{2}-\d{2}-\d{4}) 6 | 
invoice_number: Notanummer\s+(\d{7}) 7 | static_vat: NL855570477B01 8 | date_start: (\d{2}-\d{2}-\d{4}) tot 9 | date_end: tot (\d{2}-\d{2}-\d{4}) 10 | country_code: 11 | parser: static 12 | value: nl 13 | lines: 14 | start: (\d{2}-\d{2}-\d{4}) tot (\d{2}-\d{2}-\d{4}) 15 | end: Totaal 16 | line: \s+(?P<description>\w+)\s+(?P<price_unit>\d+.\d{2}) 17 | types: 18 | price_unit: float 19 | keywords: 20 | - Yezzer 21 | - Surebusiness 22 | options: 23 | date_formats: 24 | - "%d %m %Y" 25 | currency: EUR 26 | languages: 27 | - nl 28 | decimal_separator: "," 29 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/nl/nl.zinkunie.yml: -------------------------------------------------------------------------------- 1 | issuer: Zinkunie B.V. 2 | fields: 3 | amount: Totaal\s+EURO\s+:\s+(\d+,\d{2}) 4 | amount_tax: B\.T\.W\.\s+(\d+,\d{2}) 5 | date: Factuurdatum\s+:\s+(\d{1,2}-\d{1,2}-\d{4}) 6 | invoice_number: Factuurnummer\s+:\s+(\d+) 7 | keywords: 8 | - www.zinkunie.nl 9 | - F A C T U U R 10 | options: 11 | currency: EUR 12 | date_formats: 13 | - "%d-%m-%Y" 14 | languages: 15 | - nl 16 | decimal_separator: "," 17 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/pl/pl.bmw-fs.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: BMW Financial Services Polska spółka z ograniczoną odpowiedzialnością 3 | keywords: 4 | - BMW Financial Services Polska Sp. z o. o. 
5 | - "7010308447" 6 | fields: 7 | date: Data wystawienia:\s+(\d{4}-\d{2}-\d{2}) 8 | sale_date: Data wystawienia:\s+(\d{4}-\d{2}-\d{2}) 9 | invoice_number: Faktura VAT Nr:\s+([\dA-Z/]+) 10 | amount: Do zapłaty:\s+-*\s+([\d\s]+,\d\d) 11 | vat: 12 | parser: static 13 | value: 7010308447 14 | vat_lines: 15 | parser: lines 16 | start: RAZEM:.* 17 | end: Słownie 18 | line: ^.*W tym:\s+(?P<net>\d[\d\s]*,\d\d)\s+(?P<rate>\d+)\s+(?P<vat>\d[\d\s]*,\d\d)\s+(?P<gross>\d[\d\s]*,\d\d)$ 19 | types: 20 | net: float 21 | rate: int 22 | vat: float 23 | gross: float 24 | options: 25 | currency: PLN 26 | date_formats: 27 | - "%Y-%m-%d" 28 | decimal_separator: "," 29 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | keywords: 3 | - "Miejsce wystawienia:" 4 | - "Data wystawienia:" 5 | - "Sprzedawca:" 6 | - "Nabywca:" 7 | - "według stawki VAT" 8 | - "Razem do zapłaty:" 9 | - 'Wystawił\(a\)' 10 | - 'Odebrał\(a\)' 11 | - "Podpis osoby upoważnionej" 12 | fields: 13 | issuer: 14 | parser: regex 15 | regex: Sprzedawca:.*\n(.*?)\s{3,} 16 | vatin: 17 | parser: regex 18 | regex: NIP:\s+(\d{10}) 19 | type: int 20 | group: first 21 | date: 22 | parser: regex 23 | regex: 24 | - Data wystawienia:\n.*(\d{2}\.\d{2}\.\d{4}) 25 | - Data wystawienia:\n.*(\d{4}-\d{2}-\d{2}) 26 | type: date 27 | invoice_number: 28 | parser: regex 29 | regex: Faktura VAT\s+(.*?)\s+oryginał 30 | amount: 31 | parser: regex 32 | regex: Razem do zapłaty:\s+([\d\s]+,[\d][\d]) 33 | type: float 34 | nrb: 35 | parser: regex 36 | regex: PLN:\s+([0-9]{2}(?:\s?[0-9]{4}){6}) 37 | options: 38 | currency: PLN 39 | date_formats: 40 | - "%d.%m.%Y" 41 | - "%Y-%m-%d" 42 | decimal_separator: "," 43 | priority: 3 44 | -------------------------------------------------------------------------------- 
/src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | keywords: 3 | - "InsERT nexo" 4 | fields: 5 | issuer: 6 | parser: regex 7 | regex: Sprzedawca.*\n(.*?)\s{3,} 8 | vatin: 9 | parser: regex 10 | regex: NIP:\s+(\d{10}) 11 | type: int 12 | group: first 13 | date: 14 | parser: regex 15 | regex: Data wystawienia\s+(\d{2}-\d{2}-\d{4}) 16 | type: date 17 | invoice_number: 18 | parser: regex 19 | regex: Faktura VAT sprzedaży\s+(.*) 20 | group: first 21 | amount: 22 | parser: regex 23 | regex: Razem do zapłaty:\s+([\d\s]+,[\d][\d]) 24 | type: float 25 | nrb: 26 | parser: regex 27 | regex: PL\s+([0-9]{2}(?:\s?[0-9]{4}){6}) 28 | options: 29 | currency: PLN 30 | date_formats: 31 | - "%d-%m-%Y" 32 | decimal_separator: "," 33 | priority: 3 34 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/pl/pl.orlen.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Polski Koncern Naftowy ORLEN spółka akcyjna 3 | keywords: 4 | - Polski Koncern Naftowy ORLEN S.A. 
5 | - "774-00-01-454" 6 | fields: 7 | date: Data wystawienia:\s+(\d{4}-\d{2}-\d{2}) 8 | invoice_number: Faktura nr:\s+([\dA-Z/]+) 9 | amount: Należność ogółem:\s+(\d+,\d\d) 10 | vat: 11 | parser: static 12 | value: 7740001454 13 | sums: 14 | parser: lines 15 | start: Razem:.* 16 | end: Należność ogółem 17 | line: ^\s*w tym:\s+(?P<net>\d+,\d\d)\s+(?P<rate>\d+)\s+(?P<vat>\d+,\d\d)\s+(?P<gross>\d+,\d\d)$ 18 | types: 19 | net: float 20 | rate: int 21 | vat: float 22 | gross: float 23 | options: 24 | currency: PLN 25 | date_formats: 26 | - "%Y-%m-%d" 27 | decimal_separator: "," 28 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/pl/pl.p4.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: P4 spółka z ograniczoną odpowiedzialnością 3 | keywords: 4 | - P4 Sp. z o.o. 5 | - "NIP: 951-21-20-077" 6 | fields: 7 | vatin: 8 | parser: static 9 | value: 9512120077 10 | amount: WARTOŚĆ FAKTURY:\s+(\d[\d\s]*,[\d][\d]) 11 | date: Data wystawienia\n\s*(\d{2}\.\d{2}\.\d{4}) 12 | invoice_number: Numer faktury\n\s*([\dA-Z/]+) 13 | nrb: Numer konta bankowego.*\n([0-9]{2}(?:\s?[0-9]{4}){6}) 14 | lines: 15 | parser: lines 16 | start: NAZWA TOWARU LUB USŁUGI\s+NETTO \(ZŁ\)\s+STAWKA VAT\s+VAT \(ZŁ\)\s+BRUTTO \(ZŁ\) 17 | end: WARTOŚĆ FAKTURY 18 | line: ^\s*(?P<name>.+?)\s+(?P<total_net>\d[\d\s]*,\d+)\s+(?P<vat_rate>\d+)\s*%\s+(?P<total_vat>\d[\d\s]*,\d+)\s+(?P<total_gross>\d[\d\s]*,\d+)\s*$ 19 | types: 20 | pos: int 21 | qty: float 22 | unit_net: float 23 | total_net: float 24 | vat_rate: int 25 | total_vat: float 26 | total_gross: float 27 | options: 28 | currency: PLN 29 | date_formats: 30 | - "%d.%m.%Y" 31 | decimal_separator: "," 32 | -------------------------------------------------------------------------------- /src/invoice2data/extract/templates/pl/pl.paypro.yml: 
-------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: PayPro spółka akcyjna 3 | keywords: 4 | - PayPro Spółka Akcyjna 5 | - "NIP: PL779-236-98-87" 6 | fields: 7 | vatin: 8 | parser: static 9 | value: PL7792369887 10 | amount: Do zapłaty:\s+(\d[\d\s]*\.\d{2}) 11 | date: Data wystawienia:\s+(\d{4}-\d{2}-\d{2}) 12 | sale_date: Data sprzedaży:\s+(\d{4}-\d{2}-\d{2}) 13 | invoice_number: NR:\s+([\dA-Z/]+) 14 | lines: 15 | parser: lines 16 | start: Lp.\s+Nazwa towaru lub usługi\s+Ilość\s+J.m.\n.* 17 | end: Razem wartość 18 | first_line: ^\s*(?P<pos>[\d]+)\s+(?P<name>.+?)\s*$ 19 | line: ^\s+((?P<vat_rate>.+)\s+(?P<qty>[\d\.]+)\s+(?P<unit>[\w\.]+)\s+(?P<unit_net>\d+\.\d\d)\s+(?P<total_net>\d+\.\d\d)\s+(?P<total_gross>\d+\.\d\d)|(?P<name>.+))\s*$ 20 | types: 21 | pos: int 22 | qty: float 23 | unit_net: float 24 | total_net: float 25 | total_gross: float 26 | vat_lines: 27 | parser: lines 28 | start: Wg stawek VAT\s+Kwota VAT\s+Wartość brutto\n.* 29 | end: "Razem:" 30 | line: ^\s+(?P<vat_rate>.+)\s+(?P<net>\d+\.\d\d)\s+(?P<vat>\d+\.\d\d)\s+(?P<gross>\d+\.\d\d)$ 31 | types: 32 | net: float 33 | vat: float 34 | gross: float 35 | options: 36 | currency: PLN 37 | date_formats: 38 | - "%Y-%m-%d" 39 | decimal_separator: "." 
def _apply_grouping(settings: Dict[str, Any], result: Any) -> Optional[Any]:
    """Reduce a sequence of extracted values according to ``settings["group"]``.

    Args:
        settings (Dict[str, Any]): Field settings. Grouping is only applied
            when the ``"group"`` key is present.
        result (Any): Iterable of extracted values.

    Returns:
        Optional[Any]: ``result`` untouched when no grouping is configured;
        the (empty) filtered list when no truthy value remains; the reduced
        value for a supported method; ``None`` for an unsupported method.
    """
    if "group" not in settings:
        return result

    # Falsy entries (None, "", 0, ...) are dropped before grouping.
    values = [entry for entry in result if entry]
    if not values:
        return values

    method = settings["group"]
    # Ordered (name, reducer) pairs; matched with ``==`` so that any kind of
    # configured value (even an unhashable one) is compared, not hashed.
    reducers = (
        ("sum", sum),
        ("min", min),
        ("max", max),
        ("first", lambda seq: seq[0]),
        ("last", lambda seq: seq[-1]),
        # "join" yields a single-element list holding the joined string.
        ("join", lambda seq: [" ".join(str(v) for v in seq)]),
    )
    for name, reduce_values in reducers:
        if method == name:
            return reduce_values(values)

    logger.warning("Unsupported grouping method: %s", settings["group"])
    return None
def to_text(path: str, **kwargs: Dict[str, Any]) -> str:
    """Extract text from PDF using pdfplumber.

    Args:
        path (str): Path to the PDF file.
        **kwargs (Dict[str, Any]): Keyword arguments forwarded to
            ``page.extract_text``. They take precedence over the built-in
            defaults below, so a caller may override e.g. ``y_tolerance``.

    Returns:
        str: Text of all pages, in page order, separated by a blank line.

    Raises:
        ImportError: If `pdfplumber` is not installed.
    """
    try:
        import pdfplumber  # type: ignore[import-not-found]
    except ImportError:
        logger.debug("Cannot import pdfplumber")
        # Without the module nothing below can work; surface the real cause
        # instead of failing later on an unbound name.
        raise

    extract_options: Dict[str, Any] = {
        "layout": True,
        "use_text_flow": True,
        "x_tolerance": 6,
        "y_tolerance": 4,  # y_tolerance=6, dirty Fix for html table problem
        "keep_blank_chars": True,
    }
    # Merge instead of passing both explicitly: a duplicated keyword would
    # otherwise raise TypeError at the call site.
    extract_options.update(kwargs)

    with pdfplumber.open(path, laparams={"detect_vertical": True}) as pdf:
        # ``or ""`` guards against a page yielding no text object at all.
        page_texts = [
            page.extract_text(**extract_options) or "" for page in pdf.pages
        ]

    logger.debug("Text extraction made with pdfplumber")
    # Return the extracted text itself; a blank line keeps the last line of
    # one page from merging with the first line of the next.
    return "\n\n".join(page_texts)
def write_to_file(
    data: List[Dict[str, Any]], path: str, date_format: str = "%Y-%m-%d"
) -> None:
    """Export extracted fields to CSV.

    Appends .csv to path if missing and generates a CSV file in the specified
    directory, otherwise in the current directory.

    A header row is written before the first record and again whenever the
    list of keys differs from the previous record's.

    Args:
        data (List[Dict[str, Any]]): A list of dictionaries of extracted fields. If only a
            single file was processed, it must be passed as a
            single-element list.
        path (str): CSV file to save output to.
        date_format (str): Date format used in the generated file.
            Defaults to "%Y-%m-%d".

    Notes:
        Provide a filename to the `path` parameter.

    Examples:
        >>> from invoice2data.output import to_csv
        >>> data = [{'amount': 123.45, 'date': datetime.datetime(2024, 1, 1)}]
        >>> to_csv.write_to_file(data, "invoice.csv")
    """
    filename = path if path.endswith(".csv") else path + ".csv"

    with open(filename, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file, delimiter=",")

        last_header = None
        for line in data:
            header = list(line.keys())

            if header != last_header:
                writer.writerow(header)
                last_header = header

            csv_items = []
            for k, v in line.items():
                is_date_key = k.startswith("date") or k.endswith("date")
                # Only format values that really are date-like: a date field
                # may be missing (None) or already a string, and unrelated
                # keys such as "update" also end with "date".
                if is_date_key and hasattr(v, "strftime"):
                    v = v.strftime(date_format)
                csv_items.append(v)
            writer.writerow(csv_items)
def get_sample_files(extension: str, exclude_input_specific: bool = True) -> List[str]:
    """Collect sample file paths that end with the given extension.

    Args:
        extension (str): Suffix a file name must end with to be returned.
        exclude_input_specific (bool, optional): When true, files for which
            `inputparser_specific` returns True are skipped. Defaults to True.

    Returns:
        List[str]: Paths of the matching files, in `os.walk` order.
    """
    # NOTE(review): dirname("./tests/compare") evaluates to "./tests", so the
    # whole tests tree (relative to the current working directory) is walked,
    # not only the compare folder - confirm this is intended.
    search_root = os.path.dirname("./tests/compare")
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(search_root)
        for filename in filenames
        if not (exclude_input_specific and inputparser_specific(filename))
        and filename.endswith(extension)
    ]
58 | """ 59 | return file.startswith("saeco") 60 | -------------------------------------------------------------------------------- /tests/compare/AmazonWebServices.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Amazon Web Services", 4 | "amount": 4.11, 5 | "amount_untaxed": 4.11, 6 | "date": "2014-08-03", 7 | "invoice_number": "42183017", 8 | "partner_name": "Amazon Web Services, Inc.", 9 | "partner_website": "aws.amazon.com", 10 | "currency": "USD", 11 | "lines": [ 12 | { 13 | "description": "AWS Data Transfer", 14 | "price_unit": "0.01" 15 | }, 16 | { 17 | "description": "Amazon Elastic Compute Cloud", 18 | "price_unit": "1.87" 19 | }, 20 | { 21 | "description": "Amazon Glacier", 22 | "price_unit": "2.22" 23 | }, 24 | { 25 | "description": "Amazon Simple Storage Service", 26 | "price_unit": "0.01" 27 | } 28 | ], 29 | "desc": "Invoice from Amazon Web Services" 30 | } 31 | ] 32 | -------------------------------------------------------------------------------- /tests/compare/AmazonWebServices.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/AmazonWebServices.pdf -------------------------------------------------------------------------------- /tests/compare/AmazonWebServices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/AmazonWebServices.png -------------------------------------------------------------------------------- /tests/compare/AzureInterior.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/AzureInterior.pdf 
-------------------------------------------------------------------------------- /tests/compare/FlipkartInvoice.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Flipkart", 4 | "amount": 319.0, 5 | "date": "2015-10-20", 6 | "invoice_number": "#BLR_WFLD20151000982590", 7 | "order_id": "OD304175096047380001", 8 | "currency": "INR", 9 | "desc": "Invoice from Flipkart" 10 | } 11 | ] 12 | -------------------------------------------------------------------------------- /tests/compare/FlipkartInvoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/FlipkartInvoice.pdf -------------------------------------------------------------------------------- /tests/compare/FlipkartInvoice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/FlipkartInvoice.png -------------------------------------------------------------------------------- /tests/compare/NetpresseInvoice.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "NETPRESSE", 4 | "amount": 56.02, 5 | "amount_untaxed": 46.68, 6 | "date": "2022-11-28", 7 | "invoice_number": "2022089083", 8 | "siren": 530848134, 9 | "currency": "EUR", 10 | "desc": "Invoice from NETPRESSE" 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /tests/compare/NetpresseInvoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/NetpresseInvoice.pdf -------------------------------------------------------------------------------- 
/tests/compare/Orlen.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Polski Koncern Naftowy ORLEN spółka akcyjna", 4 | "date": "2021-01-01", 5 | "invoice_number": "F", 6 | "amount": 316.83, 7 | "vat": 7740001454, 8 | "sums": [ 9 | { 10 | "net": 257.59, 11 | "rate": 23, 12 | "vat": 59.24, 13 | "gross": 316.83 14 | } 15 | ], 16 | "currency": "PLN", 17 | "desc": "Invoice from Polski Koncern Naftowy ORLEN spółka akcyjna" 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /tests/compare/Orlen.txt: -------------------------------------------------------------------------------- 1 | Faktura nr: F 1234K20/1234/12 Data wystawienia: 2021-01-01 2 | Sprzedawca: Polski Koncern Naftowy ORLEN S.A. 3 | NIP: 774-00-01-454 4 | Adres: Chemików 7 09-411 Płock 5 | Polski Koncern Naftowy ORLEN S.A. 6 | Stacja Paliw Nr 4445 7 | 58-100 Słotwina 8 | Słotwina 62x 9 | Nabywca: GŁÓWNY URZĄD STATYSTYCZNY 10 | NIP: 5261040828 11 | Adres: Aleja Niepodległości 208 00-925 Warszawa 12 | 13 | 14 | Cena 15 | Cena Wartość Wartość VAT Wartość 16 | Lp Nazwa towaru Jm Ilość [jm] brutto po Kwota VAT 17 | brutto rabatu netto [%] brutto 18 | rabacie 19 | 20 | 1 EFECTA 95 CN27101245 l 54,910 5,79 1,10 5,77 257,59 23 59,24 316,83 21 | 22 | 23 | 24 | Razem: 257,59 59,24 316,83 25 | w tym: 257,59 23 59,24 316,83 26 | 27 | 28 | 29 | Należność ogółem: 316,83 PLN 30 | 31 | Słownie: trzysta szesnaście PLN, 83/100 32 | 33 | 34 | Do dokumentów: Dok. 
wydania: 1234567 Dok.fisk.nr 12345 z dnia 2021-01-01 35 | Zapłacono: Aplikacja mobilna ORLEN Pay 36 | Faktura wygenerowana automatycznie 37 | -------------------------------------------------------------------------------- /tests/compare/QualityHosting.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "QualityHosting AG", 4 | "amount": 34.73, 5 | "amount_untaxed": 34.73, 6 | "date": "2014-05-07", 7 | "invoice_number": "30064443", 8 | "vat": "DE 232 446 240", 9 | "currency": "EUR", 10 | "lines": [ 11 | { 12 | "pos": "1", 13 | "qty": 1.0, 14 | "desc": "Small Business StandardExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_strukan\n01.05.14-31.05.14", 15 | "price": 3.89 16 | }, 17 | { 18 | "pos": "2", 19 | "qty": 1.0, 20 | "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_schneider\n01.05.14-31.05.14", 21 | "price": 5.39 22 | }, 23 | { 24 | "pos": "3", 25 | "qty": 1.0, 26 | "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_minar\n01.05.14-31.05.14", 27 | "price": 5.39 28 | }, 29 | { 30 | "pos": "4", 31 | "qty": 1.0, 32 | "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_mayr\n01.05.14-31.05.14", 33 | "price": 5.39 34 | }, 35 | { 36 | "pos": "5", 37 | "qty": 1.0, 38 | "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_jenewein\n01.05.14-31.05.14", 39 | "price": 5.39 40 | }, 41 | { 42 | "pos": "6", 43 | "qty": 1.0, 44 | "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_jauernik\n01.05.14-31.05.14\nQualityHosting AG - Uferweg 40-42 - D-63571 Gelnhausen\niViveLabs Ltd.\n93B Sai Yu Chung\nYuen Long, N.T.\nHong Kong\nPos. 
Menge Beschreibung Rabatt % VK-Preis Zeilenbetrag\nOhne Ohne MwSt.\nMwSt.", 45 | "price": 5.39 46 | }, 47 | { 48 | "pos": "7", 49 | "qty": 1.0, 50 | "desc": "Small Business StandardExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_office\n01.05.14-31.05.14\n", 51 | "price": 3.89 52 | } 53 | ], 54 | "desc": "Invoice from QualityHosting AG" 55 | } 56 | ] 57 | -------------------------------------------------------------------------------- /tests/compare/QualityHosting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/QualityHosting.pdf -------------------------------------------------------------------------------- /tests/compare/SammyMaystoneLinesTest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Sammy Maystone - For Lines testing", 4 | "invoice_number": "invoice_number_1", 5 | "date": "2022-01-01", 6 | "line_items": [ 7 | { 8 | "item": "A", 9 | "desc": "Parts: 1 x cap_a" 10 | }, 11 | { 12 | "item": "B", 13 | "desc": "Parts: 2 x shop supplies" 14 | } 15 | ], 16 | "currency": "USD", 17 | "desc": "Invoice from Sammy Maystone - For Lines testing" 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /tests/compare/SammyMaystoneLinesTest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/SammyMaystoneLinesTest.pdf -------------------------------------------------------------------------------- /tests/compare/SammyMaystoneLinesTest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/SammyMaystoneLinesTest.png 
-------------------------------------------------------------------------------- /tests/compare/coolblue1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Coolblue B.V.", 4 | "amount": 717.97, 5 | "amount_untaxed": 593.36, 6 | "partner_coc": "24330087", 7 | "narration": "Ordernummer: 12572103", 8 | "note": "Klantnummer: 6669263 Ordernummer: 12572103 Orderdatum: 18 april 2014 Alles voor een glimlach.", 9 | "partner_name": "Coolblue B.V.", 10 | "country_code": "NL", 11 | "partner_zip": "3012 CN", 12 | "partner_city": "Rotterdam", 13 | "partner_street": "Weena 664", 14 | "vat": "NL810433941B01", 15 | "bic": "INGBNL2A", 16 | "iban": "NL50INGB0683251309", 17 | "date": "2014-04-19", 18 | "invoice_number": "993548900", 19 | "payment_method": "iDEAL", 20 | "lines": [ 21 | { 22 | "name": "Apple iPad Air Wifi 16 GB Zilver", 23 | "qty": 1, 24 | "price_unit": 399.0, 25 | "line_tax_percent": 21, 26 | "price_subtotal": 399.0 27 | }, 28 | { 29 | "line_note": "Serienummer: SDMPP373MPP15" 30 | }, 31 | { 32 | "name": "Decoded Leather Slim Cover Apple iPad Air 2 Zwart", 33 | "qty": 1, 34 | "price_unit": 69.99, 35 | "line_tax_percent": 21, 36 | "price_subtotal": 69.99 37 | }, 38 | { 39 | "name": "Nintendo 3DS XL Wit + Blauw", 40 | "qty": 1, 41 | "price_unit": 189.0, 42 | "line_tax_percent": 21, 43 | "price_subtotal": 189.0 44 | }, 45 | { 46 | "name": "Nintendo AC-adapter", 47 | "qty": 1, 48 | "price_unit": 14.99, 49 | "line_tax_percent": 21, 50 | "price_subtotal": 14.99 51 | }, 52 | { 53 | "name": "Mario Kart 7 3DS", 54 | "qty": 1, 55 | "price_unit": 44.99, 56 | "line_tax_percent": 21, 57 | "price_subtotal": 44.99 58 | }, 59 | { 60 | "name": "Incl. 
Thuiskopieheffing: Thuiskopie €3.50", 61 | "qty": 1, 62 | "line_tax_percent": 21, 63 | "price_subtotal": 4.24 64 | } 65 | ], 66 | "currency": "EUR", 67 | "desc": "Invoice from Coolblue" 68 | } 69 | ] 70 | -------------------------------------------------------------------------------- /tests/compare/coolblue1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/coolblue1.pdf -------------------------------------------------------------------------------- /tests/compare/coolblue2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/coolblue2.pdf -------------------------------------------------------------------------------- /tests/compare/free_fiber.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Free", 4 | "amount": 29.99, 5 | "amount_untaxed": 24.99, 6 | "date": "2015-07-02", 7 | "date_due": "2015-07-05", 8 | "invoice_number": "562044387", 9 | "vat": "FR60421938861", 10 | "date_start": "2015-07-01", 11 | "date_end": "2015-07-31", 12 | "siren": "421 938 861", 13 | "currency": "EUR", 14 | "line_number": "FO10479674", 15 | "client_id": "10577874", 16 | "desc": "Invoice from Free" 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /tests/compare/free_fiber.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/free_fiber.pdf -------------------------------------------------------------------------------- /tests/compare/oyo.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | 
"issuer": "OYO", 4 | "amount": 1939.0, 5 | "date": "2017-12-31", 6 | "invoice_number": "IBZY2087", 7 | "currency": "INR", 8 | "hotel_details": " OYO 4189 Resort Nanganallur", 9 | "date_check_in": "2017-12-31", 10 | "date_check_out": "2018-01-01", 11 | "amount_rooms": 1.0, 12 | "booking_id": "IBZY2087", 13 | "payment_method": "Cash at Hotel", 14 | "gstin": "06AABCO6063D1ZQ", 15 | "cin": "U63090DL2012PTC231770", 16 | "desc": "Invoice from OYO" 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /tests/compare/oyo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/oyo.pdf -------------------------------------------------------------------------------- /tests/compare/oyo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/oyo.png -------------------------------------------------------------------------------- /tests/compare/saeco.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "e-Luscious Nederland B.V.", 4 | "amount": 49.99, 5 | "amount_tax": 8.68, 6 | "amount_untaxed": 41.31, 7 | "date": "2022-09-08", 8 | "date_due": "2022-09-22", 9 | "invoice_number": "VF1005193039", 10 | "payment_method": "Credit Card", 11 | "static_vat": "NL815254295B01", 12 | "bic": "RABONL2U", 13 | "iban": "NL58RABO0198723202", 14 | "telephone": "020 7604101", 15 | "partner_website": "saeco.com", 16 | "partner_email": "verkoop-nl@saeco.com", 17 | "partner_name": "e-Luscious Nederland B.V.", 18 | "country_code": "nl", 19 | "partner_zip": "8401 BL", 20 | "partner_city": "Gorredijk", 21 | "partner_street": "Badweg 48", 22 | "partner_coc": "04080176", 23 | "narration": ["SCONL0303006280999", 
"SCONL000000444"], 24 | "currency": "EUR", 25 | "lines": [ 26 | { 27 | "code": "E103184", 28 | "name": "Onderhoudsset CA6707/10", 29 | "price_unit": 49.99, 30 | "taxpercent": 21.0, 31 | "qty": 1.0, 32 | "uom": "PCS", 33 | "price_subtotal": 49.99 34 | }, 35 | { 36 | "code": "E103560", 37 | "name": "SX Flyer SAECO Korting KVDNL", 38 | "price_unit": 0.0, 39 | "taxpercent": 21.0, 40 | "qty": 1.0, 41 | "uom": "PCS", 42 | "price_subtotal": 0.0 43 | } 44 | ], 45 | "desc": "Invoice from e-Luscious Nederland B.V." 46 | } 47 | ] 48 | -------------------------------------------------------------------------------- /tests/compare/saeco.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invoice-x/invoice2data/1d3c5d3ef261fcba6a258e667c7091c72d807daa/tests/compare/saeco.pdf -------------------------------------------------------------------------------- /tests/custom/basic.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Basic Test", 4 | "date": "2022-09-27", 5 | "invoice_number": "0999/09/2022", 6 | "amount": 123.45, 7 | "currency": "EUR", 8 | "desc": "Invoice from Basic Test" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /tests/custom/basic.txt: -------------------------------------------------------------------------------- 1 | Issue date: 2022-09-27 2 | Issuer: Basic Test 3 | Invoice number: 0999/09/2022 4 | Total: 123.45 EUR 5 | -------------------------------------------------------------------------------- /tests/custom/lines-basic.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Lines Tests", 4 | "date": "2022-10-22", 5 | "invoice_number": "007/10/2022", 6 | "amount": 123.4, 7 | "simple_lines": [ 8 | { "number": "One" }, 9 | { "number": "Two" }, 10 | { "number": "Three" } 11 | ], 12 | "column_lines": [ 13 | { "pos": 1, 
"name": "Apple", "qty": 2.0, "unit": "kg" }, 14 | { "pos": 2, "name": "Tomato", "qty": 0.5, "unit": "kg" }, 15 | { "pos": 3, "name": "Watermelon", "qty": 1.0, "unit": "" } 16 | ], 17 | "currency": "EUR", 18 | "desc": "Invoice from Lines Tests" 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /tests/custom/lines-basic.txt: -------------------------------------------------------------------------------- 1 | Issue date: 2022-10-22 2 | Issuer: Lines Tests 3 | Invoice number: 007/10/2022 4 | Total: 123.40 EUR 5 | 6 | Lines basic 7 | 8 | Simple lines start 9 | One 10 | Two 11 | Three 12 | Simple lines end 13 | 14 | Columns start 15 | 1. Apple 2 kg 16 | 2. Tomato 0.5 kg 17 | 3. Watermelon 1 18 | Columns end 19 | -------------------------------------------------------------------------------- /tests/custom/lines-blocks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Lines Tests", 4 | "date": "2022-10-15", 5 | "invoice_number": "1234/10/2022", 6 | "amount": 99.99, 7 | "lines": [ 8 | { "pos": 1, "name": "Cat" }, 9 | { "pos": 2, "name": "Dog" }, 10 | { "pos": 3, "name": "Frog" }, 11 | { "pos": 4, "name": "Lizard" }, 12 | { "pos": 5, "name": "Unicorn" } 13 | ], 14 | "currency": "EUR", 15 | "desc": "Invoice from Lines Tests" 16 | } 17 | ] 18 | -------------------------------------------------------------------------------- /tests/custom/lines-blocks.txt: -------------------------------------------------------------------------------- 1 | Issue date: 2022-10-15 2 | Issuer: Lines Tests 3 | Invoice number: 1234/10/2022 4 | Total: 99.99 EUR 5 | 6 | Lines in multiple blocks 7 | 8 | Lines start 9 | 1. Cat 10 | 2. Dog 11 | Lines end 12 | 13 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus quis metus sagittis, fermentum 14 | risus et, vulputate orci. Curabitur id pellentesque mi, vel euismod nulla. Morbi tincidunt ipsum 15 | eu volutpat dictum. 
Nam hendrerit varius mauris, a venenatis ligula lacinia et. Sed blandit 16 | lobortis facilisis. Donec efficitur metus ac sapien luctus, eget facilisis dolor eleifend. In sapien 17 | erat, vestibulum in sollicitudin a, euismod nec nunc. 18 | 19 | Lines start 20 | 3. Frog 21 | Lines end 22 | 23 | Nulla elit dui, dictum in augue ac, rutrum mollis risus. In hac habitasse platea dictumst. Phasellus 24 | quis eros ac elit iaculis vehicula et vel nunc. Aenean consequat in velit vel luctus. Proin vel 25 | sapien cursus, ultrices turpis vel, fringilla dolor. Vestibulum ex leo, ullamcorper a quam quis, 26 | molestie convallis est. Nulla egestas posuere purus, eget viverra elit dapibus et. Pellentesque 27 | habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Duis posuere eros 28 | dui. 29 | 30 | Lines start 31 | 4. Lizard 32 | 5. Unicorn 33 | Lines end 34 | 35 | In varius nulla arcu, ac interdum velit ornare vel. Mauris a placerat lacus. Nam porta metus eget 36 | arcu mattis, non iaculis elit luctus. Etiam rutrum volutpat arcu, vitae semper turpis mollis id. 37 | Fusce orci dui, pellentesque et ipsum eget, pellentesque luctus leo. Nullam non mollis mi. In 38 | semper, ex sed mollis dapibus, lectus metus vestibulum turpis, vitae convallis mauris eros in orci. 39 | Interdum et malesuada fames ac ante ipsum primis in faucibus. 
40 | -------------------------------------------------------------------------------- /tests/custom/lines-multiple-patterns.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Lines Tests", 4 | "date": "2022-10-22", 5 | "invoice_number": "9999/10/2022", 6 | "amount": 50.0, 7 | "lines": [ 8 | { "group": "Mammals" }, 9 | { "pos": 1, "name": "Hedgehog" }, 10 | { "pos": 2, "name": "Squirrel" }, 11 | { "group": "Birds" }, 12 | { "subgroup": "Flying" }, 13 | { "pos": 3, "name": "Raven" }, 14 | { "pos": 4, "name": "Owl" }, 15 | { "pos": 5, "name": "Crow" }, 16 | { "subgroup": "Flightless" }, 17 | { "pos": 6, "name": "Penguin" }, 18 | { "pos": 7, "name": "Ostrich" } 19 | ], 20 | "dimensions": [ 21 | { "pos": 1, "angle": 30, "length": 30 }, 22 | { "pos": 2, "angle": 45, "length": 40 }, 23 | { "pos": 3, "angle": 90, "length": 60 }, 24 | { "pos": 4, "length": 80, "angle": 135 }, 25 | { "pos": 5, "length": 100, "angle": 180 } 26 | ], 27 | "currency": "EUR", 28 | "desc": "Invoice from Lines Tests" 29 | } 30 | ] 31 | -------------------------------------------------------------------------------- /tests/custom/lines-multiple-patterns.txt: -------------------------------------------------------------------------------- 1 | Issue date: 2022-10-22 2 | Issuer: Lines Tests 3 | Invoice number: 9999/10/2022 4 | Total: 50.00 EUR 5 | 6 | Lines with multiple patterns 7 | 8 | 9 | Lines start 10 | 11 | Group: Mammals 12 | 1. Hedgehog 13 | 2. Squirrel 14 | 15 | Group: Birds 16 | Subgroup: Flying 17 | 3. Raven 18 | 4. Owl 19 | 5. Crow 20 | Subgroup: Flightless 21 | 6. Penguin 22 | 7. 
Ostrich 23 | 24 | Lines end 25 | 26 | 27 | No Angle [°] Length [cm] 28 | 1 30 30 29 | 2 45 40 30 | 3 90 60 31 | Count: 3 32 | 33 | No Length [cm] Angle [°] 34 | 4 80 135 35 | 5 100 180 36 | Count: 2 37 | -------------------------------------------------------------------------------- /tests/custom/table-groups.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "issuer": "Table Groups Tests", 4 | "date": "2024-12-20", 5 | "invoice_number": "007/10/2024", 6 | "amount": 123.4, 7 | "currency": "EUR", 8 | "hotel_details": [ 9 | "OYO 4189 Resort Nanganallur", 10 | "OYO 4189 Resort Nanganallur Suite A" 11 | ], 12 | "date_check_in": "2024-01-08", 13 | "date_check_out": "2024-12-31", 14 | "qty_rooms": 2, 15 | "line_tax_percent": ["1%", "2%", "0%"], 16 | "lamount_tax": ["3.00", "2.00", "0.00"], 17 | "random_num_to_sum": 11.01, 18 | "desc": "Invoice from Table Groups Tests" 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /tests/custom/table-groups.txt: -------------------------------------------------------------------------------- 1 | Issue date: 2024-12-20 2 | Issuer: Table Group Tests 3 | Invoice number: 007/10/2024 4 | Total: 123.40 EUR 5 | 6 | Table basic 7 | 8 | Simple table start 9 | Tax precentage amount qty 10 | 1% 3.00 7.00 11 | 2% 2.00 4.00 12 | 0% 0.00 0.01 13 | Simple table end 14 | 15 | 16 | 17 | Sample data below to test advanced grouping functions of table parser. 
18 | 19 | Guest Name: Sanjay 20 | 21 | Hotel Details Check In Check Out Rooms 22 | OYO 4189 Resort Nanganallur, 01/08/2024 01/01/2018 1 23 | OYO 4189 Resort Nanganallur Suite A, 31/12/2017 31/12/2024 1 24 | 25,Vembuliamman Koil Street,, Pazhavanthangal, Chennai 25 | Booking ID Payment Mode 26 | IBZY2087 Cash at Hotel 27 | 28 | 29 | invoice2data --input-reader=text --debug ./table-groups.txt 30 | 31 | invoice2data ./table-groups.txt --debug -t ./templates 32 | -------------------------------------------------------------------------------- /tests/custom/templates/basic.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Basic Test 3 | keywords: 4 | - Basic Test 5 | fields: 6 | date: 7 | parser: regex 8 | regex: Issue date:\s*(\d{4}-\d{2}-\d{2}) 9 | type: date 10 | invoice_number: 11 | parser: regex 12 | regex: Invoice number:\s*([\d/]+) 13 | amount: 14 | parser: regex 15 | regex: Total:\s*(\d+\.\d\d) 16 | type: float 17 | missing: 18 | parser: regex 19 | regex: This field is missing (.*) 20 | options: 21 | currency: EUR 22 | date_formats: 23 | - "%Y-%m-%d" 24 | decimal_separator: "." 
25 | -------------------------------------------------------------------------------- /tests/custom/templates/lines-basic.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Lines Tests 3 | keywords: 4 | - Lines Tests 5 | - Lines basic 6 | fields: 7 | date: 8 | parser: regex 9 | regex: Issue date:\s*(\d{4}-\d{2}-\d{2}) 10 | type: date 11 | invoice_number: 12 | parser: regex 13 | regex: Invoice number:\s*([\d/]+) 14 | amount: 15 | parser: regex 16 | regex: Total:\s*(\d+\.\d\d) 17 | type: float 18 | simple_lines: 19 | parser: lines 20 | start: Simple lines start 21 | end: Simple lines end 22 | line: ^(?P<number>.+)$ 23 | zero_lines: 24 | parser: lines 25 | start: Simple lines start 26 | end: Simple lines end 27 | line: ^Nonexistent lines pattern (?P<name>.+)$ 28 | column_lines: 29 | parser: lines 30 | start: Columns start 31 | end: Columns end 32 | line: ^(?P<pos>\d+)\.\s+(?P<name>.+)\s+(?P<qty>\d+(\.\d+)?)(?:\s+(?P<unit>.+))?$ 33 | types: 34 | pos: int 35 | qty: float 36 | options: 37 | currency: EUR 38 | date_formats: 39 | - "%Y-%m-%d" 40 | decimal_separator: "." 41 | -------------------------------------------------------------------------------- /tests/custom/templates/lines-blocks.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Lines Tests 3 | keywords: 4 | - Lines Tests 5 | - Lines in multiple blocks 6 | fields: 7 | date: 8 | parser: regex 9 | regex: Issue date:\s*(\d{4}-\d{2}-\d{2}) 10 | type: date 11 | invoice_number: 12 | parser: regex 13 | regex: Invoice number:\s*([\d/]+) 14 | amount: 15 | parser: regex 16 | regex: Total:\s*(\d+\.\d\d) 17 | type: float 18 | lines: 19 | parser: lines 20 | start: Lines start 21 | end: Lines end 22 | line: ^(?P<pos>\d+)\.\s+(?P<name>.+)$ 23 | types: 24 | pos: int 25 | options: 26 | currency: EUR 27 | date_formats: 28 | - "%Y-%m-%d" 29 | decimal_separator: "." 
30 | -------------------------------------------------------------------------------- /tests/custom/templates/lines-multiple-patterns.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Lines Tests 3 | keywords: 4 | - Lines Tests 5 | - Lines with multiple patterns 6 | fields: 7 | date: 8 | parser: regex 9 | regex: Issue date:\s*(\d{4}-\d{2}-\d{2}) 10 | type: date 11 | invoice_number: 12 | parser: regex 13 | regex: Invoice number:\s*([\d/]+) 14 | amount: 15 | parser: regex 16 | regex: Total:\s*(\d+\.\d\d) 17 | type: float 18 | lines: 19 | parser: lines 20 | start: Lines start 21 | end: Lines end 22 | line: 23 | - ^Group:\s*(?P<group>.+)$ 24 | - ^(?P<pos>\d+)\.\s+(?P<name>.+)$ 25 | - ^Subgroup:\s*(?P<subgroup>.+)$ 26 | types: 27 | pos: int 28 | dimensions: 29 | parser: lines 30 | rules: 31 | - start: No.*Angle.*Length 32 | end: Count 33 | line: ^(?P<pos>\d+)\s+(?P<angle>\d+)\s+(?P<length>\d+)$ 34 | types: 35 | pos: int 36 | angle: int 37 | length: int 38 | - start: No.*Length.*Angle 39 | end: Count 40 | line: ^(?P<pos>\d+)\s+(?P<length>\d+)\s+(?P<angle>\d+)$ 41 | types: 42 | pos: int 43 | angle: int 44 | length: int 45 | options: 46 | currency: EUR 47 | date_formats: 48 | - "%Y-%m-%d" 49 | decimal_separator: "." 
50 | -------------------------------------------------------------------------------- /tests/custom/templates/table-groups.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | issuer: Table Groups Tests 3 | keywords: 4 | - Table basic 5 | - Simple table start 6 | 7 | tables: 8 | - start: Hotel Details\s+Check In\s+Check Out\s+Rooms 9 | end: Booking ID 10 | body: (?P<hotel_details>[\S ]+),\s+(?P<date_check_in>\d{1,2}\/\d{1,2}\/\d{4})\s+(?P<date_check_out>\d{1,2}\/\d{1,2}\/\d{4})\s+(?P<qty_rooms>\d) 11 | types: 12 | qty_rooms: int 13 | fields: 14 | qty_rooms: 15 | group: sum 16 | date_check_in: 17 | group: first 18 | date_check_out: 19 | group: last 20 | - start: Tax precentage amount qty 21 | end: Simple table end 22 | body: (?P<line_tax_percent>\d[%])\s+(?P<lamount_tax>\d\.\d{2})\s+(?P<random_num_to_sum>\d\.\d{2}) 23 | fields: 24 | random_num_to_sum: 25 | group: sum 26 | # type: float # This is also supported 27 | types: 28 | random_num_to_sum: float # this is supported 29 | 30 | fields: 31 | date: 32 | parser: regex 33 | regex: Issue date:\s*(\d{4}-\d{2}-\d{2}) 34 | type: date 35 | invoice_number: 36 | parser: regex 37 | regex: Invoice number:\s*([\d/]+) 38 | amount: 39 | parser: regex 40 | regex: Total:\s*(\d+\.\d\d) 41 | type: float 42 | options: 43 | currency: EUR 44 | date_formats: 45 | - "%Y-%m-%d" 46 | decimal_separator: "." 
# File: tests/test_main.py
"""Test cases for the __main__ module."""

import pytest
from click.testing import CliRunner

from invoice2data import __main__


@pytest.fixture
def runner() -> CliRunner:
    """Provide a Click ``CliRunner`` for invoking command-line interfaces.

    Returns:
        A newly constructed ``CliRunner`` instance.
    """
    return CliRunner()


def test_main_succeeds(runner: CliRunner) -> None:
    """Invoke ``__main__.main`` without arguments and expect exit status zero.

    Args:
        runner: The ``CliRunner`` supplied by the ``runner`` fixture.
    """
    result = runner.invoke(__main__.main)
    # Use the captured CLI output as the assertion message so that a failing
    # run shows what the command printed, not just the exit code.
    assert result.exit_code == 0, result.output