├── DATASET_LICENSE
├── DISCLAIMER
├── Dockerfile
├── LICENSE
├── README.md
├── docxpand
│   ├── blank.svg
│   ├── canvas.py
│   ├── conditionals.py
│   ├── dataset.py
│   ├── generator.py
│   ├── geometry.py
│   ├── image.py
│   ├── instantiable.py
│   ├── metrics.py
│   ├── normalizer.py
│   ├── providers
│   │   ├── __init__.py
│   │   ├── address
│   │   │   ├── de_DE
│   │   │   │   └── __init__.py
│   │   │   ├── es_ES
│   │   │   │   └── __init__.py
│   │   │   ├── fr_FR
│   │   │   │   └── __init__.py
│   │   │   ├── nl_NL
│   │   │   │   └── __init__.py
│   │   │   └── pt_PT
│   │   │       └── __init__.py
│   │   ├── authority
│   │   │   ├── en_GB
│   │   │   │   └── __init__.py
│   │   │   ├── fr_FR
│   │   │   │   └── __init__.py
│   │   │   ├── nl_NL
│   │   │   │   └── __init__.py
│   │   │   └── pt_PT
│   │   │       └── __init__.py
│   │   ├── barcode
│   │   │   └── __init__.py
│   │   ├── date_time
│   │   │   └── __init__.py
│   │   ├── id
│   │   │   └── en_GB
│   │   │       └── driving_license.py
│   │   ├── mrz
│   │   │   └── __init__.py
│   │   ├── person
│   │   │   └── es_ES
│   │   │       └── __init__.py
│   │   ├── photo
│   │   │   ├── __init__.py
│   │   │   └── halftone
│   │   │       ├── LICENSE
│   │   │       └── __init__.py
│   │   ├── residence_permit
│   │   │   └── __init__.py
│   │   └── signature
│   │       └── __init__.py
│   ├── scene_insertion.py
│   ├── specimen.py
│   ├── specimens
│   │   ├── SOURCES.md
│   │   └── photos
│   │       ├── SOURCES.md
│   │       ├── man_1.jpg
│   │       ├── man_2.jpg
│   │       ├── woman_1.jpg
│   │       └── woman_2.jpg
│   ├── svg_to_image.py
│   ├── template.py
│   ├── templates
│   │   ├── id_card_td1_a
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── MsMadi-Regular.ttf
│   │   │   │   ├── OCR-B.ttf
│   │   │   │   └── SpaceGrotesk-VariableFont_wght.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── id_card_td1_b
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── Allura-Regular.ttf
│   │   │   │   ├── Karla-VariableFont_wght.ttf
│   │   │   │   └── OCR-B.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── id_card_td2_a
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── Bellefair-Regular.ttf
│   │   │   │   ├── Gill-Sans.otf
│   │   │   │   ├── HomemadeApple-Regular.ttf
│   │   │   │   └── OCR-B.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── id_card_td2_b
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── ComforterBrush-Regular.ttf
│   │   │   │   ├── OCR-B.ttf
│   │   │   │   ├── Oswald-Light.ttf
│   │   │   │   ├── Oswald-Medium.ttf
│   │   │   │   ├── Raleway-Black.ttf
│   │   │   │   └── Raleway-Bold.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── pp_td3_a
│   │   │   ├── fonts
│   │   │   │   ├── Codystar-Regular.ttf
│   │   │   │   ├── OCR-B.ttf
│   │   │   │   ├── OpenSans-VariableFont_wdth,wght.ttf
│   │   │   │   └── Outfit-VariableFont_wght.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── pp_td3_b
│   │   │   ├── fonts
│   │   │   │   ├── Cabin-Italic-VariableFont_wdth,wght.ttf
│   │   │   │   ├── Cabin-VariableFont_wdth,wght.ttf
│   │   │   │   ├── FuzzyBubbles-Bold.ttf
│   │   │   │   ├── JosefinSans-VariableFont_wght.ttf
│   │   │   │   └── OCR-B.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── pp_td3_c
│   │   │   ├── fonts
│   │   │   │   ├── BarlowCondensed-Bold.ttf
│   │   │   │   ├── BarlowCondensed-Medium.ttf
│   │   │   │   ├── BarlowCondensed-MediumItalic.ttf
│   │   │   │   ├── BarlowCondensed-SemiBold.ttf
│   │   │   │   ├── Codystar-Regular.ttf
│   │   │   │   ├── Kristi-Regular.ttf
│   │   │   │   └── OCR-B.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── rp_card_td1
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── ComforterBrush-Regular.ttf
│   │   │   │   ├── Lack-Regular.otf
│   │   │   │   ├── Montserrat-Bold.ttf
│   │   │   │   ├── Montserrat-Regular.ttf
│   │   │   │   └── OCR-B.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── rp_card_td2
│   │   │   ├── back.png
│   │   │   ├── back.svg
│   │   │   ├── fonts
│   │   │   │   ├── OCR-B.ttf
│   │   │   │   ├── Rubik-Italic-VariableFont_wght.ttf
│   │   │   │   └── Rubik-VariableFont_wght.ttf
│   │   │   ├── front.png
│   │   │   ├── front.svg
│   │   │   └── generator.json
│   │   ├── showcase.css
│   │   ├── showcase.html
│   │   └── showcase.js
│   ├── tesseract.py
│   ├── translations
│   │   ├── __init__.py
│   │   ├── labels.py
│   │   └── residence_permit.py
│   └── utils.py
├── poetry.lock
├── pyproject.toml
├── scripts
│   ├── dataset
│   │   ├── delete_other_side_fields.py
│   │   ├── extract_field_locations_from_svgs.py
│   │   ├── extract_image_fields_from_svgs.py
│   │   ├── generate_fake_structured_documents.py
│   │   ├── insert_generated_documents_in_scenes.py
│   │   ├── reinsert_generated_documents_in_scenes.py
│   │   └── transform_field_locations_to_inserted_documents.py
│   ├── field_recognition_baseline
│   │   ├── evaluate.py
│   │   └── predict.py
│   ├── localization_baseline
│   │   └── plot.py
│   └── lpips
│       ├── lpips_dirs.py
│       └── lpips_metrics.py
└── stable_diffusion
    ├── Dockerfile
    └── README.md

/DATASET_LICENSE:
--------------------------------------------------------------------------------
The synthetic ID document images dataset ("DocXPand-25k"), released alongside this tool, is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
--------------------------------------------------------------------------------
/DISCLAIMER:
--------------------------------------------------------------------------------
The data used for the generation of the DocXPand-25k dataset are not personal data in the meaning of GDPR, as they are not related to an identified or identifiable natural person. They are test data generated through Faker, Stable Diffusion v1.5 and QuickSign anonymized scene images. The photos of faces are generated through an AI system. Therefore, the application of GDPR is excluded. Should personal data in the meaning of GDPR be used through the algorithm, an assessment should be made to evaluate the conformity of the data processing with GDPR.
The ID designs used to generate the DocXPand-25k dataset, and the fictitious ID documents contained in DocXPand-25k, are fictitious IDs and cannot be assimilated to forged IDs.
The purpose of DocXPand-25k and the algorithm is to provide a dataset and an algorithm for generating fictitious ID documents, intended to train document localization and text recognition, but not fraud detection, as the fictitious ID documents cannot be assimilated to valid ID documents.
QuickSign disclaims all responsibility for the use of the DocXPand-25k dataset and the associated code.
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends git curl make cmake xz-utils pkg-config build-essential wget locales libxi-dev libxrandr-dev libfreetype6-dev libfontconfig1-dev python3.10-dev libjpeg-dev libcairo2-dev liblcms2-dev libboost-dev libopenjp2-7-dev libopenjp2-tools libleptonica-dev imagemagick qpdf pdftk libdmtx0b mesa-common-dev libgl1-mesa-dev libglu1-mesa-dev libgl1-mesa-glx libmagic1
# poetry environment variables (https://python-poetry.org/docs/configuration/#using-environment-variables)
ENV POETRY_VERSION=1.6.1 \
    # make poetry install to this location
    POETRY_HOME="/opt/poetry" \
    # avoid poetry creating a virtual environment in the project's root
    POETRY_VIRTUALENVS_IN_PROJECT=false \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1

# install poetry - respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -
ENV PATH="${POETRY_HOME}/bin:$PATH"
COPY . /app
WORKDIR /app
RUN poetry install
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 QuickSign

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DocXPand tool

## Requirements
* [Python](https://www.python.org/downloads/) 3.9 or 3.10
* [Poetry](https://python-poetry.org/)
* [Chrome](https://www.google.com/chrome/) and the corresponding [webdriver](https://googlechromelabs.github.io/chrome-for-testing/)
* Stable Diffusion for face generation, see [stable_diffusion](stable_diffusion/README.md)

## Functionalities

This repository exposes functions to generate documents using templates and generators, contained in [docxpand/templates](docxpand/templates):

* Templates are SVG files containing information about the appearance of the documents to generate, i.e. their backgrounds, the fields contained in the document, the positions of these fields, etc.
* Generators are JSON files containing information on how to generate the field contents.

This repository allows you to:
* Generate documents for known templates ([id_card_td1_a](docxpand/templates/id_card_td1_a), [id_card_td1_b](docxpand/templates/id_card_td1_b), [id_card_td2_a](docxpand/templates/id_card_td2_a), [id_card_td2_b](docxpand/templates/id_card_td2_b), [pp_td3_a](docxpand/templates/pp_td3_a), [pp_td3_b](docxpand/templates/pp_td3_b), [pp_td3_c](docxpand/templates/pp_td3_c), [rp_card_td1](docxpand/templates/rp_card_td1) and [rp_card_td2](docxpand/templates/rp_card_td2)), by filling the templates with random fake information.
  - These templates are inspired by European ID cards, passports and residence permits. Their formats follow [ISO/IEC 7810](https://en.wikipedia.org/wiki/ISO/IEC_7810), and they contain a machine-readable zone (MRZ) that follows the [Machine Readable Travel Documents Specifications](https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf).
  - To generate documents, use the [generate_fake_structured_documents.py](scripts/dataset/generate_fake_structured_documents.py) script, which takes as input the name of one of the templates, the number of fake documents to generate, an output directory, a URL to a service that serves generated photos of human faces using [stable diffusion](stable_diffusion/README.md), and a [chrome webdriver](https://googlechromelabs.github.io/chrome-for-testing/) corresponding to the installed version of your Chrome browser.
* Integrate generated documents into scenes, to replace other documents originally present in the scenes.
  - This implies you have a dataset of background scenes usable for this task, with the coordinates of the original documents to replace by generated fake documents.
  - To integrate documents, use the [insert_generated_documents_in_scenes.py](scripts/dataset/insert_generated_documents_in_scenes.py) script, which takes as input the directory containing the generated document images, a JSON dataset containing information about those document images (generated by the above script), the directory containing "scene" (background) images, a JSON dataset containing localization information, and an output directory to store the final images. The background scene images must contain images that are present in the [docxpand/specimens](docxpand/specimens) directory. See the [SOURCES.md](docxpand/specimens/SOURCES.md) file for more information.
  - All JSON datasets must follow the `DocFakerDataset` format, defined in [docxpand/dataset.py](docxpand/dataset.py) (see the sketch below).
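
For instance, assuming a dataset has been generated to `output/dataset.json` with its images in `output/images/` (illustrative paths), it can be inspected with a few lines of Python:

    from docxpand.dataset import DocFakerDataset

    # Load and validate the generated dataset
    dataset = DocFakerDataset("output/dataset.json", images_dir="output/images")
    print(dataset.info())  # name, author, creation date, number of documents

    # documents maps document identifiers to their JSON entries
    for doc_id, document in dataset.documents.items():
        annotation = document["annotations"][0]
        print(doc_id, annotation["template"], annotation["position"])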

### Installation

Run

    poetry install

### Usage

To generate documents, run:

    poetry run python scripts/dataset/generate_fake_structured_documents.py -n <number> -o <output-directory> -t <template-name> -w <webdriver-path>

To insert documents into target images, run:

    poetry run python scripts/dataset/insert_generated_documents_in_scenes.py -di <documents-images-dir> -dd <documents-dataset> -si <scenes-images-dir> -sd <scenes-dataset> -o <output-directory>

(Replace the placeholders in angle brackets with the values described above; the exact option spellings are defined in each script.)

# *DocXPand-25k* dataset
The synthetic ID document images dataset ("DocXPand-25k"), released alongside this tool, is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

You can download the dataset from [this release](https://github.com/QuickSign/docxpand/releases/tag/v1.0.0). It's split into 12 parts (DocXPand-25k.tar.gz.xx, from 00 to 11). Once you've downloaded all 12 binary files, you can extract the content using the following command: `cat DocXPand-25k.tar.gz.* | tar xzvf -`.
The labels are stored in a JSON format, which is readable using the [DocFakerDataset class](https://github.com/QuickSign/docxpand/blob/v1.0.0/docxpand/dataset.py#L276C7-L276C22). The document images are stored in the `images/` folder, which contains one sub-folder per class. The original image fields (identity photos, ghost images, barcodes, datamatrices) integrated in the documents are stored in the `fields/` sub-folder.
--------------------------------------------------------------------------------
/docxpand/blank.svg:
--------------------------------------------------------------------------------
[blank Inkscape SVG canvas; the XML markup was not preserved in this dump]
--------------------------------------------------------------------------------
/docxpand/canvas.py:
--------------------------------------------------------------------------------
"""Helper functions to manipulate Inkscape SVG content.

Original version can be found at https://github.com/letuananh/pyinkscape

@author: Le Tuan Anh
@license: MIT
"""

# Copyright (c) 2017, Le Tuan Anh
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

########################################################################

import logging
import os
import typing as tp
from xml.dom.minidom import Element

from lxml import etree
from lxml.etree import XMLParser

_BLANK_CANVAS = os.path.join(os.path.dirname(os.path.realpath(__file__)), "blank.svg")

logger = logging.getLogger(__name__)
logging.basicConfig()
logger.setLevel(logging.INFO)

INKSCAPE_NS = "http://www.inkscape.org/namespaces/inkscape"
SVG_NS = "http://www.w3.org/2000/svg"
SVG_NAMESPACES = {
    "ns": SVG_NS,
    "svg": SVG_NS,
    "dc": "http://purl.org/dc/elements/1.1/",
    "cc": "http://creativecommons.org/ns#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "sodipodi": "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
    "inkscape": INKSCAPE_NS,
}
XLINK_NS = "http://www.w3.org/1999/xlink"


class Point:
    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y


class Dimension:
    def __init__(self, width, height):
        self.width = width
        self.height = height


class BBox:
    """A bounding box represented by a top-left anchor (x1, y1) and a dimension (width, height)"""

    def __init__(self, x, y, width, height):
        self._anchor = Point(x, y)
        self._dimension = Dimension(width, height)

    @property
    def width(self):
        """Width of the bounding box"""
        return self._dimension.width

    @property
    def height(self):
        """Height of the bounding box"""
        return self._dimension.height


class Canvas:
    """This class represents an Inkscape drawing page (i.e. a SVG file)."""

    def __init__(self, filepath: tp.Optional[str] = None, *args, **kwargs):
        """Create a new blank canvas or read from an existing file.

        To create a blank canvas, just omit the filepath argument.
        >>> c = Canvas()

        To open an existing file, use
        >>> c = Canvas("/path/to/file.svg")

        Arguments:
            filepath: Path to an existing SVG file.
        """
        self._filepath = filepath
        self._tree = None
        self._root = None
        self._units = "mm"
        self._width = 0
        self._height = 0
        self._viewbox = None
        self._scale = 1.0
        self._elem_group_map = {}
        self._elements_by_ids = {}
        # _load_file falls back to the blank canvas when no filepath is given
        self._load_file(*args, **kwargs)

    def _load_file(self, remove_blank_text=True, encoding="utf-8", **kwargs):
        with open(
            _BLANK_CANVAS if not self._filepath else self._filepath,
            encoding=encoding,
        ) as infile:
            kwargs["remove_blank_text"] = remove_blank_text  # lxml specific
            parser = XMLParser(**kwargs)
            self._tree = etree.parse(infile, parser)
            self._root = self._tree.getroot()
            self._update_svg_info()

    def _update_svg_info(self):
        # load SVG information
        if self._svg_node.get("viewBox"):
            self._viewbox = BBox(
                *(float(x) for x in self._svg_node.get("viewBox").split())
            )
            if not self._width:
                self._width = self._viewbox.width
            if not self._height:
                self._height = self._viewbox.height
        if self.viewBox and self._width:
            self._scale = self.viewBox.width / self._width

    @property
    def _svg_node(self):
        return self._root

    @property
    def viewBox(self):
        return self._viewbox

    def to_xml_string(self, encoding="utf-8", pretty_print=True, **kwargs):
        return etree.tostring(
            self._root,
            encoding=encoding,
            pretty_print=pretty_print,
            **kwargs,
        ).decode("utf-8")

    def _xpath_query(self, query_string, namespaces=None):
        return self._root.xpath(query_string, namespaces=namespaces)

    def element_by_id(self, id: str) -> tp.Optional[Element]:
        """Get one XML element by its ID.

        Arguments:
            id: the ID of the element

        Raises:
            RuntimeError: when more than one element shares the exact same ID
        """
        elements = self._xpath_query(f".//ns:*[@id='{id}']", namespaces=SVG_NAMESPACES)
        if not elements:
            return None
        if len(elements) > 1:
            raise RuntimeError(f"Found {len(elements)} elements with the same id {id}")
        return elements[0]

    def render(self, outpath, overwrite=False, encoding="utf-8"):
        if not overwrite and os.path.isfile(outpath):
            logger.warning(f"File {outpath} exists. SKIPPED")
        else:
            output = self.to_xml_string(pretty_print=False)
            with open(outpath, mode="w", encoding=encoding) as outfile:
                outfile.write(output)
            logger.info("Written output to {}".format(outfile.name))
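

# Usage sketch, with illustrative values: "family_name" is a hypothetical
# element id and the paths are not taken from a real template.
if __name__ == "__main__":
    canvas = Canvas("docxpand/templates/id_card_td1_a/front.svg")
    element = canvas.element_by_id("family_name")
    if element is not None:
        element.text = "DOE"  # lxml elements expose their text content
    canvas.render("/tmp/front_filled.svg", overwrite=True)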
--------------------------------------------------------------------------------
/docxpand/conditionals.py:
--------------------------------------------------------------------------------
import random
import typing as tp


class Conditional:
    def __init__(self, seed: tp.Optional[int] = None):
        if seed is not None:
            random.seed(seed)

    @staticmethod
    def uniform(probability: float = 0.5) -> bool:
        return random.random() <= probability

    @staticmethod
    def maybe(**kwargs) -> bool:
        raise NotImplementedError("Must be implemented in child class")


class BirthNameConditional(Conditional):
    @staticmethod
    def maybe(**kwargs) -> bool:
        gender: str = kwargs.get("gender", "nonbinary")
        probability_by_gender = kwargs.get(
            "probability_by_gender",
            {"male": 0.05, "female": 0.2, "nonbinary": 0.2},
        )
        probability = probability_by_gender.get(gender, 0.2)
        return Conditional.uniform(probability)
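

# Usage sketch: decide whether a generated "female" identity gets a birth
# name field, using the default 20% probability defined above.
if __name__ == "__main__":
    print(BirthNameConditional.maybe(gender="female"))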
--------------------------------------------------------------------------------
/docxpand/dataset.py:
--------------------------------------------------------------------------------
"""DocFaker dataset."""

import datetime
import getpass
import json
import numpy as np
import os
import typing as tp

from pydantic import BaseModel, Field

from docxpand.image import ColorSpace, Image


class PointModel(BaseModel):
    """Typing for a point."""

    x: float
    """The x-coordinate of the point."""

    y: float
    """The y-coordinate of the point."""

    label: tp.Optional[str]
    """An optional name attached to the point (e.g. "top_left")."""


class QuadrangleModel(BaseModel):
    """Typing for a quadrangle.

    The points p1, p2, p3, p4 (in this order) must form a simple quadrangle.
    """

    p1: PointModel
    """First point of the quadrangle."""

    p2: PointModel
    """Second point of the quadrangle."""

    p3: PointModel
    """Third point of the quadrangle."""

    p4: PointModel
    """Fourth (and last) point of the quadrangle."""


class BaseDocumentModel(BaseModel):
    """Typing for data stored in `BaseDatasetModel.documents`.

    A document is either an image or a PDF file that is stored in MinIO,
    or in the old blobstore.
    """

    id: str = Field(..., alias="_id")
    """The identifier of this document in MongoDB (it is usually the md5 of the
    original document).
    """

    copy_name: tp.Optional[str]
    """The name of the copy to use to find the file from MongoDB. It may
    reference the original document (in this case, the copy name is in general
    "images") or the result of the transformation (crop, rectification,
    page split, ...) of the original document (in this case, the copy name is
    in general "extracted_images")."""

    md5: tp.Optional[str]
    """The MD5 hash of the document file, mainly used to distinguish between
    several transformations of the same original document (i.e. various copies).
    """

    url: tp.Optional[str]
    """The URL used to know the source of the image."""

    filename: str
    """The name of the file representing the document, with its extension
    (e.g.: "xxx.jpeg", "xxx.pdf"). When the dataset is downloaded, each
    document is saved using this filename.
    """


class BaseAnnotationModel(BaseModel):
    """Typing for data stored in any annotation."""

    id: int = Field(..., alias="_id")
    """The identifier of the annotation"""

    annotator: str
    """The name or e-mail of the person who has labeled the document"""

    created_at: datetime.datetime
    """The date when the annotation has been created"""

    updated_at: datetime.datetime
    """The date when the annotation has been last updated"""


class DocFakerAnnotationModel(BaseAnnotationModel):
    """Typing for data stored in `DocFakerDocumentModel.annotations`."""

    fields: tp.Optional[tp.Dict[str, tp.Any]]
    """The list of labeled fields contained in the annotation."""

    position: QuadrangleModel
    """The list of points of interest, in the following format:
    {"p1": {"x": 0.1, "y": 0.2}, "p2": {"x": 0.1, "y": 0.2}, ...}
    """

    scene_image: tp.Optional[str]
    """The image of the scene."""

    template: str
    """The template used by the image."""


class DocFakerDocumentModel(BaseDocumentModel):
    """Typing for data stored in `DocFakerDatasetModel.documents`."""

    annotations: tp.Sequence[DocFakerAnnotationModel]
    """The list of Doc Faker annotations on the given document."""


class BaseDatasetModel(BaseModel):
    """Typing for data stored in `BaseDataset.dataset`."""

    info: tp.Dict[str, tp.Any]
    """Information about the current dataset."""

    documents: tp.Sequence[BaseDocumentModel]
    """The list of documents contained in the dataset."""

    class Config:
        """Configuration to allow mutation, especially during inheritance."""

        allow_mutation = True
        """Allows to easily change the type of documents when inheriting from
        BaseDatasetModel."""


class DocFakerDatasetModel(BaseDatasetModel):
    """Typing for data stored in `DocFakerDataset.dataset`."""

    documents: tp.Sequence[DocFakerDocumentModel]
    """The labeled documents contained in the dataset."""

class BaseDataset:
    """Base class for loading and handling a dataset.

    Attributes:
        dataset: the content of the dataset, represented as a dictionary and
            typed using pydantic and the data classes defined in this library
        images_dir: the destination directory to download document images
        _info: a copy of info contained in `dataset`, or some very basic info
            dictionary with placeholder name and description, and generated
            author and creation date.
        documents: a look-up table between document identifiers and documents
    """

    def __init__(
        self,
        dataset_input: tp.Union[str, dict],
        images_dir: tp.Optional[str] = None,
        validate: bool = True,
    ):
        """Init a dataset module from a path to a json file.

        Load said dataset and, if validate = True, run pydantic type and
        format validation on the dataset.

        Args:
            dataset_input: can be either a path to a local JSON dataset file,
                or a URL to a JSON dataset file stored on MinIO, or a dataset
                dictionary
            images_dir: path to the folder where images are stored.
                Defaults to None.
            validate: Validate the dataset using pydantic. Defaults to True.
        """
        self.dataset = (
            json.load(open(dataset_input, "r", encoding="utf-8"))
            if isinstance(dataset_input, str)
            else dataset_input
        )

        # validate dataset is in the correct format
        if validate:
            self._validate()

        # set images_dir, this is where images will be downloaded
        self.images_dir = images_dir

        # set basic properties of a dataset
        self._info = self.dataset.get(
            "info",
            {
                "name": "basic_dataset",
                "createdAt": datetime.datetime.utcnow().isoformat(),
                "description": "basic_dataset_description",
                "author": getpass.getuser(),
            },
        )
        self.documents: tp.Dict[str, tp.Dict[str, tp.Any]] = dict()
        self._create_index()
        self._seed = None

    @property
    def _model(self) -> tp.Type[BaseDatasetModel]:
        """Return the dataset model associated to this dataset class."""
        return BaseDatasetModel

    def _validate(self) -> None:
        """Validate the dataset dictionary using pydantic validators."""
        self._model.parse_obj(self.dataset)

    def _create_index(self) -> None:
        """Index the dataset documents to make them easily accessible."""
        self.documents = {
            document["_id"]: document for document in self.dataset["documents"]
        }

    def info(self) -> tp.Dict[str, tp.Any]:
        """Return information about the dataset."""
        cls = self.__class__
        info = {
            **self._info,
            "size": len(self.documents),
            "__class__": f"{cls.__module__}.{cls.__qualname__}",
        }
        return info

    def save(self, export_path: str, overwrite: bool = False) -> None:
        """Save the dataset to a json file.

        Args:
            export_path: path to the json file to export to.
            overwrite: True if you want to overwrite the file if it already
                exists.
        """
        export_path = os.path.abspath(export_path)
        if os.path.isfile(export_path) and not overwrite:
            raise OSError(
                f"The file {export_path} already exists",
                "if you want to overwrite, set overwrite parameter to True.",
            )
        os.makedirs(os.path.dirname(export_path), exist_ok=True)
        cls = self.__class__
        dataset_copy = {
            "__class__": f"{cls.__module__}.{cls.__qualname__}",
        }
        dataset_copy.update(self.dataset)
        with open(export_path, "w") as file:
            json.dump(dataset_copy, file, indent=2, sort_keys=True, default=str)

    def load_image(
        self,
        document_id,
        space: ColorSpace = ColorSpace.BGR,
        ignore_orientation: bool = True,
    ) -> np.ndarray:
        """Load a document image given its document_id.

        Args:
            document_id: _id of the document in the dataset.
            space: the target color space of the image.
                Defaults to ColorSpace.BGR.
            ignore_orientation: if True, ignores the orientation flag
                in EXIF metadata; else it is used to rotate the image
                accordingly. Defaults to True.

        Returns:
            np.ndarray image
        """
        # Documents are indexed as plain dictionaries, so the filename is
        # accessed with a key lookup.
        filepath = os.path.join(
            self.images_dir, self.documents[document_id]["filename"]
        )
        return Image.read(filepath, space=space, ignore_orientation=ignore_orientation)


class DocFakerDataset(BaseDataset):
    """Specialization of the BaseDataset class for Doc Faker."""

    def __init__(
        self,
        dataset_input: tp.Union[str, dict],
        validate: bool = True,
        images_dir: tp.Optional[str] = None,
    ):
        """Init a dataset module from a path to a json file or a python dict.

        Load said dataset and, if validate = True, run pydantic type and
        format validation on the dataset.

        Args:
            dataset_input: can be either a path to a json dataset,
                or a dataset dict
            validate: Validate the dataset using pydantic.
                Defaults to True.
            images_dir: path to the folder where images are stored.
                Defaults to None.
        """
        super().__init__(
            dataset_input,
            validate=validate,
            images_dir=images_dir,
        )

    @property
    def _model(self) -> tp.Type[DocFakerDatasetModel]:
        """Return the dataset model associated to this dataset class."""
        return DocFakerDatasetModel
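

# Usage sketch with illustrative paths and coordinates: load a dataset,
# read one image, validate a quadrangle, and save a copy.
if __name__ == "__main__":
    dataset = DocFakerDataset("output/dataset.json", images_dir="output/images")
    first_id = next(iter(dataset.documents))
    image = dataset.load_image(first_id)  # BGR numpy array by default
    quad = QuadrangleModel(
        p1=PointModel(x=0.12, y=0.08, label="top_left"),
        p2=PointModel(x=0.88, y=0.10, label="top_right"),
        p3=PointModel(x=0.90, y=0.85, label="bottom_right"),
        p4=PointModel(x=0.10, y=0.83, label="bottom_left"),
    )
    dataset.save("output/dataset_copy.json", overwrite=True)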
--------------------------------------------------------------------------------
/docxpand/instantiable.py:
--------------------------------------------------------------------------------
"""Instantiable class definition."""
import copy
import importlib
import re
import typing as tp


def _is_string_formattable(value: str) -> bool:
    """Test if a string is formattable (i.e. contains a {.*} pattern).

    Args:
        value: the string to test

    Returns:
        True if it is formattable, else False
    """
    return re.search("{.*}", value) is not None


def import_object(full_name: str) -> tp.Any:
    """Dynamically import an object using its fully qualified name.

    Args:
        full_name: fully qualified name of the object to import

    Returns:
        the imported object
    """
    module_name, object_name = full_name.rsplit(".", 1)
    module = importlib.import_module(module_name)
    return getattr(module, object_name)


class Instantiable:
    """Factory that instantiates objects from dictionary definitions."""

    @staticmethod
    def is_instantiable(object: tp.Any) -> bool:
        """Indicate whether any object is a candidate for the `instantiate` method.

        The object must follow these rules to be instantiable:
        - be a python dictionary
        - have a "__class__" key

        The "init_args" and "init_context" keys are optional.

        Args:
            object: the object to test

        Returns:
            True if it is compatible with the `instantiate` method, else False.
        """
        if not isinstance(object, dict):
            return False
        return "__class__" in object

    @staticmethod
    def instantiate(instantiable: tp.Dict, **kwargs) -> tp.Any:
        """Instantiate an object from a dictionary definition.

        The object is instantiated using these pieces of information:
        - the value under the "__class__" key is imported as the class
          to instantiate
        - the keyword arguments under the "init_args" key are extracted
        - these arguments are also instantiated if applicable
        - these arguments are formatted using the context if applicable
        - the values missing at configuration writing time are completed
          using the context if they are available at runtime

        Args:
            instantiable: an instantiable dictionary
            **kwargs: the context

        Returns:
            the fully instantiated object
        """
        instantiable = copy.deepcopy(instantiable)

        # Initialize arguments
        arguments = instantiable.get("init_args", {})
        assert isinstance(arguments, dict)

        # Format configuration arguments using context and instantiate
        # sub-objects when applicable
        for key, value in arguments.items():
            if isinstance(value, str) and _is_string_formattable(value):
                arguments[key] = value.format(**kwargs)
            elif isinstance(value, list):
                arguments[key] = [
                    Instantiable.try_instantiate(element, **kwargs) for element in value
                ]
            else:
                arguments[key] = Instantiable.try_instantiate(value, **kwargs)

        # Add other arguments from context
        for key, value in instantiable.get("init_context", {}).items():
            if value in kwargs:
                arguments[key] = kwargs.get(value)

        # Import module and class
        the_class_str = instantiable.get("__class__")
        assert isinstance(the_class_str, str)
        if _is_string_formattable(the_class_str):
            the_class_str = the_class_str.format(**kwargs)
        the_class = import_object(the_class_str)

        # Make instance
        instance = the_class(**arguments)

        return instance

    @staticmethod
    def try_instantiate(an_object: tp.Any, **kwargs) -> tp.Any:
        """Instantiate an object if possible.

        Args:
            an_object: an object that may be an instantiable dictionary
            **kwargs: the context

        Returns:
            the fully instantiated object when applicable, or the input object
        """
        if Instantiable.is_instantiable(an_object):
            return Instantiable.instantiate(an_object, **kwargs)

        return an_object


class CallableInstantiable(Instantiable):
    """Factory that calls methods from instantiable objects."""

    @staticmethod
    def is_callable(object: tp.Any) -> bool:
        """Indicate whether any object is a candidate for the `call` method.

        The object must follow these rules to be callable:
        - be a python dictionary
        - be instantiable
        - have a "__method__" key

        The "call_args" and "call_context" keys are optional.

        Args:
            object: the object to test

        Returns:
            True if it is compatible with the `call` method, else False.

        """
        if not isinstance(object, dict):
            return False
        if not Instantiable.is_instantiable(object):
            return False
        return "__method__" in object

    @staticmethod
    def get_methods(
        callable: tp.Dict, **kwargs
    ) -> tp.Union[tp.Callable, tp.Dict[str, tp.Callable]]:
        """Get method(s) of an instantiable object from a dictionary.

        The object is first instantiated using the Instantiable factory. Then,
        the method(s) are retrieved by their names, stored in the "__method__" key.

        Args:
            callable: a callable object as dictionary
            **kwargs: the context

        Returns:
            the method or a dictionary of methods
        """
        # Instantiate object and get method
        instance = Instantiable.instantiate(callable, **kwargs)
        method_names = callable["__method__"]

        # Method could be a dictionary of method names
        if isinstance(method_names, dict):
            methods = {}
            for method_key, method_name in method_names.items():
                if _is_string_formattable(method_name):
                    method_name = method_name.format(**kwargs)
                method = getattr(instance, method_name)
                methods[method_key] = method
        # Or a single method name
        elif isinstance(method_names, str):
            if _is_string_formattable(method_names):
                method_names = method_names.format(**kwargs)
            methods = getattr(instance, method_names)
        else:
            raise ValueError(f"Unsupported __method__ argument: {method_names}")

        return methods

    @staticmethod
    def call(callable: tp.Dict, **kwargs) -> tp.Any:
        """Call method(s) of an instantiable object from a dictionary.

        The object is first instantiated using the Instantiable factory. Then:
        - the method to call is retrieved by its name, stored in the
          "__method__" key
        - the keyword arguments under the "call_args" key are extracted
        - these arguments are also instantiated if applicable
        - these arguments are formatted using the context if applicable
        - the values missing at configuration writing time are completed
          using the context if they are available at runtime

        Args:
            callable: a callable object as dictionary
            **kwargs: the context

        Returns:
            the result of the method called on the instantiated object
        """
        callable = copy.deepcopy(callable)

        # Get methods
        methods = CallableInstantiable.get_methods(callable, **kwargs)
        # Initialize arguments
        arguments = callable.get("call_args", {})
        assert isinstance(arguments, dict)
        # Format configuration arguments using context and instantiate
        # sub-objects when applicable
        for key, value in arguments.items():
            if isinstance(value, str) and _is_string_formattable(value):
                arguments[key] = value.format(**kwargs)
            elif isinstance(value, list):
                arguments[key] = [
                    Instantiable.try_instantiate(element, **kwargs) for element in value
                ]
            else:
                arguments[key] = Instantiable.try_instantiate(value, **kwargs)

        # Add other arguments from context
        for key, value in callable.get("call_context", {}).items():
            if value in kwargs:
                arguments[key] = kwargs.get(value)
        if isinstance(methods, dict):
            results = {}
            for method_key, method in methods.items():
                results[method_key] = method(**arguments)
        else:
            results = methods(**arguments)

        return results

    @staticmethod
    def try_call(an_object: tp.Any, **kwargs) -> tp.Any:
        """Call a method of an object if possible.

        Args:
            an_object: an object that may be an instantiable and callable
                dictionary
            **kwargs: the context

        Returns:
            the result of the method called on the instantiated object when
            applicable, or the input object
        """
        if CallableInstantiable.is_callable(an_object):
            return CallableInstantiable.call(an_object, **kwargs)

        return an_object
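

# Usage sketch: instantiate a Faker address provider and call one of its
# methods from a dictionary definition, as a generator JSON entry would
# (this mirrors the pattern used in docxpand.providers).
if __name__ == "__main__":
    definition = {
        "__class__": "faker.providers.address.fr_FR.Provider",
        "init_args": {
            "generator": {
                "__class__": "faker.Faker",
                "init_args": {"locale": "fr_FR"},
            }
        },
        "__method__": "city",
    }
    print(CallableInstantiable.call(definition))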
--------------------------------------------------------------------------------
/docxpand/metrics.py:
--------------------------------------------------------------------------------
from rapidfuzz.distance import Levenshtein
from shapely.geometry import Polygon

from docxpand.geometry import Quadrangle


def iou(quad_detected: Quadrangle, quad_ground_truth: Quadrangle):
    """Calculate the IoU (intersection over union) between two quadrangles."""
    polygon_detected = Polygon(quad_detected)
    polygon_ground_truth = Polygon(quad_ground_truth)
    polygon_intersection = polygon_detected.intersection(
        polygon_ground_truth
    ).area
    polygon_union = polygon_detected.union(polygon_ground_truth).area
    if polygon_union:
        return polygon_intersection / polygon_union
    else:
        return 0


def character_error_rate(prediction: str, ground_truth: str):
    """Calculate the character error rate between two strings."""
    return Levenshtein.distance(prediction, ground_truth) / len(ground_truth)
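

# Usage sketch: one substituted character in an 8-character ground truth
# gives a CER of 1/8 = 0.125; iou() compares two Quadrangle objects the
# same way for localization.
if __name__ == "__main__":
    print(character_error_rate("DOCXLAND", "DOCXPAND"))  # 0.125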
--------------------------------------------------------------------------------
/docxpand/normalizer.py:
--------------------------------------------------------------------------------
"""Module that defines string normalization."""

import re
import string
import typing as tp
import unicodedata as ud

LIGATURES = {"Æ": "AE", "Œ": "OE", "æ": "ae", "œ": "oe"}


def rm_accents(value: str) -> str:
    """Remove accents.

    Args:
        value: text to normalize

    Returns:
        text without accents
    """
    return ud.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")


def rm_punct(value: str) -> str:
    """Remove punctuation and special chars.

    Args:
        value: text to normalize

    Returns:
        text without punctuation
    """
    return re.sub(rf"[{string.punctuation}]", " ", value)


def collapse_whitespace(value: str) -> str:
    """Collapse whitespace.

    Args:
        value: text to normalize

    Returns:
        text without multiple spaces
    """
    return re.sub(rf"[{string.whitespace}]+", " ", value).strip()


def replace_ligatures(value: str) -> str:
    """Replace the ligatures of a text by their 2-character counterparts.

    Args:
        value: text to normalize

    Returns:
        normalized text
    """
    for ligature, replacement in LIGATURES.items():
        if ligature in value:
            value = value.replace(ligature, replacement)
    return value


def normalize(value: str, operations: tp.Optional[tp.List[tp.Callable]] = None) -> str:
    """Apply a list of operations to normalize a text.

    Args:
        value: text to normalize
        operations: list of operations

    Returns:
        normalized text
    """
    if operations:
        for operation in operations:
            value = operation(value)
    return value


def cut_and_pad_right(string: str, length: int, padding_string: str = "") -> str:
    """Cut a string to `length`, pad it on the right, and uppercase it."""
    cut = string[:length]
    padded = cut + padding_string * (length - len(cut))
    return padded.upper()


def normalize_name(
    name: tp.Union[str, tp.List[str]], padding_string: str = ""
) -> tp.Union[str, tp.List[str]]:
    if isinstance(name, list):
        return [normalize_name(val, padding_string) for val in name]
    return (
        normalize(
            name,
            # Ligatures must be replaced before accents are stripped,
            # otherwise rm_accents drops characters like "œ" entirely.
            [replace_ligatures, rm_accents, rm_punct, collapse_whitespace],
        )
        .strip()
        .upper()
        .replace(" ", padding_string)
    )
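

# Usage sketch: MRZ-style normalization expands ligatures, strips accents
# and punctuation, uppercases, and pads spaces with "<".
if __name__ == "__main__":
    print(normalize_name("Jean-Sébastien Cœur", padding_string="<"))
    # JEAN<SEBASTIEN<COEUR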
--------------------------------------------------------------------------------
/docxpand/providers/__init__.py:
--------------------------------------------------------------------------------
import typing as tp
from collections import OrderedDict
import gettext
import numpy as np
import random

from faker import Faker
import pycountry

from docxpand.instantiable import Instantiable
from docxpand.utils import get_field_from_any_side

GENERIC_FAKER = Faker()


class ChoiceProvider:
    def __init__(self, choices: tp.Union[tp.Dict[str, float], tp.List[str]]) -> None:
        self.choices = choices

    def choice(self) -> str:
        return ChoiceProvider.random_choice(self.choices)

    @staticmethod
    def random_choice(choices: tp.Any) -> tp.Any:
        if isinstance(choices, (list, tuple, set, OrderedDict)):
            return GENERIC_FAKER.random_elements(
                choices, length=1, use_weighting=True
            )[0]
        if isinstance(choices, dict):
            return GENERIC_FAKER.random_elements(
                OrderedDict(choices), length=1, use_weighting=True
            )[0]
        return choices


class CopyProvider:
    @staticmethod
    def copy(
        field_name: str,
        default: tp.Union[str, tp.List[str]],
        existing_fields: tp.Optional[tp.Dict] = None,
    ) -> tp.Any:
        if not existing_fields:
            return default
        return get_field_from_any_side(existing_fields, field_name, default)


class FormatProvider:
    @staticmethod
    def format(
        formatter: str,
        existing_fields: tp.Optional[tp.Dict] = None,
    ) -> tp.Any:
        formatter = formatter.replace("|", "{").replace("&", "}")
        if not existing_fields:
            raise ValueError("existing_fields is required to format a field")
        fields = {}
        for v in existing_fields.values():
            fields.update(v)
        return formatter.format(**fields)


class InitialsProvider:
    @staticmethod
    def initials(existing_fields: tp.Optional[tp.Dict] = None) -> str:
        if not existing_fields:
            raise ValueError("existing_fields is required to compute initials")

        family_name = get_field_from_any_side(existing_fields, "family_name", None)
        given_name = get_field_from_any_side(existing_fields, "given_name", None)
        if family_name and isinstance(family_name, list):
            family_name = family_name[0]
        if not family_name:
            family_name = "?"
        if given_name and isinstance(given_name, list):
            given_name = given_name[0]
        if not given_name:
            given_name = "?"
        return f"{family_name[0]}{given_name[0]}"


class GenderProvider:
    @staticmethod
    def get_gender_letter(gender: tp.Optional[str] = None) -> str:
        if not gender:
            raise ValueError("Gender is needed.")
        first_letter = gender[0].upper()
        return first_letter if first_letter in ["M", "F"] else "U"


class HeightProvider:
    # Source: https://ourworldindata.org/human-height
    stats = {
        "male": (178.4, 7.6),  # mean, std
        "female": (164.7, 7.1),  # mean, std
    }

    @staticmethod
    def height_in_centimeters(gender: str) -> str:
        if gender == "nonbinary":
            gender = random.choice(list(HeightProvider.stats.keys()))

        mean, std = HeightProvider.stats[gender]
        return "%d cm" % round(np.random.normal(mean, std))

    @staticmethod
    def height_in_meters(gender: str) -> str:
        if gender == "nonbinary":
            gender = random.choice(list(HeightProvider.stats.keys()))

        mean, std = HeightProvider.stats[gender]
        return "%.2f m" % (np.random.normal(mean, std) / 100)


class NationalityProvider:
    @staticmethod
    def nationality_from_locale(locale: str) -> str:
        country = pycountry.countries.get(alpha_2=locale.split("_")[1])
        return country.alpha_3


class ResidencePermitBirthPlaceProvider:
    @staticmethod
    def birth_city(locale: str, name_locale: str) -> str:
        address_provider = Instantiable.instantiate(
            {
                "__class__": f"faker.providers.address.{name_locale}.Provider",
                "init_args": {"generator": Faker(name_locale)},
            }
        )
        return address_provider.city()

    @staticmethod
    def birth_country(locale: str, name_locale: str) -> str:
        country_code = name_locale.split("_")[1]
        country = pycountry.countries.get(alpha_2=country_code)

        translator = gettext.translation(
            "iso3166", pycountry.LOCALES_DIR, languages=[locale]
        )
        translator.install()
        return translator.gettext(country.name)
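

# Usage sketch with made-up choice weights: draw a weighted value and
# generate a plausible height string for a generated identity.
if __name__ == "__main__":
    provider = ChoiceProvider({"ID_CARD": 0.7, "PASSPORT": 0.3})
    print(provider.choice())
    print(HeightProvider.height_in_centimeters("female"))  # e.g. "167 cm"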
"{{street_address}}\n{{city}}\n{{administrative_unit}}", 58 | ) 59 | 60 | def city_prefix(self) -> str: 61 | return self.random_element(self.city_prefixes) 62 | 63 | def city_name(self) -> str: 64 | if self not in self.generator.providers: 65 | self.generator.add_provider(self) 66 | pattern: str = self.random_element(self.city_formats) 67 | return self.generator.parse(pattern) 68 | 69 | city = city_name 70 | 71 | def place_of_birth(self) -> str: 72 | if self not in self.generator.providers: 73 | self.generator.add_provider(self) 74 | pattern: str = self.random_element(self.place_of_birth_format) 75 | return self.generator.parse(pattern) 76 | 77 | def street_address(self) -> str: 78 | if self not in self.generator.providers: 79 | self.generator.add_provider(self) 80 | pattern: str = self.random_element(self.street_address_formats) 81 | return self.generator.parse(pattern) 82 | 83 | def address(self) -> str: 84 | if self not in self.generator.providers: 85 | self.generator.add_provider(self) 86 | pattern: str = self.random_element(self.address_formats) 87 | return self.generator.parse(pattern) 88 | -------------------------------------------------------------------------------- /docxpand/providers/address/fr_FR/__init__.py: -------------------------------------------------------------------------------- 1 | import random 2 | import typing as tp 3 | from collections import OrderedDict 4 | 5 | from faker.providers.address.fr_FR import Provider as AddressProvider 6 | 7 | 8 | class Provider(AddressProvider): 9 | __use_weighting__ = True 10 | 11 | city_suffixes = ( 12 | "ville", 13 | "bourg", 14 | "-les-Bains", 15 | "-sur-Mer", 16 | "-la-Forêt", 17 | "boeuf", 18 | "nec", 19 | "dan", 20 | ) 21 | 22 | city_prefixes = ("Le", "La", "Saint", "Sainte") 23 | 24 | street_prefixes = OrderedDict( 25 | ( 26 | ("aire", 0.1), 27 | ("allée", 0.1), 28 | ("boulevard", 0.2), 29 | ("bourg", 0.1), 30 | ("carrefour", 0.1), 31 | ("chaussée", 0.1), 32 | ("chemin", 0.2), 33 | ("cité", 0.1), 34 | ("clos", 0.1), 35 | ("côte", 0.1), 36 | ("cour", 0.1), 37 | ("espace", 0.1), 38 | ("esplanade", 0.1), 39 | ("faubourg", 0.2), 40 | ("halle", 0.1), 41 | ("hameau", 0.1), 42 | ("impasse", 0.2), 43 | ("lieu-dit", 0.2), 44 | ("lotissement", 0.2), 45 | ("place", 0.2), 46 | ("promenade", 0.1), 47 | ("quai", 0.1), 48 | ("route", 0.2), 49 | ("ruelle", 0.2), 50 | ("rue", 1.0), 51 | ("sentier", 0.1), 52 | ("square", 0.1), 53 | ("voie", 0.2), 54 | ("zone", 0.1), 55 | ) 56 | ) 57 | 58 | building_number_extensions = OrderedDict( 59 | ( 60 | ("bis", 0.7), 61 | ("ter", 0.2), 62 | ("quater", 0.05), 63 | ("quinquies", 0.05) 64 | ) 65 | ) 66 | 67 | building_number_formats = OrderedDict( 68 | ( 69 | ("#", 0.2), 70 | ("##", 0.4), 71 | ("###", 0.1), 72 | ("####", 0.05), 73 | ("# {{building_number_extension}}", 0.2), 74 | ("## {{building_number_extension}}", 0.05), 75 | ) 76 | ) 77 | 78 | street_address_formats = OrderedDict( 79 | ( 80 | ("{{building_number}} {{street_name}} ", 0.6), 81 | ("{{building_number}}, {{street_name}} ", 0.35), 82 | ("{{street_name}}", 0.05), 83 | ) 84 | ) 85 | 86 | address_formats = OrderedDict( 87 | ( 88 | ("{{street_address}}\n{{postcode}} {{city}}", 0.8), 89 | ( 90 | "{{street_address}}\n{{building_name}}\n{{postcode}} {{city}}", 91 | 0.2, 92 | ), 93 | ) 94 | ) 95 | 96 | building_name_formats = OrderedDict( 97 | ( 98 | ("Appartement {{building_number}}.", 0.2), 99 | ("Appt. 
            ("Appt. {{building_number}}.", 0.1),
            ("Bâtiment {{building_number}}", 0.2),
            ("Bâtiment {{city}}", 0.1),
            ("Immeuble {{building_number}}", 0.2),
            ("Immeuble {{city}}", 0.1),
            ("Bâtiment {{city}}, Appt. {{building_number}}", 0.1),
            ("Immeuble {{city}}, Appt. {{building_number}}", 0.1),
            ("Villa {{city}}", 0.1),
            ("Résidence {{city}}", 0.2),
        )
    )

    place_of_birth_format = OrderedDict(
        (
            ("{{city}} ({{department_number}})", 0.95),
            ("{{city}} ({{country}})", 0.05),
        )
    )

    def department_number(self) -> str:
        choices = list(range(1, 96)) + list(range(971, 990))
        return str(random.choice(choices)).zfill(2)

    def place_of_birth(self) -> str:
        if self not in self.generator.providers:
            self.generator.add_provider(self)
        pattern: str = self.random_element(self.place_of_birth_format)
        return self.generator.parse(pattern)

    def building_number_extension(self) -> str:
        return self.random_element(self.building_number_extensions)

    def building_number(self) -> str:
        if self not in self.generator.providers:
            self.generator.add_provider(self)
        pattern: str = self.random_element(self.building_number_formats)
        return self.numerify(self.generator.parse(pattern))

    def building_name(self) -> str:
        if self not in self.generator.providers:
            self.generator.add_provider(self)
        pattern: str = self.random_element(self.building_name_formats)
        return self.generator.parse(pattern)

    def street_address(self) -> str:
        if self not in self.generator.providers:
            self.generator.add_provider(self)
        pattern: str = self.random_element(self.street_address_formats)
        return self.generator.parse(pattern)

    def address(self) -> str:
        if self not in self.generator.providers:
            self.generator.add_provider(self)
        pattern: str = self.random_element(self.address_formats)
        return self.generator.parse(pattern)
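

# Usage sketch: the provider registers itself on first use and fills the
# fictitious formats defined above.
if __name__ == "__main__":
    from faker import Faker

    provider = Provider(Faker("fr_FR"))
    print(provider.place_of_birth())  # e.g. "Sainteboeuf (42)"
    print(provider.address())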
street_address_formats = OrderedDict( 65 | ( 66 | ("{{building_number}} {{street_name}}", 0.95), 67 | ("{{street_name}}", 0.05), 68 | ) 69 | ) 70 | 71 | address_formats = OrderedDict( 72 | ( 73 | ("{{postcode}} {{city}}\n{{street_address}}", 0.7), 74 | ( 75 | "{{postcode}} {{city}}\n{{building_name}}\n{{street_address}}", 76 | 0.2, 77 | ), 78 | ) 79 | ) 80 | 81 | building_name_formats = OrderedDict( 82 | ( 83 | ("Gebouw {{building_number}}", 0.4), 84 | ("Gebouw {{city}}", 0.4), 85 | ("Huis {{building_number}}", 0.2), 86 | ("Appartement {{building_number}}", 0.2), 87 | ) 88 | ) 89 | 90 | provinces = ( 91 | "Bakkermol", 92 | "Culemtrop", 93 | "Dekkerstal", 94 | "Fietslie", 95 | "Gemertland", 96 | "Martrijk", 97 | "Noord-Bremdal", 98 | "Prontijse", 99 | "Ugodeek", 100 | "Zeemania", 101 | "Zuid-Bremdal", 102 | ) 103 | 104 | place_of_birth_format = OrderedDict( 105 | ( 106 | ("{{city}} ({{administrative_unit}})", 0.9), 107 | ("{{city}} ({{country}})", 0.05), 108 | ("{{country}}", 0.05), 109 | ) 110 | ) 111 | 112 | def city_prefix(self) -> str: 113 | return self.random_element(self.city_prefixes) 114 | 115 | def city_name(self) -> str: 116 | if self not in self.generator.providers: 117 | self.generator.add_provider(self) 118 | pattern: str = self.random_element(self.city_formats) 119 | return self.generator.parse(pattern).title() 120 | 121 | city = city_name 122 | 123 | def administrative_unit(self) -> str: 124 | return self.random_element(self.provinces) 125 | 126 | province = administrative_unit 127 | 128 | def building_number_extension(self) -> str: 129 | return self.random_element(self.building_number_extensions) 130 | 131 | def building_number(self) -> str: 132 | if self not in self.generator.providers: 133 | self.generator.add_provider(self) 134 | pattern: str = self.random_element(self.building_number_formats) 135 | return self.numerify(self.generator.parse(pattern)) 136 | 137 | def building_name(self) -> str: 138 | if self not in self.generator.providers: 139 | self.generator.add_provider(self) 140 | pattern: str = self.random_element(self.building_name_formats) 141 | return self.generator.parse(pattern) 142 | 143 | def street_address(self) -> str: 144 | if self not in self.generator.providers: 145 | self.generator.add_provider(self) 146 | pattern: str = self.random_element(self.street_address_formats) 147 | return self.generator.parse(pattern) 148 | 149 | def address(self) -> str: 150 | if self not in self.generator.providers: 151 | self.generator.add_provider(self) 152 | pattern: str = self.random_element(self.address_formats) 153 | return self.generator.parse(pattern) 154 | 155 | def place_of_birth(self) -> str: 156 | if self not in self.generator.providers: 157 | self.generator.add_provider(self) 158 | pattern: str = self.random_element(self.place_of_birth_format) 159 | return self.generator.parse(pattern) 160 | 161 | 162 | -------------------------------------------------------------------------------- /docxpand/providers/address/pt_PT/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import typing as tp 3 | 4 | from faker.providers.address.pt_PT import Provider as AddressProvider 5 | 6 | 7 | class Provider(AddressProvider): 8 | __use_weighting__ = True 9 | 10 | city_formats = OrderedDict( 11 | ( 12 | ("{{first_name}}", 0.1), 13 | ("{{last_name}}", 0.5), 14 | ("{{city_prefix}} {{first_name}}", 0.05), 15 | ("{{city_prefix}} {{last_name}}", 0.05), 16 | ("{{last_name}} de {{last_name}}", 0.1), 17 | 
("{{last_name}} do {{last_name}}", 0.1), 18 | ("{{city_prefix}} {{last_name}} de {{last_name}}", 0.05), 19 | ("{{city_prefix}} {{last_name}} do {{last_name}}", 0.05), 20 | ) 21 | ) 22 | 23 | city_prefixes = [ 24 | "Santa", 25 | "Vila", 26 | "São" 27 | ] 28 | 29 | distritos = ( 30 | "Binaurèm", 31 | "Bregoniá", 32 | "Carçalèm", 33 | "Castelo Bianca", 34 | "Colação", 35 | "Émiria", 36 | "Folacia", 37 | "Grandiá", 38 | "Lesboám", 39 | "Mereu", 40 | "Nolita", 41 | "Portalèm", 42 | "Ribeiro", 43 | "Santa Gaía", 44 | "Vila Francica", 45 | ) 46 | 47 | place_of_birth_format = OrderedDict( 48 | ( 49 | ("{{city}}*{{administrative_unit}}", 0.9), 50 | ("{{city}} ({{country}})", 0.05), 51 | ("{{country}}", 0.05), 52 | ) 53 | ) 54 | 55 | def city_prefix(self) -> str: 56 | return self.random_element(self.city_prefixes) 57 | 58 | def city_name(self) -> str: 59 | if self not in self.generator.providers: 60 | self.generator.add_provider(self) 61 | pattern: str = self.random_element(self.city_formats) 62 | return self.generator.parse(pattern) 63 | 64 | city = city_name 65 | 66 | def place_of_birth(self) -> str: 67 | if self not in self.generator.providers: 68 | self.generator.add_provider(self) 69 | pattern: str = self.random_element(self.place_of_birth_format) 70 | return self.generator.parse(pattern) 71 | -------------------------------------------------------------------------------- /docxpand/providers/authority/en_GB/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import typing as tp 3 | 4 | from faker.providers.address.en_GB import Provider as AddressProvider 5 | 6 | class Provider(AddressProvider): 7 | __use_weighting__ = True 8 | 9 | authority_format = OrderedDict( 10 | ( 11 | ("{{last_name}}, Prefecture Officer of {{city}}shire", 0.2), 12 | ("{{last_name}}, Civil Status Officer in {{city}}", 0.5), 13 | ("{{last_name}}, Mayor of {{city}}", 0.1), 14 | ("{{last_name}}, Ambassador of Leobrit in {{country}}", 0.05), 15 | ("{{last_name}}, Prime Minister", 0.05), 16 | ("{{last_name}}, Home Secretary", 0.05), 17 | ("{{last_name}}, Department of Defense", 0.05) 18 | ) 19 | ) 20 | 21 | def authority( 22 | self, 23 | max_length: tp.Optional[int] = None, 24 | ) -> str: 25 | if self not in self.generator.providers: 26 | self.generator.add_provider(self) 27 | pattern: str = self.random_element(self.authority_format) 28 | value = self.generator.parse(pattern) 29 | if max_length is None: 30 | max_length = 255 31 | while len(value) > max_length: 32 | value = self.generator.parse(pattern) 33 | return value 34 | -------------------------------------------------------------------------------- /docxpand/providers/authority/fr_FR/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | import random 3 | from collections import OrderedDict 4 | 5 | from docxpand.providers.address.fr_FR import Provider as AddressProvider 6 | 7 | class Provider(AddressProvider): 8 | __use_weighting__ = True 9 | 10 | authority_format = OrderedDict( 11 | ( 12 | ("Préfecture de {{city}} ({{department_number}})", 0.70), 13 | ("Sous-préfecture de {{city}} ({{department_number}})", 0.25), 14 | ("Ministère de l'intérieur", 0.03), 15 | ("Ministère des affaires étrangères", 0.02), 16 | ) 17 | ) 18 | 19 | def department_number(self) -> str: 20 | choices = list(range(1, 96)) + list(range(971, 990)) 21 | return str(random.choice(choices)).zfill(2) 22 | 23 | def authority( 24 | self, 25 | max_length: 
tp.Optional[int] = None, 26 | ): 27 | if self not in self.generator.providers: 28 | self.generator.add_provider(self) 29 | pattern: str = self.random_element(self.authority_format) 30 | 31 | value = self.generator.parse(pattern) 32 | if max_length is None: 33 | max_length = 255 34 | while len(value) > max_length: 35 | value = self.generator.parse(pattern) 36 | 37 | return value 38 | -------------------------------------------------------------------------------- /docxpand/providers/authority/nl_NL/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from collections import OrderedDict 3 | 4 | from docxpand.providers.address.nl_NL import Provider as AddressProvider 5 | 6 | class Provider(AddressProvider): 7 | __use_weighting__ = True 8 | 9 | authority_format = OrderedDict( 10 | ( 11 | ("Burg. van {{city}}", 0.95), 12 | ("Gouverneur van {{administrative_unit}}", 0.05), 13 | ) 14 | ) 15 | 16 | def authority( 17 | self, 18 | max_length: tp.Optional[int] = None, 19 | ): 20 | if self not in self.generator.providers: 21 | self.generator.add_provider(self) 22 | pattern: str = self.random_element(self.authority_format) 23 | 24 | value = self.generator.parse(pattern) 25 | if max_length is None: 26 | max_length = 255 27 | while len(value) > max_length: 28 | value = self.generator.parse(pattern) 29 | 30 | return value 31 | -------------------------------------------------------------------------------- /docxpand/providers/authority/pt_PT/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from collections import OrderedDict 3 | 4 | from docxpand.providers.address.pt_PT import Provider as AddressProvider 5 | 6 | class Provider(AddressProvider): 7 | __use_weighting__ = True 8 | 9 | authority_format = OrderedDict( 10 | ( 11 | ("{{city}}*{{administrative_unit}}", 0.8), 12 | ("{{city}} ({{country}})", 0.1), 13 | ("Serv. Estr. e Fronteiras", 0.1) 14 | ) 15 | ) 16 | 17 | def authority( 18 | self, 19 | max_length: tp.Optional[int] = None, 20 | ): 21 | if self not in self.generator.providers: 22 | self.generator.add_provider(self) 23 | pattern: str = self.random_element(self.authority_format) 24 | 25 | value = self.generator.parse(pattern) 26 | if max_length is None: 27 | max_length = 255 28 | while len(value) > max_length: 29 | value = self.generator.parse(pattern) 30 | 31 | return value 32 | -------------------------------------------------------------------------------- /docxpand/providers/barcode/__init__.py: -------------------------------------------------------------------------------- 1 | """Barcode and datamatrix provider.""" 2 | import typing as tp 3 | 4 | import dateparser 5 | import zxingcpp 6 | 7 | from docxpand.image import ColorSpace, Image 8 | from docxpand.normalizer import ( 9 | cut_and_pad_right, 10 | normalize_name, 11 | ) 12 | from docxpand.utils import get_field_from_any_side 13 | 14 | BARCODE_MAPPING = { 15 | "Barcode": zxingcpp.BarcodeFormat.Code128, 16 | "Datamatrix": zxingcpp.BarcodeFormat.DataMatrix, 17 | } 18 | 19 | 20 | class Provider: 21 | """Barcode and datamatrix provider class.""" 22 | 23 | def generate_barcode( 24 | self, 25 | document_code: str, 26 | barcode_format_name: str, 27 | width: int, 28 | height: int, 29 | existing_fields: tp.Optional[tp.Dict] = None, 30 | ) -> Image: 31 | """Generate a barcode or a datamatrix. 
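Code128 barcodes ("Barcode") encode only the document number, while DataMatrix codes encode the document code, nationality, family and given names, birth date and document number, joined by "/".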
32 | 33 | Args: 34 | document_code: the document type code 35 | barcode_format_name: the format of the barcode ("Barcode" or "Datamatrix") 36 | width: width of the generated barcode image, in pixels 37 | height: height of the generated barcode image, in pixels 38 | existing_fields: the other generated fields of the document 39 | 40 | Returns: 41 | the generated barcode as an Image 42 | """ 43 | if barcode_format_name not in BARCODE_MAPPING: 44 | raise ValueError(f"Barcode format {barcode_format_name!r} is not valid.") 45 | barcode_format = BARCODE_MAPPING[barcode_format_name] 46 | cut_document_code = cut_and_pad_right(document_code, 2) 47 | nationality = cut_and_pad_right( 48 | get_field_from_any_side(existing_fields, "nationality", "UTO"), 3 49 | ) 50 | family_name = normalize_name( 51 | get_field_from_any_side(existing_fields, "family_name", "SAMPLE") 52 | ) 53 | 54 | given_name = normalize_name( 55 | get_field_from_any_side(existing_fields, "given_name", "SAMPLE")[0]  # keep only the first given name (or its initial) 56 | ) 57 | 58 | birth_date = dateparser.parse( 59 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970") 60 | ).strftime("%Y%m%d") 61 | document_number = get_field_from_any_side( 62 | existing_fields, "document_number", "123456789" 63 | ) 64 | generated_barcode = zxingcpp.write_barcode( 65 | barcode_format, 66 | "/".join( 67 | [ 68 | cut_document_code, 69 | nationality, 70 | family_name, 71 | given_name, 72 | birth_date, 73 | document_number, 74 | ] 75 | if barcode_format_name == "Datamatrix" 76 | else [document_number] 77 | ), 78 | width, 79 | height, 80 | ) 81 | 82 | return Image(generated_barcode, space=ColorSpace.GRAYSCALE) 83 | -------------------------------------------------------------------------------- /docxpand/providers/date_time/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | from datetime import datetime 3 | 4 | import dateparser 5 | from dateutil.relativedelta import relativedelta 6 | 7 | from docxpand.utils import nested_get 8 | 9 | DATE_PARSER_SETTINGS = { 10 | "DATE_ORDER": "DMY", 11 | "PARSERS": ["absolute-time"], 12 | "STRICT_PARSING": True, 13 | } 14 | 15 | 16 | class Provider: 17 | def date_plus_delta( 18 | self, 19 | field_path: tp.List[str], 20 | years: int, 21 | months: int, 22 | days: int, 23 | existing_fields: tp.Optional[tp.Dict] = None, 24 | ) -> tp.Optional[datetime]: 25 | if not existing_fields: 26 | return None 27 | 28 | field_value = nested_get(existing_fields, field_path, None) 29 | if not field_value: 30 | return None 31 | 32 | parsed = dateparser.parse(field_value, settings=DATE_PARSER_SETTINGS) 33 | if parsed is None: 34 | # dateparser returns None when strict parsing fails 35 | return None 36 | return parsed + relativedelta(years=years, months=months, days=days) 37 | -------------------------------------------------------------------------------- /docxpand/providers/id/en_GB/driving_license.py: -------------------------------------------------------------------------------- 1 | import random 2 | import typing as tp 3 | from datetime import datetime 4 | 5 | import dateparser 6 | from dateutil.relativedelta import relativedelta 7 | from faker import Faker 8 | from faker.providers.date_time import Provider as DateTimeProvider 9 | 10 | from docxpand.normalizer import ( 11 | collapse_whitespace, 12 | normalize, 13 | replace_ligatures, 14 | rm_accents, 15 | rm_punct, 16 | ) 17 | from docxpand.utils import get_field_from_any_side 18 | 19 | # Settings for dateparser 20 | DATE_PARSER_SETTING = { 21 | "DATE_ORDER": "DMY", 22 | "PARSERS": ["absolute-time"], 23 | "STRICT_PARSING": True, 24 | } 25 | 26 | 27 | class Provider: 28 | def __init__(self, generator: Faker) -> None: 29 | self.generator = generator 30 | self.date_time_provider = DateTimeProvider(generator) 31 
| 32 | @staticmethod 33 | def normalize_name(name: str) -> str: 34 | return ( 35 | normalize( 36 | name, 37 | [rm_accents, rm_punct, replace_ligatures, collapse_whitespace], 38 | ) 39 | .strip() 40 | .upper() 41 | .replace(" ", "") 42 | ) 43 | 44 | @staticmethod 45 | def document_number_full( 46 | existing_fields: tp.Optional[tp.Dict] = None, 47 | ) -> str: 48 | """Full document number generator for GBR driving license.""" 49 | # Get names (join multiple names before normalizing) 50 | family_name = get_field_from_any_side(existing_fields, "family_name", "SAMPLE") 51 | if isinstance(family_name, list): 52 | family_name = " ".join(family_name) 53 | family_name = Provider.normalize_name(family_name) 54 | given_name = get_field_from_any_side(existing_fields, "given_name", "SAMPLE") 55 | if isinstance(given_name, list): 56 | given_name = " ".join(given_name) 57 | given_name = Provider.normalize_name(given_name) 58 | full_name = family_name + given_name 59 | 60 | # Get document number 61 | document_number = get_field_from_any_side( 62 | existing_fields, "document_number_short", "123456AB7CD" 63 | ) 64 | 65 | # Make fake checksum 66 | checksum = f"{random.randint(0, 99):02d}" 67 | 68 | return f"{full_name[:5]}{document_number} {checksum}" 69 | 70 | @staticmethod 71 | def allowed_categories( 72 | probabilities: tp.Optional[tp.Dict] = None, 73 | ) -> str: 74 | """Return a list of allowed categories separated by '/'.""" 75 | if not probabilities: 76 | return "" 77 | 78 | allowed_categories = [] 79 | for category, probability in probabilities.items(): 80 | if random.random() <= probability: 81 | allowed_categories.append(category) 82 | 83 | return "/".join(allowed_categories) 84 | 85 | def license_date( 86 | self, 87 | category: str, 88 | date_type: str = "start", 89 | existing_fields: tp.Optional[tp.Dict] = None, 90 | ) -> tp.Optional[datetime]: 91 | if date_type not in ["start", "end"]: 92 | raise ValueError( 93 | f"The date_type must be 'start' or 'end', got {date_type}."
94 | ) 95 | 96 | # We need special checks for f/k/l/n/p/q categories since the dates 97 | # are grouped in one single line in the table 98 | fklnpq = "fklnpq" 99 | categories = get_field_from_any_side( 100 | existing_fields, "license_categories", "" 101 | ).split("/") 102 | if category != fklnpq and category not in categories: 103 | return None 104 | if category == fklnpq and not any([cat in categories for cat in fklnpq]): 105 | return None 106 | 107 | start_date = None 108 | if date_type == "start": 109 | if existing_fields: 110 | for key, value in existing_fields.get("back", {}).items(): 111 | if "start_date" in key and key[0].lower() == category[0].lower(): 112 | start_date = dateparser.parse( 113 | value, settings=DATE_PARSER_SETTING 114 | ) 115 | if start_date: 116 | return start_date 117 | birth_date = dateparser.parse( 118 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970"), 119 | settings=DATE_PARSER_SETTING, 120 | ) 121 | end_date = dateparser.parse( 122 | get_field_from_any_side(existing_fields, "date_issued", ""), 123 | settings=DATE_PARSER_SETTING, 124 | ) 125 | if not end_date: 126 | end_date = "today" 127 | 128 | return self.date_time_provider.date_between( 129 | start_date=birth_date + relativedelta(years=17), 130 | end_date=end_date, 131 | ) 132 | 133 | # elif date_type == "end": 134 | if existing_fields: 135 | for key, value in existing_fields.get("back", {}).items(): 136 | if "start_date" in key and key[0].lower() == category[0].lower(): 137 | start_date = dateparser.parse(value, settings=DATE_PARSER_SETTING) 138 | if start_date: 139 | break 140 | 141 | if not start_date: 142 | birth_date = dateparser.parse( 143 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970"), 144 | settings=DATE_PARSER_SETTING, 145 | ) 146 | end_date = dateparser.parse( 147 | get_field_from_any_side(existing_fields, "date_issued", ""), 148 | settings=DATE_PARSER_SETTING, 149 | ) 150 | if not end_date: 151 | end_date = "today" 152 | start_date = self.date_time_provider.date_between( 153 | start_date=birth_date + relativedelta(years=17), 154 | end_date=end_date, 155 | ) 156 | 157 | return start_date + relativedelta(years=50, days=-1) 158 | 159 | @staticmethod 160 | def license_restrictions( 161 | category: str, 162 | max_restrictions: int = 5, 163 | existing_fields: tp.Optional[tp.Dict] = None, 164 | ) -> str: 165 | # We need special checks for f/k/l/n/p/q categories since the 166 | # restrictions are grouped in one single cell in the table 167 | fklnpq = "fklnpq" 168 | categories = get_field_from_any_side( 169 | existing_fields, "license_categories", "" 170 | ).split("/") 171 | if category != fklnpq and category not in categories: 172 | return "" 173 | if category == fklnpq and not any([cat in categories for cat in fklnpq]): 174 | return "" 175 | 176 | # Sources : 177 | # https://www.gov.uk/driving-licence-codes 178 | # https://www.thesun.co.uk/motors/4016787/codes-on-your-driving-licence-revealed-fine/ 179 | probabilities = { 180 | "01": 0.05, 181 | "02": 0.05, 182 | "10": 0.01, 183 | "15": 0.01, 184 | "20": 0.01, 185 | "25": 0.01, 186 | "30": 0.01, 187 | "31": 0.01, 188 | "32": 0.01, 189 | "33": 0.01, 190 | "35": 0.01, 191 | "40": 0.05, 192 | "42": 0.01, 193 | "43": 0.01, 194 | "44": 0.01, 195 | "44(1)": 0.01, 196 | "44(2)": 0.01, 197 | "44(3)": 0.01, 198 | "44(4)": 0.01, 199 | "44(5)": 0.01, 200 | "44(6)": 0.01, 201 | "44(7)": 0.01, 202 | "44(8)": 0.01, 203 | "44(11)": 0.01, 204 | "44(12)": 0.01, 205 | "45": 0.01, 206 | "46": 0.01, 207 | "70": 0.01, 208 | "71": 
0.01, 209 | "78": 0.01, 210 | "79": 0.01, 211 | "79(2)": 0.01, 212 | "79(3)": 0.01, 213 | "96": 0.01, 214 | "97": 0.01, 215 | "101": 0.05, 216 | "102": 0.01, 217 | "103": 0.01, 218 | "105": 0.05, 219 | "106": 0.05, 220 | "107": 0.02, 221 | "108": 0.01, 222 | "110": 0.01, 223 | "111": 0.02, 224 | "113": 0.01, 225 | "114": 0.01, 226 | "115": 0.02, 227 | "118": 0.01, 228 | "119": 0.01, 229 | "121": 0.01, 230 | "122": 0.02, 231 | "125": 0.02, 232 | } 233 | 234 | restrictions = [] 235 | for restriction, probability in probabilities.items(): 236 | if random.random() <= probability: 237 | restrictions.append(restriction) 238 | 239 | return ",".join(restrictions[:max_restrictions]) 240 | 241 | @staticmethod 242 | def expires_short( 243 | existing_fields: tp.Optional[tp.Dict] = None, 244 | ) -> str: 245 | expires = get_field_from_any_side(existing_fields, "expires", None) 246 | if not expires: 247 | return "" 248 | 249 | expires_datetime = dateparser.parse(expires, settings=DATE_PARSER_SETTING) 250 | if not expires_datetime: 251 | return "" 252 | return expires_datetime.strftime("%b%y").upper() 253 | -------------------------------------------------------------------------------- /docxpand/providers/mrz/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | import dateparser 4 | 5 | from docxpand.normalizer import ( 6 | cut_and_pad_right, 7 | normalize_name, 8 | ) 9 | from docxpand.utils import get_field_from_any_side 10 | 11 | 12 | class Provider: 13 | filler: str = "<" 14 | 15 | @staticmethod 16 | def checksum(string: str) -> str: 17 | """Checksum of a string in a MRZ, using the 7-3-1 weighting. 18 | 19 | Code inspired from section "Clés de contrôle" in 20 | http://fr.wikipedia.org/wiki/Carte_nationale_d%27identit%C3%A9_en_France 21 | 22 | Args: 23 | string: the string to check 24 | 25 | Returns: 26 | check digit for the given string, e.g. "5" for "AB2134<<<" 27 | """ 28 | checker = 0 29 | weights = [7, 3, 1] 30 | for position, char in enumerate(string): 31 | weight = weights[position % 3] 32 | if char == "<": 33 | val = 0 34 | elif char.isdigit(): 35 | val = int(char) 36 | else: 37 | val = ord(char) - 55 38 | checker += val * weight 39 | return str(checker % 10) 40 | 41 | def td1( 42 | self, 43 | document_code: str, 44 | gender: str, 45 | existing_fields: tp.Optional[tp.Dict] = None, 46 | ) -> tp.List[str]: 47 | """Generate a TD1 MRZ.
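Produces the three 30-character lines of the TD1 format (ID-1, credit-card sized documents), including the check digits computed by checksum().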
48 | 49 | See Also: 50 | ICAO Doc 9303 Part 5, paragraph 4.2.2 51 | """ 52 | line_length = 30 53 | 54 | # Line 1 55 | document_code = cut_and_pad_right(document_code, 2, self.filler) 56 | nationality = cut_and_pad_right( 57 | get_field_from_any_side(existing_fields, "nationality", "UTO"), 58 | 3, 59 | self.filler, 60 | ) 61 | document_number = get_field_from_any_side( 62 | existing_fields, "document_number", "123456789" 63 | ) 64 | document_number_check_digit = self.checksum(document_number) 65 | if len(document_number) <= 9: 66 | document_number += document_number_check_digit 67 | else: # long document number (see paragraph 4.2.4) 68 | document_number = ( 69 | document_number[:9] 70 | + self.filler 71 | + document_number[9:] 72 | + document_number_check_digit 73 | ) 74 | line_1 = document_code + nationality + document_number 75 | line_1 = cut_and_pad_right(line_1, line_length, self.filler) 76 | 77 | # Line 2 78 | birth_date = dateparser.parse( 79 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970") 80 | ).strftime("%y%m%d") 81 | sex = {"male": "M", "female": "F", "nonbinary": self.filler}[gender] 82 | expires = dateparser.parse( 83 | get_field_from_any_side(existing_fields, "expires", "31.12.2030") 84 | ).strftime("%y%m%d") 85 | line_2 = ( 86 | birth_date 87 | + self.checksum(birth_date) 88 | + sex 89 | + expires 90 | + self.checksum(expires) 91 | + nationality 92 | ) 93 | line_2 = cut_and_pad_right(line_2, line_length - 1, self.filler) 94 | composite_check_digit = self.checksum( 95 | line_1[5:] + line_2[0:7] + line_2[8:15] + line_2[18:29] 96 | ) 97 | line_2 += composite_check_digit 98 | 99 | # Line 3 100 | family_name = get_field_from_any_side(existing_fields, "family_name", "SAMPLE") 101 | if isinstance(family_name, list): 102 | family_name = ", ".join(family_name) 103 | family_name = normalize_name(family_name, self.filler) 104 | given_name = get_field_from_any_side(existing_fields, "given_name", "SAMPLE") 105 | if isinstance(given_name, list): 106 | given_name = ", ".join(given_name) 107 | given_name = normalize_name(given_name, self.filler) 108 | line_3 = family_name + self.filler * 2 + given_name 109 | line_3 = cut_and_pad_right(line_3, line_length, self.filler) 110 | 111 | return [line_1, line_2, line_3] 112 | 113 | def td2( 114 | self, 115 | document_code: str, 116 | gender: str, 117 | existing_fields: tp.Optional[tp.Dict] = None, 118 | ) -> tp.List[str]: 119 | """Generate a TD2 MRZ. 
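Produces the two 36-character lines of the TD2 format (ID-2 sized documents), including the individual check digits and the final composite check digit.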
120 | 121 | See Also: 122 | ICAO Doc 9303 Part 5, paragraph 4.2.2 123 | """ 124 | line_length = 36 125 | 126 | document_code = cut_and_pad_right(document_code, 2, self.filler) 127 | nationality = cut_and_pad_right( 128 | get_field_from_any_side(existing_fields, "nationality", "UTO"), 129 | 3, 130 | self.filler, 131 | ) 132 | document_number = get_field_from_any_side( 133 | existing_fields, "document_number", "123456789" 134 | ) 135 | birth_date = dateparser.parse( 136 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970") 137 | ).strftime("%y%m%d") 138 | sex = {"male": "M", "female": "F", "nonbinary": self.filler}[gender] 139 | expires = dateparser.parse( 140 | get_field_from_any_side(existing_fields, "expires", "31.12.2030") 141 | ).strftime("%y%m%d") 142 | family_name = get_field_from_any_side(existing_fields, "family_name", "SAMPLE") 143 | if isinstance(family_name, list): 144 | family_name = ", ".join(family_name) 145 | family_name = normalize_name(family_name, self.filler) 146 | given_name = get_field_from_any_side(existing_fields, "given_name", "SAMPLE") 147 | if isinstance(given_name, list): 148 | given_name = ", ".join(given_name) 149 | given_name = normalize_name(given_name, self.filler) 150 | 151 | # line_1 152 | line_1 = ( 153 | document_code + nationality + family_name + self.filler * 2 + given_name 154 | ) 155 | line_1 = cut_and_pad_right(line_1, line_length, self.filler) 156 | # line_2 157 | line_2 = ( 158 | document_number 159 | + self.checksum(document_number) 160 | + nationality 161 | + birth_date 162 | + self.checksum(birth_date) 163 | + sex 164 | + expires 165 | + self.checksum(expires) 166 | ) 167 | line_2 = cut_and_pad_right(line_2, line_length - 1, self.filler) 168 | line_2 += self.checksum( 169 | line_2[0:10] + line_2[13:20] + line_2[21:35], 170 | ) 171 | 172 | return [line_1, line_2] 173 | 174 | def td3( 175 | self, 176 | document_code: str, 177 | gender: str, 178 | existing_fields: tp.Optional[tp.Dict] = None, 179 | ) -> tp.List[str]: 180 | """Generate a TD3 MRZ. 
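Produces the two 44-character lines of the TD3 format used in passports.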
181 | 182 | See Also: 183 | ICAO Doc 9303 Part 5, paragraph 4.2.2 184 | """ 185 | line_length = 44 186 | 187 | document_code = cut_and_pad_right(document_code, 2, self.filler) 188 | nationality = cut_and_pad_right( 189 | get_field_from_any_side(existing_fields, "nationality", "UTO"), 190 | 3, 191 | self.filler, 192 | ) 193 | document_number = get_field_from_any_side( 194 | existing_fields, "document_number", "123456789" 195 | ) 196 | birth_date = dateparser.parse( 197 | get_field_from_any_side(existing_fields, "birth_date", "01.01.1970") 198 | ).strftime("%y%m%d") 199 | sex = {"male": "M", "female": "F", "nonbinary": self.filler}[gender] 200 | expires = dateparser.parse( 201 | get_field_from_any_side(existing_fields, "expires", "31.12.2030") 202 | ).strftime("%y%m%d") 203 | family_name = get_field_from_any_side(existing_fields, "family_name", "SAMPLE") 204 | if isinstance(family_name, list): 205 | family_name = ", ".join(family_name) 206 | family_name = normalize_name(family_name, self.filler) 207 | given_name = get_field_from_any_side(existing_fields, "given_name", "SAMPLE") 208 | if isinstance(given_name, list): 209 | given_name = ", ".join(given_name) 210 | given_name = normalize_name(given_name, self.filler) 211 | 212 | # line_1 213 | line_1 = ( 214 | document_code + nationality + family_name + self.filler * 2 + given_name 215 | ) 216 | line_1 = cut_and_pad_right(line_1, line_length, self.filler) 217 | # line_2 218 | line_2 = ( 219 | document_number 220 | + self.checksum(document_number) 221 | + nationality 222 | + birth_date 223 | + self.checksum(birth_date) 224 | + sex 225 | + expires 226 | + self.checksum(expires) 227 | ) 228 | line_2 = cut_and_pad_right(line_2, line_length - 1, self.filler) 229 | line_2 += self.checksum(line_2[0:10] + line_2[13:20] + line_2[21:44]) 230 | 231 | return [line_1, line_2] 232 | -------------------------------------------------------------------------------- /docxpand/providers/person/es_ES/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from faker.providers.person.es_ES import Provider as PersonProvider 4 | 5 | 6 | class Provider(PersonProvider): 7 | __use_weighting__ = True 8 | 9 | parents_names_format = OrderedDict( 10 | ( 11 | ("{{first_name_male}} / {{first_name_female}}", 0.9), 12 | ("{{first_name_male}} / NC", 0.04), 13 | ("NC / {{first_name_female}}", 0.04), 14 | ("NC", 0.02), 15 | ) 16 | ) 17 | 18 | def parents_names(self) -> str: 19 | if self not in self.generator.providers: 20 | self.generator.add_provider(self) 21 | pattern: str = self.random_element(self.parents_names_format) 22 | return self.generator.parse(pattern) 23 | -------------------------------------------------------------------------------- /docxpand/providers/photo/halftone/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Gonçalo Oliveira 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docxpand/providers/photo/halftone/__init__.py: -------------------------------------------------------------------------------- 1 | """Largely inspired from https://github.com/GravO8/halftone. MIT LICENSE.""" 2 | import typing as tp 3 | from math import ceil 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | from docxpand.image import Image 9 | 10 | 11 | def halftone( 12 | image: Image, 13 | side: int = 20, 14 | jump: tp.Optional[int] = None, 15 | bg_color: tp.Tuple[int, int, int] = (255, 255, 255), 16 | fg_color: tp.Tuple[int, int, int] = (0, 0, 0), 17 | alpha: float = 1.4, 18 | ) -> Image: 19 | """Generate a half-tone image. 20 | 21 | Args: 22 | image: input image 23 | side: length (in pixels) of the side of each square that composes the 24 | output image (default is 20) 25 | jump: length (in pixels) of the side of each square the program will 26 | scan from original image (default is 1% of the minimum between 27 | the width and height) 28 | bg_color: rgb value of the background color of the output image 29 | (default is white) 30 | fg_color: rgb value of the color of the circles of the output image 31 | (default is black) 32 | alpha: coefficient that determines how big the circles can be; when 33 | alpha is 1, the maximum radius is side/2 (default is 1.4) 34 | 35 | Returns: 36 | the half-toned image 37 | """ 38 | if side <= 0: 39 | raise ValueError(f"side must be strictly positive (got {side}).") 40 | if alpha <= 0: 41 | raise ValueError(f"alpha must be strictly positive (got {alpha}).") 42 | height, width = image.shape[:2] 43 | if jump is None: 44 | jump = ceil(min(height, width) * 0.01) 45 | if jump <= 0: 46 | raise ValueError(f"jump must be strictly positive (got {jump}).") 47 | bg_color = bg_color[::-1]  # RGB to BGR, for OpenCV 48 | fg_color = fg_color[::-1]  # RGB to BGR, for OpenCV 49 | 50 | height_output, width_output = side * ceil(height / jump), side * ceil( 51 | width / jump 52 | ) 53 | canvas = np.zeros((height_output, width_output, 3), np.uint8) 54 | output_square = np.zeros((side, side, 3), np.uint8) 55 | 56 | x_output, y_output = 0, 0 57 | for y in range(0, height, jump): 58 | for x in range(0, width, jump): 59 | output_square[:] = bg_color 60 | intensity = 1 - np.mean(image[y : y + jump, x : x + jump]) / 255 61 | radius = int(alpha * intensity * side / 2) 62 | cv2.circle( 63 | output_square, (side // 2, side // 2), radius, fg_color, -1 64 | ) 65 | canvas[ 66 | y_output : y_output + side, x_output : x_output + side 67 | ] = output_square 68 | x_output += side 69 | y_output += side 70 | x_output = 0 71 | 72 | return canvas 73 | -------------------------------------------------------------------------------- /docxpand/providers/residence_permit/__init__.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | from docxpand.providers import ChoiceProvider 4 | from docxpand.translations.residence_permit import ( 5 | RESIDENCE_PERMIT_OBSERVATIONS, 6 |
RESIDENCE_PERMIT_TYPES_TRANSLATIONS, 7 | RESIDENCE_PERMIT_WORK_OBSERVATIONS, 8 | ) 9 | from docxpand.utils import get_field_from_any_side 10 | 11 | 12 | class Provider(ChoiceProvider): 13 | def generate_permit_type( 14 | self, locale: str, multiline: bool = True 15 | ) -> tp.Union[str, tp.List[str]]: 16 | permit_type_as_lines: tp.List[str] = ( 17 | RESIDENCE_PERMIT_TYPES_TRANSLATIONS[self.choice()][locale] 18 | ) 19 | return_val = ( 20 | permit_type_as_lines 21 | if multiline 22 | else " ".join(permit_type_as_lines) 23 | ) 24 | return return_val 25 | 26 | def generate_observations(self, locale: str): 27 | return RESIDENCE_PERMIT_OBSERVATIONS[self.choice()][locale] 28 | 29 | def generate_observations_multilines(self, locale, existing_fields): 30 | observations = [ 31 | get_field_from_any_side(existing_fields, "observations", "") 32 | ] 33 | observations.extend( 34 | RESIDENCE_PERMIT_WORK_OBSERVATIONS[self.choice()][locale] 35 | ) 36 | return observations 37 | -------------------------------------------------------------------------------- /docxpand/providers/signature/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | import typing as tp 3 | from collections import OrderedDict 4 | 5 | from docxpand.providers import ChoiceProvider 6 | from docxpand.utils import get_field_from_any_side 7 | 8 | 9 | class Provider: 10 | signature_formats = OrderedDict( 11 | ( 12 | ("{family_name}", 1.0), 13 | 14 | ("{family_name} {given_name:1.1}.", 0.1), 15 | ("{given_name:1.1}. {family_name}", 0.1), 16 | ("{given_name:1.1}.{family_name}", 0.1), 17 | ("{given_name:1.1} {family_name}", 0.1), 18 | 19 | ("{given_name:1.1}{family_name:.1}", 0.1), 20 | ("{given_name:1.1}.{family_name:.1}.", 0.1), 21 | ("{given_name:1.1} {family_name:.1}", 0.1), 22 | ("{given_name:1.1}. {family_name:.1}.", 0.1), 23 | 24 | ("{family_name:1.1}{given_name:1.1}", 0.1), 25 | ("{family_name:1.1}.{given_name:1.1}.", 0.1), 26 | ("{family_name:1.1} {given_name:1.1}", 0.1), 27 | ("{family_name:1.1}. {given_name:1.1}.", 0.1), 28 | ) 29 | ) 30 | 31 | def signature(self, existing_fields: tp.Dict) -> str: 32 | pattern: str = ChoiceProvider.random_choice(self.signature_formats) 33 | arguments = {} 34 | if "family_name" in pattern: 35 | name = existing_fields["front"]["family_name"] 36 | if isinstance(name, list): 37 | name = name[0] 38 | if "given_name" in pattern and ("-" in name or " " in name): 39 | separator = "-" if "-" in name else " " 40 | arguments["family_name"] = ( 41 | name[0] + separator + name.split(separator)[1][0] 42 | ) 43 | else: 44 | arguments["family_name"] = name 45 | 46 | if "given_name" in pattern: 47 | arguments["given_name"] = existing_fields["front"]["given_name"] 48 | if isinstance(arguments["given_name"], list): 49 | arguments["given_name"] = arguments["given_name"][0] 50 | 51 | return pattern.format(**arguments) 52 | 53 | def signature_knowing_key(self, existing_fields: tp.Dict, key: str) -> str: 54 | field = get_field_from_any_side(existing_fields, key, None) 55 | if not field: 56 | raise ValueError(f"{key} doesn't exist in existing_fields") 57 | 58 | # Particular case of Leobrit (Name, ...) 59 | if "," in field: 60 | field = field.split(",")[0] 61 | 62 | # Particular case of "Préfecture de ..." / "Sous-préfecture de ..."
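63 | # e.g. "Préfecture de Nice (06)" becomes "Nice": the prefix is removed, 64 | # then the regex below strips the parenthesized department number.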
65 | for prefix in ["Préfecture de", "Sous-préfecture de"]: 66 | if prefix in field: 67 | field = field.replace(prefix, "") 68 | # Remove department number from: {city} ({department_number}) 69 | field = re.sub(r"[\(\[].*?[\)\]]", "", field).strip() 70 | break 71 | 72 | return field 73 | -------------------------------------------------------------------------------- /docxpand/specimen.py: -------------------------------------------------------------------------------- 1 | import os 2 | import typing as tp 3 | 4 | from docxpand.image import Image 5 | 6 | SPECIMENS_DIR = os.path.join( 7 | os.path.dirname(os.path.abspath(__file__)), "specimens" 8 | ) 9 | 10 | def load_specimen(specimen_name: str) -> tp.Optional[Image]: 11 | """Load a document (PRADO) specimen image by name. 12 | 13 | Args: 14 | specimen_name: name of the specimen; the image file name is derived 15 | from it (see specimens/SOURCES.md for the naming convention) 16 | 17 | Returns: 18 | specimen image or None if it is not found 19 | """ 20 | try: 21 | specimen_path = os.path.join( 22 | SPECIMENS_DIR, 23 | f"{specimen_name.lower().replace('_', '-')}.jpg", 24 | ) 25 | if not os.path.exists(specimen_path): 26 | print(f"{specimen_path} not found") 27 | return None 28 | specimen_img = Image.read(specimen_path) 29 | return specimen_img 30 | except Exception: 31 | return None 32 | -------------------------------------------------------------------------------- /docxpand/specimens/SOURCES.md: -------------------------------------------------------------------------------- 1 | # Specimens 2 | This directory must contain specimens of the documents present in the original images (scenes). For our paper, we mainly use driving licenses, identity cards, passports and residence permits that were obtained from the [PRADO](https://www.consilium.europa.eu/prado/) website. We converted them to JPEG format. However, we have no right to distribute or duplicate any materials contained in the PRADO section, as expressed in the [COPYRIGHT](https://www.consilium.europa.eu/en/about-site/copyright/). 3 | 4 | These specimens are used mainly to estimate the color shift and lighting variations to be applied to the synthetically generated document images. 5 | 6 | To use our code base, you will have to download them, rename them, convert them to JPEG format and put them in this directory by your own means. 7 | The naming convention we used is the following: `specimen_path = os.path.join(SPECIMENS_DIR, f"{specimen_name.lower().replace('_', '-')}.jpg")`, e.g. if the class used in the original dataset is "ID_CARD_FRA_2021_FRONT", the name of the specimen image must be "id-card-fra-2021-front.jpg" (see the sketch below). 8 | 9 | # Photos 10 | The sub-folder "photos" contains example photos used to control the pose of generated faces with Stable Diffusion. The sources and license of these photos are described in "photos/SOURCES.md".
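A minimal sketch of that renaming convention (plain Python; the class name is just an example):

```python
# Derive the expected specimen file name from an original document class name.
class_name = "ID_CARD_FRA_2021_FRONT"
file_name = f"{class_name.lower().replace('_', '-')}.jpg"
print(file_name)  # id-card-fra-2021-front.jpg
```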
11 | 12 | -------------------------------------------------------------------------------- /docxpand/specimens/photos/SOURCES.md: -------------------------------------------------------------------------------- 1 | # Sources 2 | - man_1.jpg: https://commons.wikimedia.org/wiki/File:Jacob%27s_passport_photo_2020.jpg 3 | - man_2.jpg: https://commons.wikimedia.org/wiki/File:Russian_passport_photo.JPG 4 | - woman_1.jpg: https://commons.wikimedia.org/wiki/File:Usmanova_Sh._photo.jpg 5 | - woman_2.jpg: https://commons.wikimedia.org/wiki/File:Alison_passport_photo.jpg 6 | 7 | # License and re-use 8 | These photos are distributed under the terms of the CC-BY-SA license. 9 | You may re-use or re-distribute these photos, under the restrictions stated in this license. Please consult the Wikimedia Commons links for more details. 10 | -------------------------------------------------------------------------------- /docxpand/specimens/photos/man_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/specimens/photos/man_1.jpg -------------------------------------------------------------------------------- /docxpand/specimens/photos/man_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/specimens/photos/man_2.jpg -------------------------------------------------------------------------------- /docxpand/specimens/photos/woman_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/specimens/photos/woman_1.jpg -------------------------------------------------------------------------------- /docxpand/specimens/photos/woman_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/specimens/photos/woman_2.jpg -------------------------------------------------------------------------------- /docxpand/template.py: -------------------------------------------------------------------------------- 1 | """Templates definition.""" 2 | import enum 3 | import json 4 | import os.path as osp 5 | import typing as tp 6 | 7 | TEMPLATES_DIR = osp.join(osp.dirname(__file__), "templates") 8 | 9 | 10 | class FieldType(enum.Enum): 11 | """Enum class representing the field type. 12 | 13 | Attributes: 14 | FieldType.ADDRESS (int): an address field (composite places, zip, ...) 15 | FieldType.DATE (int): a date field 16 | FieldType.MRZ (int): a MRZ field 17 | FieldType.NAME (int): a name field 18 | FieldType.PHOTO (int): an identity photo, or a ghost image 19 | FieldType.TEXT (int): a generic text field 20 | """ 21 | 22 | ADDRESS = 0 23 | DATE = 1 24 | MRZ = 2 25 | NAME = 3 26 | PHOTO = 4 27 | TEXT = 5 28 | 29 | @staticmethod 30 | def from_name(name: str) -> "FieldType": 31 | """Return the enum from its name (case-insensitive). 32 | 33 | Args: 34 | name: the name of the enum to return 35 | 36 | Returns: 37 | the corresponding enum 38 | 39 | Raises: 40 | ValueError: if the name doesn't correspond to a known enum. 41 | """ 42 | return FieldType[name.upper()] 43 | 44 | 45 | class Named: 46 | """Defines a named object. 
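Base class for fields and document templates, which are looked up by name.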
47 | 48 | Attributes: 49 | _name: name of the object 50 | """ 51 | 52 | def __init__( 53 | self, 54 | name: tp.Optional[str] = None, 55 | ) -> None: 56 | """Initialize a Named. 57 | 58 | Args: 59 | name: name of the object 60 | """ 61 | self._name = name 62 | 63 | @property 64 | def name(self) -> str: 65 | """Property for name. 66 | 67 | Returns: 68 | name of the field position 69 | """ 70 | assert self._name is not None 71 | return self._name 72 | 73 | @name.setter 74 | def name(self, value) -> None: 75 | self._name = value 76 | 77 | def fill_from_dict(self, dictionary: tp.Dict[str, tp.Any]) -> None: 78 | """Fill a named zone object from a dict. 79 | 80 | Args: 81 | dictionary: dict containing whole information 82 | """ 83 | if "name" in dictionary: 84 | self.name = dictionary["name"] 85 | 86 | 87 | class Field(Named): 88 | """Defines a field.""" 89 | 90 | def __init__( 91 | self, 92 | name: tp.Optional[str] = None, 93 | field_type: tp.Optional[FieldType] = None, 94 | field_format: tp.Optional[str] = None, 95 | default: tp.Optional[str] = None, 96 | provider: tp.Optional[tp.Dict] = None, 97 | parts: tp.Optional[tp.Dict] = None, 98 | separator: tp.Optional[str] = None, 99 | max_chars_per_line: tp.Optional[int] = None, 100 | lines: tp.Optional[int] = None, 101 | conditional: tp.Optional[tp.Dict] = None 102 | ) -> None: 103 | """Initialize a Field.""" 104 | super().__init__(name) 105 | self.type = field_type 106 | self.format = field_format 107 | self.default = default 108 | self.provider = provider 109 | self.parts = parts 110 | self.separator = separator 111 | self.max_chars_per_line = max_chars_per_line 112 | self.lines = lines 113 | self.conditional = conditional 114 | 115 | @staticmethod 116 | def from_dict(dictionary: tp.Dict[str, tp.Any]) -> "Field": 117 | """Create a field from a dict containing needed data. 118 | 119 | Args: 120 | dictionary: dict containing data 121 | 122 | Returns: 123 | field containing needed data 124 | """ 125 | field = Field() 126 | super(Field, field).fill_from_dict(dictionary) 127 | for key, value in dictionary.items(): 128 | if key not in ["name", "type"]: 129 | setattr(field, key, value) 130 | elif key == "type": 131 | field.type = FieldType.from_name(value) 132 | 133 | return field 134 | 135 | 136 | class DocumentSide: 137 | """Defines a document side. 138 | 139 | Attributes: 140 | fields: fields to search 141 | template: path to the template vectorial (SVG) image 142 | """ 143 | 144 | def __init__(self): 145 | """Initialize a DocumentSide.""" 146 | self.fields: tp.List[Field] = [] 147 | self.template: tp.Optional[str] = None 148 | self.translatable_labels: tp.List[str] = [] 149 | 150 | @staticmethod 151 | def from_dict(dictionary: tp.Dict[str, tp.Any]) -> "DocumentSide": 152 | """Create a DocumentSide from a dict containing needed data. 153 | 154 | Args: 155 | dictionary: dict containing data 156 | 157 | Returns: 158 | DocumentSide containing needed data 159 | """ 160 | side = DocumentSide() 161 | side.fields = [Field.from_dict(entry) for entry in dictionary.get("fields", [])] 162 | side.template = dictionary.get("template") 163 | side.translatable_labels = dictionary.get("translatable_labels", []) 164 | return side 165 | 166 | def get_field(self, name: str) -> Field: 167 | """Get back a field definition using its name. 
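The lookup is a linear scan over the side's fields; the first field whose name matches is returned.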
168 | 169 | Args: 170 | name: the name of the field 171 | 172 | Returns: 173 | the field definition 174 | 175 | Raises: 176 | KeyError: when the requested field is not defined 177 | """ 178 | for field in self.fields: 179 | if field.name == name: 180 | return field 181 | raise KeyError(f"No field definition found with name {name}.") 182 | 183 | 184 | class DocumentTemplate(Named): 185 | """Defines a document template. 186 | 187 | Attributes: 188 | width: width of the template image 189 | height: height of template image 190 | dpi: dpi of the template image 191 | default_side: default side of document 192 | sides: possible sides of document 193 | """ 194 | 195 | def __init__( 196 | self, 197 | name: tp.Optional[str] = None, 198 | size: tp.Optional[tp.Tuple[int, int]] = None, 199 | dpi: tp.Optional[int] = None, 200 | context: tp.Optional[tp.Dict] = None, 201 | ): 202 | """Initialize a DocumentTemplate. 203 | 204 | Args: 205 | name: name of the template 206 | size: (width, height) of the template image 207 | dpi: dpi of the template image 208 | context: list of context variables to generate prior to fields 209 | """ 210 | self.name = name 211 | self.width = -1 212 | self.height = -1 213 | self.dpi = dpi if dpi else None 214 | self.context = context 215 | self.sides: tp.Dict[str, DocumentSide] = {} 216 | self.filename: tp.Optional[str] = None 217 | 218 | if size: 219 | try: 220 | self.width, self.height = size 221 | except Exception as err: 222 | raise ValueError( 223 | "The given size doesn't have the form (width, height)" 224 | ) from err 225 | 226 | @staticmethod 227 | def from_dict(dictionary: tp.Dict[str, tp.Any]) -> "DocumentTemplate": 228 | """Create a DocumentTemplate from a dict containing needed data. 229 | 230 | Args: 231 | dictionary: dict containing data 232 | 233 | Returns: 234 | template containing needed data 235 | """ 236 | template = DocumentTemplate() 237 | super(DocumentTemplate, template).fill_from_dict(dictionary) 238 | 239 | for key, value in dictionary.items(): 240 | if key == "sides": 241 | for sub_key, sub_value in value.items(): 242 | template.sides[sub_key] = DocumentSide.from_dict(sub_value) 243 | elif key not in ["name"]: 244 | setattr(template, key, value) 245 | 246 | return template 247 | 248 | def search_field_side(self, name: str) -> str: 249 | """Get back the document side containing a field name. 250 | 251 | Args: 252 | name: the name of the field 253 | 254 | Returns: 255 | the field side 256 | 257 | Raises: 258 | KeyError: when the requested field is not defined 259 | """ 260 | for side in self.sides: 261 | for field in self.sides[side].fields: 262 | if field.name == name: 263 | return side 264 | raise KeyError(f"No field {name} found.") 265 | 266 | @staticmethod 267 | def load(filename: str) -> "DocumentTemplate": 268 | """Load the template from a file. 
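The filename may be a path to a JSON description, a path to a directory containing a generator.json file, or the name of one of the packaged templates (resolved relatively to TEMPLATES_DIR).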
269 | 270 | Args: 271 | filename: path to the file containing the template description, 272 | or name of the file of an existing template in the packaged ones 273 | 274 | Returns: 275 | Loaded template 276 | """ 277 | if not osp.exists(filename) and osp.exists(osp.join(TEMPLATES_DIR, filename)): 278 | filename = osp.join(TEMPLATES_DIR, filename) 279 | 280 | if osp.isdir(filename): 281 | filename = osp.join(filename, "generator.json") 282 | 283 | with open(filename, "r", encoding="utf-8") as f_in: 284 | data = json.load(f_in) 285 | 286 | template = DocumentTemplate.from_dict(data) 287 | 288 | # Set relative path from TEMPLATES_DIR to filename 289 | template.filename = osp.relpath(filename, TEMPLATES_DIR) 290 | 291 | return template 292 | -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_a/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_a/back.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_a/fonts/MsMadi-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_a/fonts/MsMadi-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_a/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_a/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_a/fonts/SpaceGrotesk-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_a/fonts/SpaceGrotesk-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_a/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_a/front.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_b/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_b/back.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_b/fonts/Allura-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_b/fonts/Allura-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_b/fonts/Karla-VariableFont_wght.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_b/fonts/Karla-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_b/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_b/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td1_b/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td1_b/front.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/back.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/fonts/Bellefair-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/fonts/Bellefair-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/fonts/Gill-Sans.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/fonts/Gill-Sans.otf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/fonts/HomemadeApple-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/fonts/HomemadeApple-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_a/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_a/front.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/back.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/ComforterBrush-Regular.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/ComforterBrush-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/Oswald-Light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/Oswald-Light.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/Oswald-Medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/Oswald-Medium.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/Raleway-Black.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/Raleway-Black.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/fonts/Raleway-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/fonts/Raleway-Bold.ttf -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/id_card_td2_b/front.png -------------------------------------------------------------------------------- /docxpand/templates/id_card_td2_b/generator.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ID_CARD_TD2_B", 3 | "dpi": 300, 4 | "width": 1240, 5 | "height": 878, 6 | "context": { 7 | "gender": { 8 | "male": 0.48, 9 | "female": 0.48, 10 | "nonbinary": 0.04 11 | }, 12 | "ethnicity": { 13 | "west european": 0.1, 14 | "north european": 0.1, 15 | "east european": 0.1, 16 | "south european": 0.1, 17 | "north african": 0.1, 18 | "african": 0.1, 19 | "asian": 0.1, 20 | "indian": 0.1, 21 | "middle eastern": 0.1, 22 | "south american": 0.1 23 | }, 24 | "name_locale": { 25 | "en_GB": 0.02, 26 | "de_DE": 0.02, 27 | "tr_TR": 0.08, 28 | "fr_FR": 0.04, 29 | "it_IT": 0.02, 30 | "es_ES": 0.74, 31 | "pt_PT": 0.06 32 | }, 33 | "locale": "es_ES" 34 | }, 35 | "sides": { 36 | "front": { 37 | "template": "front.svg", 38 | "fields": [ 39 | { 40 | "name": "expires", 41 | "type": "date", 42 | "format": "%d.%m.%Y", 43 | "provider": { 44 | "__class__": "faker.providers.date_time.Provider", 45 | 
"init_context": { 46 | "generator": "generator" 47 | }, 48 | "__method__": "date_between", 49 | "call_args": { 50 | "start_date": "today", 51 | "end_date": "+10y" 52 | } 53 | }, 54 | "lines": 1 55 | }, 56 | { 57 | "name": "gender", 58 | "type": "text", 59 | "provider": { 60 | "__class__": "docxpand.providers.GenderProvider", 61 | "__method__": "get_gender_letter", 62 | "call_context": { 63 | "gender": "gender" 64 | } 65 | } 66 | }, 67 | { 68 | "name": "family_name", 69 | "type": "name", 70 | "provider": { 71 | "__class__": "faker.providers.person.{name_locale}.Provider", 72 | "init_context": { 73 | "generator": "generator" 74 | }, 75 | "__method__": "last_name_{gender}" 76 | }, 77 | "parts": { 78 | "1": 0.85, 79 | "2": 0.15 80 | }, 81 | "separator": "-", 82 | "max_chars_per_line": 24 83 | }, 84 | { 85 | "name": "second_family_name", 86 | "type": "name", 87 | "provider": { 88 | "__class__": "faker.providers.person.{name_locale}.Provider", 89 | "init_context": { 90 | "generator": "generator" 91 | }, 92 | "__method__": "last_name_{gender}" 93 | }, 94 | "parts": { 95 | "1": 0.85, 96 | "2": 0.15 97 | }, 98 | "separator": "-", 99 | "max_chars_per_line": 24 100 | }, 101 | { 102 | "name": "given_name", 103 | "type": "name", 104 | "provider": { 105 | "__class__": "faker.providers.person.{name_locale}.Provider", 106 | "init_context": { 107 | "generator": "generator" 108 | }, 109 | "__method__": "first_name_{gender}" 110 | }, 111 | "parts": { 112 | "1": 0.25, 113 | "2": 0.5, 114 | "3": 0.2, 115 | "4": 0.03, 116 | "5": 0.015, 117 | "6": 0.005 118 | }, 119 | "separator": ", ", 120 | "max_chars_per_line": 45 121 | }, 122 | { 123 | "name": "birth_date", 124 | "type": "date", 125 | "format": "%d.%m.%Y", 126 | "provider": { 127 | "__class__": "faker.providers.date_time.Provider", 128 | "init_context": { 129 | "generator": "generator" 130 | }, 131 | "__method__": "date_of_birth", 132 | "call_args": { 133 | "minimum_age": 18, 134 | "maximum_age": 100 135 | } 136 | }, 137 | "lines": 1 138 | }, 139 | { 140 | "name": "nationality", 141 | "type": "text", 142 | "provider": { 143 | "__class__": "docxpand.providers.ChoiceProvider", 144 | "init_args": { 145 | "choices": { 146 | "CICERANA": 1.0 147 | } 148 | }, 149 | "__method__": "choice" 150 | } 151 | }, 152 | { 153 | "name": "signature", 154 | "type": "text", 155 | "provider": { 156 | "__class__": "docxpand.providers.signature.Provider", 157 | "init_context": {}, 158 | "__method__": "signature", 159 | "call_context": { 160 | "existing_fields": "existing_fields" 161 | } 162 | } 163 | }, 164 | { 165 | "name": "photo", 166 | "type": "photo", 167 | "provider": { 168 | "__class__": "docxpand.providers.photo.StableDiffusionProvider", 169 | "__method__": "id_photo", 170 | "call_args": { 171 | "width": 881, 172 | "height": 1132 173 | }, 174 | "call_context": { 175 | "existing_fields": "existing_fields", 176 | "gender": "gender", 177 | "ethnicity": "ethnicity", 178 | "url": "url" 179 | } 180 | } 181 | }, 182 | { 183 | "name": "ghost", 184 | "type": "photo", 185 | "provider": { 186 | "__class__": "docxpand.providers.photo.Provider", 187 | "__method__": "ghost_image", 188 | "call_args": { 189 | "width": 881, 190 | "height": 1132, 191 | "mode": "halftone" 192 | }, 193 | "call_context": { 194 | "existing_fields": "existing_fields" 195 | } 196 | } 197 | }, 198 | { 199 | "name": "document_number", 200 | "type": "text", 201 | "provider": { 202 | "__class__": "faker.providers.BaseProvider", 203 | "init_context": { 204 | "generator": "generator" 205 | }, 206 | "__method__": 
"bothify", 207 | "call_args": { 208 | "text": "??#?#??##", 209 | "letters": "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 210 | } 211 | } 212 | }, 213 | { 214 | "name": "mrz", 215 | "type": "mrz", 216 | "provider": { 217 | "__class__": "docxpand.providers.mrz.Provider", 218 | "__method__": "td2", 219 | "call_args": { 220 | "document_code": "ID" 221 | }, 222 | "call_context": { 223 | "existing_fields": "existing_fields", 224 | "gender": "gender" 225 | } 226 | }, 227 | "lines": 2 228 | } 229 | ] 230 | }, 231 | "back": { 232 | "template": "back.svg", 233 | "fields": [ 234 | { 235 | "name": "birth_place", 236 | "type": "address", 237 | "format": "{place_of_birth}", 238 | "provider": { 239 | "__class__": "docxpand.providers.address.{locale}.Provider", 240 | "init_context": { 241 | "generator": "generator" 242 | }, 243 | "__method__": { 244 | "place_of_birth": "place_of_birth" 245 | } 246 | }, 247 | "separator": "\n", 248 | "lines": 2 249 | }, 250 | { 251 | "name": "address", 252 | "type": "address", 253 | "provider": { 254 | "__class__": "docxpand.providers.address.{locale}.Provider", 255 | "init_context": { 256 | "generator": "generator" 257 | }, 258 | "__method__": "address" 259 | }, 260 | "separator": "\n", 261 | "lines": 4 262 | }, 263 | { 264 | "name": "parents", 265 | "type": "text", 266 | "provider": { 267 | "__class__": "docxpand.providers.person.{locale}.Provider", 268 | "init_context": { 269 | "generator": "generator" 270 | }, 271 | "__method__": "parents_names" 272 | } 273 | }, 274 | { 275 | "name": "Barcode", 276 | "type": "photo", 277 | "provider": { 278 | "__class__": "docxpand.providers.barcode.Provider", 279 | "__method__": "generate_barcode", 280 | "call_args": { 281 | "document_code": "ID", 282 | "barcode_format_name": "Barcode", 283 | "width": 530, 284 | "height": 86 285 | }, 286 | "call_context": { 287 | "existing_fields": "existing_fields" 288 | } 289 | } 290 | } 291 | ] 292 | } 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_a/fonts/Codystar-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_a/fonts/Codystar-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_a/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_a/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_a/fonts/OpenSans-VariableFont_wdth,wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_a/fonts/OpenSans-VariableFont_wdth,wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_a/fonts/Outfit-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_a/fonts/Outfit-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_a/front.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_a/front.png -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/fonts/Cabin-Italic-VariableFont_wdth,wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/fonts/Cabin-Italic-VariableFont_wdth,wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/fonts/Cabin-VariableFont_wdth,wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/fonts/Cabin-VariableFont_wdth,wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/fonts/FuzzyBubbles-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/fonts/FuzzyBubbles-Bold.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/fonts/JosefinSans-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/fonts/JosefinSans-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_b/front.png -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_b/generator.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PP_TD3_B", 3 | "dpi": 300, 4 | "width": 1476, 5 | "height": 1039, 6 | "context": { 7 | "gender": { 8 | "male": 0.48, 9 | "female": 0.48, 10 | "nonbinary": 0.04 11 | }, 12 | "ethnicity": { 13 | "west european": 0.1, 14 | "north european": 0.1, 15 | "east european": 0.1, 16 | "south european": 0.1, 17 | "north african": 0.1, 18 | "african": 0.1, 19 | "asian": 0.1, 20 | "indian": 0.1, 21 | "middle eastern": 0.1, 22 | "south american": 0.1 23 | }, 24 | "name_locale": { 25 | "en_GB": 0.02, 26 | "de_DE": 0.02, 27 | "tr_TR": 0.08, 28 | "fr_FR": 0.04, 29 | "it_IT": 0.02, 30 | "es_ES": 0.06, 31 | "pt_PT": 0.74 32 | }, 33 | "locale": "pt_PT" 34 | }, 35 | "sides": { 36 | "front": { 37 | "template": "front.svg", 38 | "fields": [ 39 | { 40 | "name": "birth_place", 41 | "type": "text", 42 | "format": "{place_of_birth}", 43 | "provider": { 44 | "__class__": 
"docxpand.providers.address.{locale}.Provider", 45 | "init_context": { 46 | "generator": "generator" 47 | }, 48 | "__method__": { 49 | "place_of_birth": "place_of_birth" 50 | } 51 | } 52 | }, 53 | { 54 | "name": "authority", 55 | "type": "text", 56 | "provider": { 57 | "__class__": "docxpand.providers.authority.{locale}.Provider", 58 | "init_context": { 59 | "generator": "generator" 60 | }, 61 | "__method__": "authority", 62 | "call_args": { 63 | "max_length": 40 64 | } 65 | } 66 | }, 67 | { 68 | "name": "expires", 69 | "type": "date", 70 | "format": "%d.%m.%Y", 71 | "provider": { 72 | "__class__": "faker.providers.date_time.Provider", 73 | "init_context": { 74 | "generator": "generator" 75 | }, 76 | "__method__": "date_between", 77 | "call_args": { 78 | "start_date": "today", 79 | "end_date": "+10y" 80 | } 81 | }, 82 | "lines": 1 83 | }, 84 | { 85 | "name": "date_issued", 86 | "type": "date", 87 | "format": "%d.%m.%Y", 88 | "provider": { 89 | "__class__": "docxpand.providers.date_time.Provider", 90 | "__method__": "date_plus_delta", 91 | "call_context": { 92 | "existing_fields": "existing_fields" 93 | }, 94 | "call_args": { 95 | "field_path": [ 96 | "front", 97 | "expires" 98 | ], 99 | "years": -10, 100 | "months": 0, 101 | "days": 1 102 | } 103 | }, 104 | "lines": 1 105 | }, 106 | { 107 | "name": "gender", 108 | "type": "text", 109 | "provider": { 110 | "__class__": "docxpand.providers.GenderProvider", 111 | "__method__": "get_gender_letter", 112 | "call_context": { 113 | "gender": "gender" 114 | } 115 | } 116 | }, 117 | { 118 | "name": "family_name", 119 | "type": "name", 120 | "provider": { 121 | "__class__": "faker.providers.person.{name_locale}.Provider", 122 | "init_context": { 123 | "generator": "generator" 124 | }, 125 | "__method__": "last_name_{gender}" 126 | }, 127 | "parts": { 128 | "1": 0.85, 129 | "2": 0.15 130 | }, 131 | "separator": "-", 132 | "max_chars_per_line": 24 133 | }, 134 | { 135 | "name": "given_name", 136 | "type": "name", 137 | "provider": { 138 | "__class__": "faker.providers.person.{name_locale}.Provider", 139 | "init_context": { 140 | "generator": "generator" 141 | }, 142 | "__method__": "first_name_{gender}" 143 | }, 144 | "parts": { 145 | "1": 0.25, 146 | "2": 0.5, 147 | "3": 0.2, 148 | "4": 0.03, 149 | "5": 0.015, 150 | "6": 0.005 151 | }, 152 | "separator": ", ", 153 | "max_chars_per_line": 45 154 | }, 155 | { 156 | "name": "birth_date", 157 | "type": "date", 158 | "format": "%d.%m.%Y", 159 | "provider": { 160 | "__class__": "faker.providers.date_time.Provider", 161 | "init_context": { 162 | "generator": "generator" 163 | }, 164 | "__method__": "date_of_birth", 165 | "call_args": { 166 | "minimum_age": 18, 167 | "maximum_age": 100 168 | } 169 | }, 170 | "lines": 1 171 | }, 172 | { 173 | "name": "type", 174 | "type": "text", 175 | "provider": { 176 | "__class__": "docxpand.providers.ChoiceProvider", 177 | "init_args": { 178 | "choices": { 179 | "P": 1.0 180 | } 181 | }, 182 | "__method__": "choice" 183 | } 184 | }, 185 | { 186 | "name": "country_code", 187 | "type": "text", 188 | "provider": { 189 | "__class__": "docxpand.providers.ChoiceProvider", 190 | "init_args": { 191 | "choices": { 192 | "VAL": 1.0 193 | } 194 | }, 195 | "__method__": "choice" 196 | } 197 | }, 198 | { 199 | "name": "nationality", 200 | "type": "text", 201 | "provider": { 202 | "__class__": "docxpand.providers.ChoiceProvider", 203 | "init_args": { 204 | "choices": { 205 | "VAL": 1.0 206 | } 207 | }, 208 | "__method__": "choice" 209 | } 210 | }, 211 | { 212 | "name": "height", 213 | 
"type": "text", 214 | "provider": { 215 | "__class__": "docxpand.providers.HeightProvider", 216 | "__method__": "height_in_centimeters", 217 | "call_context": { 218 | "gender": "gender" 219 | } 220 | } 221 | }, 222 | { 223 | "name": "signature", 224 | "type": "text", 225 | "provider": { 226 | "__class__": "docxpand.providers.signature.Provider", 227 | "init_context": {}, 228 | "__method__": "signature", 229 | "call_context": { 230 | "existing_fields": "existing_fields" 231 | } 232 | } 233 | }, 234 | { 235 | "name": "photo", 236 | "type": "photo", 237 | "provider": { 238 | "__class__": "docxpand.providers.photo.StableDiffusionProvider", 239 | "__method__": "id_photo", 240 | "call_args": { 241 | "width": 881, 242 | "height": 1132 243 | }, 244 | "call_context": { 245 | "existing_fields": "existing_fields", 246 | "gender": "gender", 247 | "ethnicity": "ethnicity", 248 | "url": "url" 249 | } 250 | } 251 | }, 252 | { 253 | "name": "document_number", 254 | "type": "text", 255 | "provider": { 256 | "__class__": "faker.providers.BaseProvider", 257 | "init_context": { 258 | "generator": "generator" 259 | }, 260 | "__method__": "bothify", 261 | "call_args": { 262 | "text": "??#?#??##", 263 | "letters": "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 264 | } 265 | } 266 | }, 267 | { 268 | "name": "personal_number", 269 | "type": "text", 270 | "provider": { 271 | "__class__": "faker.providers.BaseProvider", 272 | "init_context": { 273 | "generator": "generator" 274 | }, 275 | "__method__": "bothify", 276 | "call_args": { 277 | "text": "##### ######", 278 | "letters": "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 279 | } 280 | } 281 | }, 282 | { 283 | "name": "mrz", 284 | "type": "mrz", 285 | "provider": { 286 | "__class__": "docxpand.providers.mrz.Provider", 287 | "__method__": "td3", 288 | "call_args": { 289 | "document_code": "P" 290 | }, 291 | "call_context": { 292 | "existing_fields": "existing_fields", 293 | "gender": "gender" 294 | } 295 | }, 296 | "lines": 2 297 | } 298 | ] 299 | } 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/BarlowCondensed-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/BarlowCondensed-Bold.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/BarlowCondensed-Medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/BarlowCondensed-Medium.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/BarlowCondensed-MediumItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/BarlowCondensed-MediumItalic.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/BarlowCondensed-SemiBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/BarlowCondensed-SemiBold.ttf 
-------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/Codystar-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/Codystar-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/Kristi-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/Kristi-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/pp_td3_c/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/pp_td3_c/front.png -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/back.png -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/fonts/ComforterBrush-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/fonts/ComforterBrush-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/fonts/Lack-Regular.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/fonts/Lack-Regular.otf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/fonts/Montserrat-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/fonts/Montserrat-Bold.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/fonts/Montserrat-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/fonts/Montserrat-Regular.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td1/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td1/front.png -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td2/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td2/back.png -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td2/fonts/OCR-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td2/fonts/OCR-B.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td2/fonts/Rubik-Italic-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td2/fonts/Rubik-Italic-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td2/fonts/Rubik-VariableFont_wght.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td2/fonts/Rubik-VariableFont_wght.ttf -------------------------------------------------------------------------------- /docxpand/templates/rp_card_td2/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/templates/rp_card_td2/front.png -------------------------------------------------------------------------------- /docxpand/templates/showcase.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --gx: 50%; 3 | --gy: 50%; 4 | --sx: 8px; 5 | --sy: 8px; 6 | --s: 1; 7 | --o: 0; 8 | --rx: 0deg; 9 | --ry: 0deg; 10 | --rx-delta: 0deg; 11 | --ry-delta: 0deg; 12 | } 13 | 14 | .grid { 15 | display: grid; 16 | grid-template-columns: 1fr 1fr; 17 | grid-gap: 64px; 18 | align-items: stretch; 19 | justify-items: center; 20 | margin: 64px; 21 | } 22 | 23 | .rotator img { 24 | border: 0; 25 | border-radius: 1.8vw; 26 | max-width: 100%; 27 | } 28 | 29 | .rotator img.td3 { 30 | border-radius: 0 0 1.0vw 1.0vw; 31 | } 32 | 33 | .rotator.interactive { 34 | transform: rotateX(calc(var(--rx) + var(--rx-delta))) rotateY(calc(var(--ry) + var(--ry-delta))); 35 | transform-style: preserve-3d; 36 | } 37 | 38 | .rotator.interactive img { 39 | box-shadow: var(--sx) var(--sy) 12px 0px rgba(0, 0, 0, 0.5); 40 | } 41 | 42 | .rotator { 43 | position: relative; 44 | appearance: none; 45 | -webkit-appearance: none; 46 | border: none; 47 | background: top; 48 | padding: 0; 49 | width: 100%; 50 | transition: transform 0.1s ease-in-out; 51 | } 52 | 53 | 
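/* Custom-property contract (see showcase.js): --rx/--ry hold the pointer-driven
   card tilt, --ry-delta adds the 180deg back-side flip, --gx/--gy place the
   glare center, and --sx/--sy offset the drop shadow; --s, --o and --rx-delta
   keep the :root defaults above. */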
.rotator.flipping { 54 | transition: transform 1.0s ease-in-out; 55 | } 56 | 57 | /* Glowing effect */ 58 | .glare { 59 | position: absolute; 60 | top: 0; 61 | /* background: radial-gradient(farthest-corner circle at var(--gx) var(--gy), rgba(255,255,255,.8) 10%, rgba(255,255,255,.65) 20%, rgba(0,0,0,.5) 90%); */ 62 | background: radial-gradient(farthest-corner circle at var(--gx) var(--gy), 63 | rgba(255,255,255,.8) 10%, 64 | rgba(255,255,255,.65) 20%, 65 | rgba(0,0,0,.5) 90% 66 | ); 67 | mix-blend-mode: overlay; 68 | transform: translateZ(3px); 69 | display: grid; 70 | width: 100%; 71 | height: 100%; 72 | opacity: 0%; 73 | border-radius: 1.8vw; 74 | transition: opacity 0.3s ease-in-out; 75 | } 76 | 77 | .rotator.interactive .glare { 78 | opacity: 60%; 79 | } 80 | 81 | /* Lens flare effect */ 82 | .rotator.interactive .glare::before { 83 | position: relative; 84 | top: 0; 85 | display: block; 86 | width: 100%; 87 | height: 200%; 88 | content: ""; 89 | background: radial-gradient(farthest-corner circle at calc(90% - 0.8*var(--gx)) calc(90% - 0.8*var(--gy)), 90 | rgba(255,255,255,.8) 0.5%, 91 | rgba(255,255,255,.65) 1%, 92 | transparent 2% 93 | ); 94 | opacity: 100%; 95 | border-radius: 1.8vw; 96 | mix-blend-mode: overlay; 97 | } 98 | .rotator.interactive .glare::after { 99 | position: relative; 100 | top: -100%; 101 | display: block; 102 | width: 100%; 103 | height: 200%; 104 | content: ""; 105 | background: radial-gradient(farthest-corner circle at calc(100% - var(--gx)) calc(100% - var(--gy)), 106 | rgba(255,255,255,.8) 1%, 107 | rgba(255,255,255,.65) 3%, 108 | transparent 5% 109 | ); 110 | opacity: 100%; 111 | border-radius: 1.8vw; 112 | mix-blend-mode: overlay; 113 | } 114 | -------------------------------------------------------------------------------- /docxpand/templates/showcase.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | DocXPand document templates 5 | 6 | 7 | 8 | 9 | 10 | 11 |
[showcase.html lines 12-72: page markup not preserved in this extraction]
73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /docxpand/templates/showcase.js: -------------------------------------------------------------------------------- 1 | const clamp = (num, min, max) => Math.min(Math.max(num, min), max); 2 | 3 | $(document).ready(function() { 4 | 5 | $("button.rotator").on("mouseover", function(event) { 6 | var rotator = null; 7 | if(event.target.tagName == "BUTTON") { 8 | rotator = $(event.target); 9 | } else { 10 | rotator = $(event.target).parent(); 11 | } 12 | rotator.addClass("interactive"); 13 | if(rotator.hasClass("flipped")) { 14 | document.documentElement.style.setProperty('--ry-delta', "180deg"); 15 | } else { 16 | document.documentElement.style.setProperty('--ry-delta', "0deg"); 17 | } 18 | }); 19 | 20 | $("button.rotator").on("mouseleave", function(event) { 21 | var rotator = null; 22 | if(event.target.tagName == "BUTTON") { 23 | rotator = $(event.target); 24 | } else { 25 | rotator = $(event.target).parent(); 26 | } 27 | rotator.removeClass("interactive"); 28 | document.documentElement.style.setProperty('--ry-delta', "0deg"); 29 | }); 30 | 31 | $("button.rotator").on("click", function(event) { 32 | var rotator = null; 33 | if(event.target.tagName == "BUTTON") { 34 | rotator = $(event.target); 35 | } else { 36 | rotator = $(event.target).parent(); 37 | } 38 | rotator.addClass("flipping"); 39 | timeout = setTimeout(function() { 40 | rotator.removeClass("flipping"); 41 | }, 1000); 42 | 43 | if(rotator.hasClass("flipped")) { 44 | rotator.removeClass("flipped"); 45 | document.documentElement.style.setProperty('--ry-delta', "0deg"); 46 | } else { 47 | rotator.addClass("flipped"); 48 | document.documentElement.style.setProperty('--ry-delta', "180deg"); 49 | } 50 | }); 51 | 52 | $("button.rotator").on("mousemove", function(event) { 53 | var rotator = null; 54 | if(event.target.tagName == "BUTTON") { 55 | rotator = $(event.target); 56 | } else { 57 | rotator = $(event.target).parent(); 58 | } 59 | if(rotator.hasClass("flipping")) { 60 | return; 61 | } 62 | 63 | const rect = event.target.getBoundingClientRect(); // get element's current size/position 64 | const percent = { 65 | x: Math.round((100 / rect.width) * (event.clientX - rect.left)), 66 | y: Math.round((100 / rect.height) * (event.clientY - rect.top)), 67 | }; 68 | const center = { 69 | x: percent.x - 50, 70 | y: percent.y - 50, 71 | }; 72 | 73 | const max_rotation = { x: 24, y: 26 }; 74 | const degrees = { 75 | x: -clamp(-center.y / 1.5, -max_rotation.x, max_rotation.x), 76 | y: clamp(-center.x / 1.5, -max_rotation.y, max_rotation.y), 77 | }; 78 | const max_shadow = { x: 16, y: 16 }; 79 | const shadow = { 80 | x: clamp(center.x / 2, -max_shadow.x, max_shadow.x), 81 | y: clamp(center.y / 2, -max_shadow.y, max_shadow.y), 82 | }; 83 | 84 | document.documentElement.style.setProperty('--rx', degrees.x+"deg"); 85 | document.documentElement.style.setProperty('--ry', degrees.y+"deg"); 86 | document.documentElement.style.setProperty('--gx', percent.x+"%"); 87 | document.documentElement.style.setProperty('--gy', percent.y+"%"); 88 | document.documentElement.style.setProperty('--sx', shadow.x+"px"); 89 | document.documentElement.style.setProperty('--sy', shadow.y+"px"); 90 | }); 91 | }); 92 | -------------------------------------------------------------------------------- /docxpand/tesseract.py: -------------------------------------------------------------------------------- 1 | """Tesseract API wrapper.""" 2 | 3 | import typing as tp 4 | 5 | import numpy 
as np 6 | from tesserocr import OEM, PSM, PyTessBaseAPI, RIL 7 | 8 | from docxpand.image import ColorSpace, Image 9 | 10 | TESSDATA_PATH = "/usr/share/tesseract-ocr/4.00/tessdata/" 11 | 12 | 13 | class Tesseract: 14 | """Class that provides a wrapper around the Tesseract API. 15 | 16 | Args: 17 | oem : recognition mode (see :class:`tesserocr.OEM`) 18 | psm : page segmentation mode (see :class:`tesserocr.PSM`) 19 | languages: languages to use (using ISO 639-3 3-character language codes) 20 | config : config dict (using tesseract's parameters) 21 | 22 | Attributes: 23 | _oem : recognition mode 24 | _psm : page segmentation mode 25 | _languages: languages to use 26 | _config : config dict 27 | _loaded_apis : cache to index APIs already loaded 28 | """ 29 | 30 | def __init__( 31 | self, 32 | oem: OEM = OEM.DEFAULT, 33 | psm: PSM = PSM.AUTO_OSD, 34 | languages: tp.Tuple[str, ...] = ( 35 | "ocrb", "eng", "fra", "deu", "nld", "por", "ita", "spa", "cat" 36 | ), 37 | config: tp.Optional[tp.Dict[str, tp.Any]] = None, 38 | ): 39 | """Init.""" 40 | self._oem = oem 41 | self._psm = psm # Automatic page segmentation with OSD. 42 | self._languages = languages 43 | self._config = config if config else {} 44 | self._loaded_apis: tp.Dict[str, PyTessBaseAPI] = {} 45 | self._current_api: PyTessBaseAPI = self._load_api( 46 | self._oem, self._psm, self._languages 47 | ) 48 | 49 | def _load_api( 50 | self, oem: OEM, psm: PSM, languages: tp.Tuple[str, ...] 51 | ) -> PyTessBaseAPI: 52 | """Load and initialize a Tesseract API for the given OEM, PSM and languages. 53 | 54 | Args: 55 | oem : recognition mode (see :class:`tesserocr.OEM`) 56 | psm : page segmentation mode (see :class:`tesserocr.PSM`) 57 | languages: languages to use (using ISO 639-3 3-character code) 58 | """ 59 | lang = "+".join(list(languages)) 60 | key = f"{lang}_{psm}_{oem}" 61 | if key in self._loaded_apis: 62 | return self._loaded_apis[key] 63 | api = PyTessBaseAPI(init=False) 64 | config = {str(k): str(v) for k, v in self._config.items()} 65 | api.InitFull(path=TESSDATA_PATH, lang=lang, variables=config, oem=oem) 66 | api.SetPageSegMode(psm) 67 | self._loaded_apis[key] = api 68 | return api 69 | 70 | @property 71 | def config(self) -> tp.Optional[tp.Dict[str, tp.Any]]: 72 | """Return the config dict.""" 73 | return self._config 74 | 75 | @property 76 | def oem(self) -> OEM: 77 | """Return oem parameter. 78 | 79 | Returns: 80 | oem parameter. 81 | """ 82 | return self._oem 83 | 84 | @oem.setter 85 | def oem(self, value: OEM) -> None: 86 | """Set the oem parameter value. 87 | 88 | Args: 89 | value: the oem parameter value. 90 | """ 91 | self._oem = value 92 | 93 | @property 94 | def psm(self) -> PSM: 95 | """Return psm parameter. 96 | 97 | Returns: 98 | psm parameter. 99 | """ 100 | return self._psm 101 | 102 | @psm.setter 103 | def psm(self, value: PSM) -> None: 104 | """Set the psm parameter value. 105 | 106 | Args: 107 | value: the psm parameter value. 108 | """ 109 | self._psm = value 110 | 111 | @property 112 | def languages(self) -> tp.Tuple[str, ...]: 113 | """Return languages parameter. 114 | 115 | Returns: 116 | languages parameter. 117 | """ 118 | return self._languages 119 | 120 | @languages.setter 121 | def languages(self, value: tp.Tuple[str, ...]) -> None: 122 | """Set the languages parameter value. 123 | 124 | Args: 125 | value: the languages parameter value. 126 | """ 127 | self._languages = value 128 | 129 | def _set_array(self, array: np.ndarray) -> None: 130 | """Define numpy array to use for recognition.
Prefer using set_image. 131 | 132 | If the array has 2 dimensions (i.e. 1 channel), it is considered 133 | to be binary or grayscale depending on the type (bool or uint8). Else, 134 | if the array has 3 dimensions, 3 channels are considered BGR and 135 | 4 channels are considered BGRA. 136 | 137 | Other values for number of dimensions and channels raise a RuntimeError. 138 | 139 | Args: 140 | array : numpy array to use for recognition 141 | 142 | """ 143 | self._set_image(Image.from_array(array)) 144 | 145 | def _set_image(self, image: Image) -> None: 146 | """Define image to use for recognition. 147 | 148 | Args: 149 | image: image to use for recognition 150 | 151 | """ 152 | height = image.height 153 | width = image.width 154 | image_for_ocr = ( 155 | image.convert_color(ColorSpace.GRAYSCALE) 156 | if image.channels == 1 157 | else image.convert_color(ColorSpace.RGB) 158 | ) 159 | byte_per_pixel = image_for_ocr.channels 160 | bytes_per_line = byte_per_pixel * width 161 | image_data = image_for_ocr.array.tobytes() 162 | self._current_api.SetImageBytes( 163 | image_data, width, height, byte_per_pixel, bytes_per_line 164 | ) 165 | 166 | def _set_image_file(self, image_file: str) -> None: 167 | """Define image file to use for recognition. 168 | 169 | Args: 170 | image_file: image file to use. 171 | """ 172 | self._current_api.SetImageFile(str(image_file)) 173 | 174 | def set_input( 175 | self, image: tp.Union[np.ndarray, str, Image] 176 | ) -> "Tesseract": 177 | """Set image in current Tesseract API. 178 | 179 | The image will be converted to RGB before the OCR process. 180 | 181 | Args: 182 | image: numpy array, Image object, or path to an image on disk 183 | 184 | Returns: 185 | self object, to chain with a call to recognize 186 | """ 187 | self._current_api = self._load_api(self._oem, self._psm, self._languages) 188 | if isinstance(image, str): 189 | self._set_image_file(image) 190 | elif isinstance(image, np.ndarray): 191 | self._set_array(image) 192 | elif isinstance(image, Image): 193 | self._set_image(image) 194 | return self 195 | 196 | def recognize(self) -> tp.Tuple[str, float]: 197 | """Recognize input image and return text and score.
198 | 199 | Returns: 200 | tuple containing text and score 201 | """ 202 | text: str = self._current_api.GetUTF8Text() 203 | score: float = self._current_api.MeanTextConf() / 100 204 | return text, score 205 | -------------------------------------------------------------------------------- /docxpand/translations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuickSign/docxpand/291b0cf7fc8e198ce02e6ce22b9c5b825be33f12/docxpand/translations/__init__.py -------------------------------------------------------------------------------- /docxpand/translations/labels.py: -------------------------------------------------------------------------------- 1 | LABELS_TRANSLATION = { 2 | "family_name_and_given_name": { 3 | "de_DE": "NAME, Vornamen", 4 | "fr_FR": "NOM, Prénoms", 5 | "nl_NL": "NAAM, Voornamen", 6 | }, 7 | "document_name": { 8 | "de_DE": "AUFENTHALTSTITEL", 9 | "fr_FR": "TITRE DE SEJOUR", 10 | "nl_NL": "VERBLIJFSVERGUNNING", 11 | }, 12 | "gender": { 13 | "de_DE": "Geschlecht", 14 | "fr_FR": "Sexe", 15 | "nl_NL": "Geslacht", 16 | }, 17 | "foreign_nationality": { 18 | "de_DE": "Staatsangehörigkeit", 19 | "fr_FR": "Nationalité", 20 | "nl_NL": "Nationaliteit", 21 | }, 22 | "birth_date": { 23 | "de_DE": "Geburtsdatum", 24 | "fr_FR": "Date de naissance", 25 | "nl_NL": "Geboortedatum", 26 | }, 27 | "expires": { 28 | "de_DE": "Gültig bis", 29 | "fr_FR": "Valable jusqu'au", 30 | "nl_NL": "Geldig tot", 31 | }, 32 | "observations": { 33 | "de_DE": "Bemerkungen", 34 | "fr_FR": "Observations", 35 | "nl_NL": "Opmerkingen", 36 | }, 37 | "permit_type": { 38 | "de_DE": "Art des Titels", 39 | "fr_FR": "Cat. du titre", 40 | "nl_NL": "Soort titel", 41 | }, 42 | "date_issued": { 43 | "de_DE": "Ausstellungsdatum", 44 | "fr_FR": "Date de délivrance", 45 | "nl_NL": "Datum van afgifte", 46 | }, 47 | "place_issued": { 48 | "de_DE": "Ausstellungsort", 49 | "fr_FR": "Lieu de délivrance", 50 | "nl_NL": "Plaats van afgifte", 51 | }, 52 | "birth_place": { 53 | "de_DE": "Geburtsort", 54 | "fr_FR": "Lieu de naissance", 55 | "nl_NL": "Geboorteplaats", 56 | }, 57 | } 58 | -------------------------------------------------------------------------------- /docxpand/translations/residence_permit.py: -------------------------------------------------------------------------------- 1 | RESIDENCE_PERMIT_TYPES_TRANSLATIONS = { 2 | "TEMPORARY": { 3 | "de_DE": ["VORÜBERGEHENDE", "AUFENTHALTSKARTE"], 4 | "fr_FR": ["CARTE DE SEJOUR", "TEMPORAIRE"], 5 | "nl_NL": ["TIJDELIJKE", "VERBLIJFSKAART"], 6 | "it_IT": ["CARTA DI SOGGIORNO", "TEMPORANEO"], 7 | }, 8 | "MULTI_YEAR": { 9 | "de_DE": ["MEHRJÄHRIGE", "AUFENTHALTSKARTE"], 10 | "fr_FR": ["CARTE DE SEJOUR", "PLURIANNUELLE"], 11 | "nl_NL": ["MEERJAAR", "VERBLIJFSKAART"], 12 | "it_IT": ["CARTA DI SOGGIORNO", "PLURIENNALE"], 13 | }, 14 | "RESIDENT_CARD": { 15 | "de_DE": ["MELDEBESCHEINIGUNG"], 16 | "fr_FR": ["CARTE DE RESIDENT"], 17 | "nl_NL": ["BEWONER KAART"], 18 | "it_IT": ["CARTA DI RESIDENTE"], 19 | }, 20 | "CITIZEN_CARD": { 21 | "de_DE": ["BÜRGERAUFENT", "HALTSKARTE"], 22 | "fr_FR": ["CARTE DE SEJOUR", "CITOYEN"], 23 | "nl_NL": ["BURGER", "VERBLIJFSKAART"], 24 | "it_IT": ["CARTA DI RESIDENZA", "CITTADINA"], 25 | }, 26 | "RESIDENCE_CERTIFICATE": { 27 | "de_DE": ["AUFENTHALTS", "BESCHEINIGUNG"], 28 | "fr_FR": ["CERTIFICAT DE", "RESIDENCE"], 29 | "nl_NL": ["WOONPLAATS", "CERTIFICAAT"], 30 | "it_IT": ["CERTIFICATO", "DI RESIDENZA"], 31 | }, 32 | } 33 | 34 | RESIDENCE_PERMIT_OBSERVATIONS = { 35 | "WORK": {
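        # Observation labels printed on residence permits, keyed by permit
        # purpose (WORK, FAMILY, STUDENT, ...) and then by locale.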
"de_DE": "JEDER BERUF", 37 | "fr_FR": "TOUTE PROFESSION", 38 | "nl_NL": "ELK BEROEP", 39 | "it_IT": "QUALSIASI PROFESSIONE", 40 | }, 41 | "FAMILY": { 42 | "de_DE": "PRIVAT- UND FAMILIENLEBEN", 43 | "fr_FR": "VIE PRIVÉE ET FAMILIALE", 44 | "nl_NL": "PRIVÉ- EN GEZINSLEVEN", 45 | "it_IT": "VITA PRIVATA E FAMILIARE", 46 | }, 47 | "STUDENT": { 48 | "de_DE": "SCHÜLER", 49 | "fr_FR": "ÉTUDIANT - ÉLÈVE", 50 | "nl_NL": "STUDENT", 51 | "it_IT": "STUDENTE", 52 | }, 53 | "STUDENT_2": { 54 | "de_DE": "STUDENT", 55 | "fr_FR": "ÉTUDIANT", 56 | "nl_NL": "STUDENT", 57 | "it_IT": "STUDENTE", 58 | }, 59 | } 60 | 61 | RESIDENCE_PERMIT_WORK_OBSERVATIONS = { 62 | "FULL": { 63 | "de_DE": ["ERMÄCHTIGT DEN HALTER", "ZUR ARBEIT"], 64 | "fr_FR": ["AUTORISE SON PORTEUR", "À TRAVAILLER"], 65 | "nl_NL": ["MACHTIGT ZIJN DRAGER", "OM TE WERKEN"], 66 | "it_IT": ["AUTORIZZA IL SUO TITOLARE", "A LAVORARE"], 67 | }, 68 | "PARTIAL": { 69 | "de_DE": ["ERMÄCHTIGT DEN HALTER", "TEILWEISE ZU ARBEITEN"], 70 | "fr_FR": ["AUTORISE SON PORTEUR", "À TRAVAILLER PARTIELLEMENT"], 71 | "nl_NL": ["MACHTIGT ZIJN DRAGER", "GEDEELTELIJK TE WERKEN"], 72 | "it_IT": ["AUTORIZZA IL SUO TITOLARE", "A LAVORARE PARZIALMENTE"], 73 | }, 74 | "ACCESSORY": { 75 | "de_DE": ["ERMÄCHTIGT DEN HALTER,", "ALS ZUBEHÖR ZU ARBEITEN"], 76 | "fr_FR": ["AUTORISE SON PORTEUR", "À TRAVAILLER À TITRE ACCESSOIRE"], 77 | "nl_NL": ["MACHTIGT ZIJN DRAGER OM", "NEVENWERKZAAMHEDEN TE VERRICHTEN"], 78 | "it_IT": ["AUTORIZZA IL SUO TITOLARE", "A LAVORARE COME ACCESSORIO"], 79 | }, 80 | } 81 | -------------------------------------------------------------------------------- /docxpand/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import magic 3 | import typing as tp 4 | 5 | from docxpand.image import ColorSpace, Image 6 | 7 | 8 | 9 | def guess_mimetype( 10 | filename: tp.Optional[str] = None, filecontent: tp.Optional[bytes] = None 11 | ) -> str: 12 | """Guess the mime type of a file or a binary content. 13 | 14 | Either filename or filecontent must be set. 15 | 16 | Args: 17 | filename: path to the file on which the type must be guessed 18 | filecontent: binary content on which the type must be guessed 19 | 20 | Returns: 21 | the mimetype of the file or binary content (e.g.: 'application/pdf'). 22 | 23 | Raises: 24 | ValueError: if both arguments are not set 25 | """ 26 | if filecontent: 27 | return str(magic.from_buffer(filecontent, mime=True)) 28 | if not filename: 29 | raise ValueError("You should set either filename or filecontent") 30 | return str(magic.from_file(filename, mime=True)) 31 | 32 | 33 | def nested_get( 34 | dictionary: tp.Dict[str, tp.Any], keys: tp.List[str], default: tp.Any 35 | ) -> tp.Any: 36 | """Get a value in a dictionary using nested string keys. 37 | 38 | Args: 39 | dictionary: the dictionary from which the value must be extracted 40 | keys: the list of keys 41 | default: the default value to return if the path does not exist 42 | 43 | Returns: 44 | the value contained in the dictionary after getting the nested keys, 45 | or the default value if the path does not exist 46 | """ 47 | current = dictionary 48 | for key in keys: 49 | if key not in current: 50 | return default 51 | current = current.get(key) 52 | return current 53 | 54 | 55 | def get_field_from_any_side( 56 | dictionary: tp.Dict[str, tp.Any], 57 | key: str, 58 | default: tp.Any, 59 | side_names: tp.Optional[tp.List[str]] = None, 60 | ) -> tp.Any: 61 | """Search a value using its key in second-level nested dictionaries. 
62 | 63 | A side name is used as first key. If `side_names` is not given, then 64 | `["front", "back"]` is used. 65 | 66 | Args: 67 | dictionary: the dictionary from which the value must be extracted 68 | key: the key of the field to get 69 | default: the default value to return if the field does not exist 70 | side_names: name of the sides 71 | 72 | Returns: 73 | the value contained in one of the second-level dictionaries, 74 | or the default value if the path does not exist 75 | """ 76 | if side_names is None: 77 | side_names = ["front", "back"] 78 | 79 | for side_name in side_names: 80 | if side_name not in dictionary: 81 | continue 82 | if key in dictionary[side_name]: 83 | return dictionary[side_name][key] 84 | 85 | return default 86 | 87 | 88 | def floor_to_multiple(number: float, base: int) -> int: 89 | """Return the closest integer multiple of a base less than an upper bound. 90 | 91 | Only works for positive numbers. 92 | 93 | Args: 94 | number: the upper bound 95 | base: the output must be a multiple of this base number 96 | 97 | Returns: 98 | the closest integer multiple of the base less than the upper bound 99 | 100 | Raises: 101 | NotImplementedError: for negative numbers 102 | """ 103 | if number < 0: 104 | raise NotImplementedError( 105 | "floor_to_multiple not implemented for negative numbers" 106 | ) 107 | return int(number / base) * base 108 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "docxpand" 3 | version = "0.1.0" 4 | description = "DocXPand dataset generation code" 5 | authors = ["QuickSign"] 6 | license = "MIT License" 7 | readme = "README.md" 8 | packages = [{include = "docxpand"}] 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.9,<3.11" 12 | opencv-python = "^4.7.0.72" 13 | selenium = "^4.6.1" 14 | python-magic = "^0.4.27" 15 | Faker = "^19.1.0" 16 | lxml = "^4.9.1" 17 | zxing-cpp = "^2.0.0" 18 | dateparser = "^1.1.8" 19 | deepface = "^0.0.79" 20 | imagehash = "^4.3.1" 21 | protobuf = "^3.14.0" 22 | numpy = "^1.20.0" 23 | pycountry = "^22.3.5" 24 | python-dateutil = "^2.8.2" 25 | tensorflow-cpu = "2.9.0" 26 | pydantic = "^1.10.7" 27 | shapely = "^2.0.1" 28 | rapidfuzz = "^3.4.0" 29 | tesserocr = "^2.6.2" 30 | lpips = "^0.1.4" 31 | torch = "2.0.0" 32 | matplotlib = "^3.8.2" 33 | 34 | [tool.poetry.group.dev.dependencies] 35 | black = "^23.3.0" 36 | pytest = "^7.3.0" 37 | coverage = "^7.2.3" 38 | autoflake = "^2.0.2" 39 | pytest-cov = "^4.0.0" 40 | 41 | [build-system] 42 | requires = ["poetry>=1.0"] 43 | build-backend = "poetry.masonry.api" 44 | -------------------------------------------------------------------------------- /scripts/dataset/delete_other_side_fields.py: -------------------------------------------------------------------------------- 1 | """Script to delete fields from other side.""" 2 | import datetime 3 | import getpass 4 | import logging 5 | import os 6 | 7 | import click 8 | import tqdm 9 | 10 | from docxpand.dataset import DocFakerDataset 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | @click.command() 17 | @click.option( 18 | "-dd", 19 | "--document-dataset", 20 | type=click.Path(dir_okay=False, file_okay=True, readable=True), 21 | required=True, 22 | help="Path to the dataset with generated SVG documents.", 23 | ) 24 | @click.option( 25 | "-o", 26 | "--output-directory", 27 | type=click.Path(dir_okay=True, file_okay=False, writable=True), 28 | 
required=True, 29 | help="Path to output directory where generated photos will be stored.", 30 | ) 31 | def delete_other_side_fields( 32 | document_dataset: str, 33 | output_directory: str, 34 | ) -> None: 35 | """Delete fields from other side.""" 36 | documents = [] 37 | input_dataset = DocFakerDataset( 38 | dataset_input=document_dataset, 39 | ) 40 | progress = tqdm.tqdm(input_dataset.documents.items()) 41 | progress.set_description("Deleting other fields from annotations") 42 | for doc_id, doc_entry in progress: 43 | fields = doc_entry["annotations"][0]["fields"] 44 | for side in list(fields.keys()): 45 | # Only process the right side 46 | if not doc_id.endswith(side): 47 | del fields[side] 48 | documents.append(doc_entry) 49 | 50 | output_dataset_dict = { 51 | "__class__": "DocFakerDataset", 52 | "documents": documents, 53 | "info": { 54 | "author": getpass.getuser(), 55 | "createdAt": datetime.datetime.utcnow().isoformat(), 56 | "description": input_dataset.info().get("description"), 57 | "name": input_dataset.info().get("name") 58 | } 59 | } 60 | output_dataset = DocFakerDataset(output_dataset_dict) 61 | filename = os.path.join(output_directory, os.path.basename(document_dataset)) 62 | output_dataset.save(filename) 63 | 64 | if __name__ == "__main__": 65 | delete_other_side_fields() 66 | -------------------------------------------------------------------------------- /scripts/dataset/extract_field_locations_from_svgs.py: -------------------------------------------------------------------------------- 1 | """Script to extract field locations from SVGs.""" 2 | import datetime 3 | import getpass 4 | import logging 5 | import os 6 | 7 | import click 8 | import tqdm 9 | 10 | from docxpand.dataset import DocFakerDataset 11 | from docxpand.svg_to_image import ChromeSVGRenderer 12 | from docxpand.utils import guess_mimetype 13 | 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | @click.command() 19 | @click.option( 20 | "-dd", 21 | "--document-dataset", 22 | type=click.Path(dir_okay=False, file_okay=True, readable=True), 23 | required=True, 24 | help="Path to the dataset with generated SVG documents.", 25 | ) 26 | @click.option( 27 | "-di", 28 | "--document-images", 29 | type=click.Path(dir_okay=True, file_okay=True, readable=True), 30 | required=True, 31 | help="Path to the directory containing documents dataset images.", 32 | ) 33 | @click.option( 34 | "-o", 35 | "--output-directory", 36 | type=click.Path(dir_okay=True, file_okay=False, writable=True), 37 | required=True, 38 | help="Path to output directory where new datasets will be stored.", 39 | ) 40 | def extract_fields_locations_from_svgs( 41 | document_dataset: str, 42 | document_images: str, 43 | output_directory: str, 44 | ) -> None: 45 | """Extract fields locations from SVGs.""" 46 | documents = [] 47 | renderer = ChromeSVGRenderer() 48 | input_dataset = DocFakerDataset( 49 | dataset_input=document_dataset, 50 | images_dir=document_images 51 | ) 52 | progress = tqdm.tqdm(input_dataset.documents.items()) 53 | progress.set_description("Extracting field locations from SVGs") 54 | for doc_id, doc_entry in progress: 55 | filename = os.path.join(document_images, doc_entry["filename"]) 56 | if guess_mimetype(filename) != 'image/svg+xml': 57 | raise RuntimeError( 58 | "Cannot extract field locations from non-SVG images." 
59 | ) 60 | fields = doc_entry["annotations"][0]["fields"] 61 | for side in fields: 62 | # Only process the right side 63 | if not doc_id.endswith(side): 64 | continue 65 | field_names_and_multiline = { 66 | field_name: 67 | ( 68 | ( 69 | f"{field_name}_field" 70 | if field_value.get("type") == "text" 71 | else f"{field_name}_image" 72 | ), 73 | isinstance(field_value.get("value"), list) 74 | ) 75 | for field_name, field_value in fields[side].items() 76 | } 77 | positions = renderer.get_coordinates( 78 | filename, 79 | element_ids=list(field_names_and_multiline.values()) 80 | ) 81 | for field_name in field_names_and_multiline: 82 | element_id = field_names_and_multiline[field_name][0] 83 | position = positions[element_id] 84 | fields[side][field_name]["position"] = ( 85 | position.to_dict() if position else None 86 | ) 87 | 88 | documents.append(doc_entry) 89 | 90 | output_dataset_dict = { 91 | "__class__": "DocFakerDataset", 92 | "documents": documents, 93 | "info": { 94 | "author": getpass.getuser(), 95 | "createdAt": datetime.datetime.utcnow().isoformat(), 96 | "description": input_dataset.info().get("description"), 97 | "name": input_dataset.info().get("name") 98 | } 99 | } 100 | output_dataset = DocFakerDataset(output_dataset_dict) 101 | filename = os.path.join(output_directory, os.path.basename(document_dataset)) 102 | output_dataset.save(filename) 103 | 104 | if __name__ == "__main__": 105 | extract_fields_locations_from_svgs() 106 | -------------------------------------------------------------------------------- /scripts/dataset/extract_image_fields_from_svgs.py: -------------------------------------------------------------------------------- 1 | """Script to extract image fields (photo, datamatrix, barcodes) from SVGs.""" 2 | import datetime 3 | import getpass 4 | import logging 5 | import os 6 | 7 | import click 8 | import tqdm 9 | 10 | from docxpand.canvas import Canvas, XLINK_NS 11 | from docxpand.dataset import DocFakerDataset 12 | from docxpand.image import Image 13 | from docxpand.utils import guess_mimetype 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | @click.command() 19 | @click.option( 20 | "-dd", 21 | "--document-dataset", 22 | type=click.Path(dir_okay=False, file_okay=True, readable=True), 23 | required=True, 24 | help="Path to the dataset with generated SVG documents.", 25 | ) 26 | @click.option( 27 | "-di", 28 | "--document-images", 29 | type=click.Path(dir_okay=True, file_okay=True, readable=True), 30 | required=True, 31 | help="Path to the directory containing documents dataset images.", 32 | ) 33 | @click.option( 34 | "-o", 35 | "--output-directory", 36 | type=click.Path(dir_okay=True, file_okay=False, writable=True), 37 | required=True, 38 | help="Path to output directory where extracted images will be stored.", 39 | ) 40 | def extract_image_fields_from_svgs( 41 | document_dataset: str, 42 | document_images: str, 43 | output_directory: str, 44 | ) -> None: 45 | """Extract image fields (photo, datamatrix, barcodes) from SVGs.""" 46 | documents = [] 47 | input_dataset = DocFakerDataset( 48 | dataset_input=document_dataset, 49 | images_dir=document_images 50 | ) 51 | progress = tqdm.tqdm(input_dataset.documents.items()) 52 | progress.set_description("Extracting image fields from SVGs") 53 | for doc_id, doc_entry in progress: 54 | filename = os.path.join(document_images, doc_entry["filename"]) 55 | basename, _ = os.path.splitext(doc_entry["filename"]) 56 | basename = "-".join(basename.split("-")[:-1]) # remove side 57 | if guess_mimetype(filename) != 
'image/svg+xml': 58 | raise RuntimeError( 59 | "Cannot extract image fields from non-SVG images." 60 | ) 61 | canvas = Canvas(filename) 62 | fields = doc_entry["annotations"][0]["fields"] 63 | href_key = f"{{{XLINK_NS}}}href" 64 | factors = { # resize factor to reduce output size 65 | "barcode": 1.0, 66 | "datamatrix": 0.25, 67 | "ghost": 0.5, 68 | "photo": 0.5, 69 | "default": 0.5 70 | } 71 | formats = { # use "png" for barcodes and ghosts to reduce output size 72 | "barcode": "png", 73 | "datamatrix": "png", 74 | "ghost": "png", 75 | "photo": "jpg", 76 | "default": "jpg" 77 | } 78 | for side in fields: 79 | for field_name, field_value in fields[side].items(): 80 | if ( 81 | isinstance(field_value, str) and 82 | "Image object" in field_value 83 | ): 84 | format = formats.get(field_name.lower(), formats["default"]) 85 | image_filename = f"{basename}-{side}-{field_name}.{format}" 86 | fields[side][field_name] = { 87 | "type": "image", 88 | "filename": image_filename 89 | } 90 | 91 | if doc_id.endswith(side): # don't do it on wrong side 92 | # Load image from base64 encoded string in SVG 93 | image_element = canvas.element_by_id(f"{field_name}_image") 94 | encoded = image_element.attrib[href_key] 95 | field_image = Image.base64decode(encoded) 96 | 97 | # Resize and select format to optimize weight 98 | factor = factors.get(field_name.lower(), factors["default"]) 99 | height, width = map( 100 | lambda x: int(x*factor), field_image.shape[:2] 101 | ) 102 | field_image = field_image.resize(height, width) 103 | 104 | # Save image 105 | field_image.write(os.path.join( 106 | output_directory, image_filename 107 | )) 108 | else: 109 | fields[side][field_name] = { 110 | "type": "text", 111 | "value": field_value 112 | } 113 | 114 | documents.append(doc_entry) 115 | 116 | output_dataset_dict = { 117 | "__class__": "DocFakerDataset", 118 | "documents": documents, 119 | "info": { 120 | "author": getpass.getuser(), 121 | "createdAt": datetime.datetime.utcnow().isoformat(), 122 | "description": input_dataset.info().get("description"), 123 | "name": input_dataset.info().get("name") 124 | } 125 | } 126 | output_dataset = DocFakerDataset(output_dataset_dict) 127 | filename = os.path.join(output_directory, os.path.basename(document_dataset)) 128 | output_dataset.save(filename) 129 | 130 | if __name__ == "__main__": 131 | extract_image_fields_from_svgs() 132 | -------------------------------------------------------------------------------- /scripts/dataset/generate_fake_structured_documents.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import getpass 3 | import logging 4 | import typing as tp 5 | 6 | import click 7 | from docxpand.dataset import DocFakerDataset 8 | 9 | from docxpand.generator import Generator 10 | from docxpand.svg_to_image import ChromeSVGRenderer 11 | 12 | logger = logging.getLogger(__name__) 13 | import os 14 | 15 | 16 | @click.command() 17 | @click.option( 18 | "-t", 19 | "--template", 20 | type=click.Path(dir_okay=True, file_okay=True, readable=True), 21 | required=True, 22 | help="Name of input template directory (containing SVG and JSON files).", 23 | ) 24 | @click.option( 25 | "-n", 26 | "--number", 27 | type=int, 28 | required=False, 29 | default=1, 30 | help="Number of documents to generate.", 31 | ) 32 | @click.option( 33 | "-o", 34 | "--output-directory", 35 | type=click.Path(dir_okay=True, file_okay=False, writable=True), 36 | required=True, 37 | help="Path to output directory where fake documents will be stored.", 38 
37 | @click.option(
38 |     "-s",
39 |     "--stable-diffusion-api-url",
40 |     type=str,
41 |     required=True,
42 |     help="URL pointing to the Stable Diffusion API, used to generate identity photos.",
43 | )
44 | def generate_fake_structured_documents(
45 |     template: str,
46 |     number: int,
47 |     output_directory: str,
48 |     stable_diffusion_api_url: str,
49 | ) -> None:
50 |     """Generate fake structured documents from an SVG template."""
51 |     os.makedirs(os.path.abspath(output_directory), exist_ok=True)
52 |     basename = os.path.basename(os.path.normpath(output_directory))
53 |     output_dataset_filename = os.path.abspath(
54 |         os.path.join(output_directory, f"{basename}.json")
55 |     )
56 |     if os.path.exists(output_dataset_filename):
57 |         raise RuntimeError(
58 |             f"A JSON dataset already exists in {output_directory}. "
59 |             "Please set a new output directory, or remove the existing files."
60 |         )
61 |     generator = Generator(template, None, stable_diffusion_api_url)
62 |     all_docs = []
63 |     for _ in range(number):
64 |         try:
65 |             side_entries = generator.generate_images(output_directory)
66 |         except Exception as err:
67 |             logger.warning(
68 |                 f"Got an error while generating images ({type(err)}: {err}), "
69 |                 "continuing..."
70 |             )
71 |             continue
72 |         all_docs.extend(side_entries)
73 |     dataset = DocFakerDataset(
74 |         {
75 |             "__class__": "DocFakerDataset",
76 |             "documents": all_docs,
77 |             "info": {
78 |                 "author": getpass.getuser(),
79 |                 "createdAt": datetime.datetime.utcnow().isoformat(),
80 |                 "description": (
81 |                     f"Generated document images for template {template}."
82 |                 ),
83 |                 "name": basename,
84 |             },
85 |         }
86 |     )
87 |     dataset.save(output_dataset_filename)
88 |     logger.info(
89 |         f"Dataset written in {os.path.abspath(output_directory)}. "
90 |         f"See {output_dataset_filename}."
91 |     )
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     generate_fake_structured_documents()
96 | 
--------------------------------------------------------------------------------
/scripts/dataset/insert_generated_documents_in_scenes.py:
--------------------------------------------------------------------------------
1 | """Script to process localization dataset and insert fake IDs in scenes."""
2 | import logging
3 | import os
4 | import typing as tp
5 | 
6 | import click
7 | 
8 | from docxpand.scene_insertion import insert_generated_documents_in_scenes
9 | from docxpand.svg_to_image import ChromeSVGRenderer
10 | 
11 | logger = logging.getLogger(__name__)
12 | 
13 | 
14 | @click.command()
15 | @click.option(
16 |     "-dd",
17 |     "--document-dataset",
18 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
19 |     required=True,
20 |     help="Path to the dataset of generated documents to insert in scenes.",
21 | )
22 | @click.option(
23 |     "-di",
24 |     "--document-images",
25 |     type=click.Path(dir_okay=True, file_okay=True, readable=True),
26 |     required=True,
27 |     help="Path to the directory containing the document dataset images.",
28 | )
29 | @click.option(
30 |     "-sd",
31 |     "--scene-dataset",
32 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
33 |     required=True,
34 |     help=(
35 |         "Path to the dataset with localised documents to be used as scene "
36 |         "images."
37 |     ),
38 | )
39 | @click.option(
40 |     "-si",
41 |     "--scene-images",
42 |     type=click.Path(dir_okay=True, file_okay=False, readable=True),
43 |     required=True,
44 |     help=(
45 |         "Path to the directory containing the scene dataset images."
46 |     ),
47 | )
48 | @click.option(
49 |     "-o",
50 |     "--output-directory",
51 |     type=click.Path(dir_okay=True, file_okay=False, writable=True),
52 |     required=True,
53 |     help="Path to output directory where fake documents will be stored.",
54 | )
55 | @click.option(
56 |     "-m",
57 |     "--margins",
58 |     type=float,
59 |     required=False,
60 |     default=None,
61 |     help=(
62 |         "Relative margins (between 0.0 and 1.0) to add around the localized "
63 |         "quadrangle."
64 |     ),
65 | )
66 | @click.option(
67 |     "--seed",
68 |     type=int,
69 |     required=False,
70 |     default=None,
71 |     help="Seed used to initialize background shuffling.",
72 | )
73 | def main(
74 |     document_dataset: str,
75 |     document_images: str,
76 |     scene_dataset: str,
77 |     scene_images: str,
78 |     output_directory: str,
79 |     margins: tp.Optional[float],
80 |     seed: tp.Optional[int]
81 | ) -> None:
82 |     """Insert generated document images in scene images."""
83 |     renderer = ChromeSVGRenderer()
84 |     output_dataset_filename = insert_generated_documents_in_scenes(
85 |         document_dataset,
86 |         document_images,
87 |         scene_dataset,
88 |         scene_images,
89 |         renderer,
90 |         output_directory,
91 |         margins,
92 |         seed
93 |     )
94 |     logger.info(
95 |         f"Dataset written in {os.path.abspath(output_directory)}. "
96 |         f"See {output_dataset_filename}."
97 |     )
98 | 
99 | 
100 | if __name__ == "__main__":
101 |     main()
102 | 
--------------------------------------------------------------------------------
/scripts/dataset/reinsert_generated_documents_in_scenes.py:
--------------------------------------------------------------------------------
1 | """Script to re-insert generated documents in scenes, to regenerate dataset images."""
2 | import logging
3 | import os
4 | import typing as tp
5 | 
6 | import click
7 | import tqdm
8 | 
9 | from docxpand.dataset import DocFakerDataset
10 | from docxpand.geometry import Point, Quadrangle, estimate_doc_homography
11 | from docxpand.image import ColorSpace, load_document
12 | from docxpand.scene_insertion import (
13 |     color_transfer_reinhard,
14 |     illumination_transfer,
15 |     insert_image_in_background,
16 |     rectify_document,
17 | )
18 | from docxpand.specimen import load_specimen
19 | from docxpand.svg_to_image import ChromeSVGRenderer
20 | 
21 | logger = logging.getLogger(__name__)
22 | 
23 | 
24 | def load_documents(
25 |     document_dataset_filename: str,
26 |     document_images_directory: str,
27 | ) -> tp.List[tp.Tuple[str, str, tp.Dict[str, tp.Any]]]:
28 |     """Load the document images referenced by a dataset.
29 | 
30 |     Args:
31 |         document_dataset_filename: path to the dataset describing the documents.
32 |         document_images_directory: path to the directory containing document images.
33 | 
34 |     Returns:
35 |         List of (image path, document id, dataset entry) tuples.
36 |     """
37 |     documents = []
38 |     dataset = DocFakerDataset(
39 |         dataset_input=document_dataset_filename,
40 |         images_dir=document_images_directory
41 |     )
42 |     progress = tqdm.tqdm(dataset.documents.items())
43 |     progress.set_description("Loading documents dataset")
44 |     for doc_id, entry in progress:
45 |         image_path = (
46 |             os.path.join(document_images_directory, entry["filename"])
47 |         )
48 |         # The dataset references rendered JPG images; the SVG sources share
49 |         # the same basename.
50 |         image_path = image_path.replace(".jpg", ".svg")
51 |         documents.append((image_path, doc_id, entry))
52 |     return documents
53 | 
54 | 
55 | def load_scenes(
56 |     scene_images_directory: str,
57 | ) -> tp.Dict[str, str]:
58 |     """Load the scene images.
59 | 
60 |     Args:
61 |         scene_images_directory: path to the directory containing scene images.
62 | 
63 |     Returns:
64 |         Dict mapping document id to scene image path.
65 |     """
66 |     scenes = {}
67 |     for filename in os.listdir(scene_images_directory):
68 |         basename, _ = os.path.splitext(os.path.basename(filename))
69 |         # Keep only the part before the first dash (the document id)
70 |         basename = basename.split("-")[0]
71 |         scenes[basename] = os.path.join(scene_images_directory, filename)
72 |     return scenes
73 | 
74 | 
75 | @click.command()
76 | @click.option(
77 |     "-dd",
78 |     "--document-dataset",
79 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
80 |     required=True,
81 |     help="Path to the final dataset to regenerate.",
82 | )
83 | @click.option(
84 |     "-di",
85 |     "--document-images",
86 |     type=click.Path(dir_okay=True, file_okay=True, readable=True),
87 |     required=True,
88 |     help="Path to the directory containing SVG document images.",
89 | )
90 | @click.option(
91 |     "-si",
92 |     "--scene-images",
93 |     type=click.Path(dir_okay=True, file_okay=False, readable=True),
94 |     required=True,
95 |     help="Path to the directory containing the scene dataset images.",
96 | )
97 | @click.option(
98 |     "-o",
99 |     "--output-directory",
100 |     type=click.Path(dir_okay=True, file_okay=False, writable=True),
101 |     required=True,
102 |     help="Path to output directory where fake documents will be stored.",
103 | )
104 | def main(
105 |     document_dataset: str,
106 |     document_images: str,
107 |     scene_images: str,
108 |     output_directory: str,
109 | ) -> None:
110 |     """Re-insert generated documents in scene images."""
111 |     renderer = ChromeSVGRenderer()
112 |     os.makedirs(os.path.abspath(output_directory), exist_ok=True)
113 | 
114 |     # Read the documents dataset and the scenes directory
115 |     documents = load_documents(document_dataset, document_images)
116 |     scenes = load_scenes(scene_images)
117 | 
118 |     # Specimen image
119 |     specimen_img = load_specimen("passport_fra_2006")
120 | 
121 |     # Iterate on documents
122 |     progress = tqdm.tqdm(documents)
123 |     progress.set_description("Re-inserting documents in scenes...")
124 |     for svg_path, doc_id, doc_entry in progress:
125 |         # Check template (only PP_TD3_C fronts are regenerated by this script)
126 |         annotations = doc_entry["annotations"][0]
127 |         template = annotations["template"]
128 |         if template != "PP_TD3_C-front":
129 |             continue
130 | 
131 |         # Check the file is not already generated
132 |         output_filename, _ = os.path.splitext(
133 |             os.path.basename(doc_entry["filename"])
134 |         )
135 |         output_filename = os.path.join(output_directory, f"{output_filename}.jpg")
136 |         if os.path.exists(output_filename):
137 |             continue
138 | 
139 |         # Load images
140 |         try:
141 |             scene_path = scenes[annotations["scene_image"]]
142 |         except KeyError:
143 |             # Fall back to the previously inserted image when no scene matches
144 |             scene_path = os.path.join(
145 |                 "/home/qsuser/Work/DocXPand/Data/DocXPand_Generated/JPG/images/",
146 |                 doc_entry["filename"]
147 |             )
148 | 
149 |         scene_img = load_document(
150 |             scene_path, space=ColorSpace.BGRA, ignore_orientation=False
151 |         )
152 |         scene_quad = Quadrangle.from_dict(annotations["position"])
153 |         scene_quad = scene_quad.rescale(scene_img.width, scene_img.height)
154 | 
155 |         doc_img = renderer.render(filename=svg_path)
156 | 
157 |         # Rectification
158 |         document_in_scene_image = rectify_document(scene_img, scene_quad)
159 | 
160 |         # Color transfer
161 |         document_img_transferred = (
162 |             color_transfer_reinhard(
163 |                 scene=document_in_scene_image,
164 |                 specimen=specimen_img,
165 |                 document=doc_img
166 |             )
167 |         )
168 | 
169 |         # Illumination transfer
170 |         document_img_transferred = illumination_transfer(
171 |             scene=document_in_scene_image,
172 |             specimen=specimen_img,
173 |             document=document_img_transferred
174 |         )
175 | 
176 |         # Insert document
177 |         original_quad = Quadrangle(
178 |             Point(0, 0),
179 |             Point(doc_img.width, 0),
180 |             Point(doc_img.width, doc_img.height),
181 |             Point(0, doc_img.height),
182 |         )
183 |         homography = estimate_doc_homography(original_quad, scene_quad)
184 |         inserted_img = insert_image_in_background(
185 |             background_img=scene_img,
186 |             img_to_insert=document_img_transferred,
187 |             homography=homography,
188 |             quad=scene_quad,
189 |         )
190 | 
191 |         # Save result image
192 |         inserted_img.write(output_filename)
193 | 
194 | 
195 | if __name__ == "__main__":
196 |     main()
197 | 
--------------------------------------------------------------------------------
/scripts/dataset/transform_field_locations_to_inserted_documents.py:
--------------------------------------------------------------------------------
1 | """Script to transform field locations from generated to inserted documents."""
2 | import datetime
3 | import getpass
4 | import logging
5 | import os
6 | 
7 | import click
8 | import tqdm
9 | 
10 | from docxpand.dataset import DocFakerDataset
11 | from docxpand.geometry import (
12 |     BoundingBox,
13 |     Quadrangle,
14 |     estimate_doc_homography,
15 |     project_quad_to_target_image,
16 | )
17 | 
18 | logger = logging.getLogger(__name__)
19 | 
20 | 
21 | @click.command()
22 | @click.option(
23 |     "-gd",
24 |     "--generated-dataset",
25 |     "generated_dataset_filename",
26 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
27 |     required=True,
28 |     help="Path to the dataset with generated SVG documents.",
29 | )
30 | @click.option(
31 |     "-id",
32 |     "--inserted-dataset",
33 |     "inserted_dataset_filename",
34 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
35 |     required=True,
36 |     help="Path to the dataset with documents inserted on base scenes.",
37 | )
38 | @click.option(
39 |     "-o",
40 |     "--output-directory",
41 |     type=click.Path(dir_okay=True, file_okay=False, writable=True),
42 |     required=True,
43 |     help="Path to output directory where new datasets will be stored.",
44 | )
45 | def transform_field_locations_to_inserted_documents(
46 |     generated_dataset_filename: str,
47 |     inserted_dataset_filename: str,
48 |     output_directory: str,
49 | ) -> None:
50 |     """Transform field locations from generated to inserted documents."""
51 |     documents = []
52 |     generated_dataset = DocFakerDataset(
53 |         dataset_input=generated_dataset_filename,
54 |     )
55 |     inserted_dataset = DocFakerDataset(
56 |         dataset_input=inserted_dataset_filename,
57 |     )
58 |     progress = tqdm.tqdm(inserted_dataset.documents.items())
59 |     progress.set_description("Transforming field coordinates")
60 |     for doc_id, doc_entry in progress:
61 |         annotation = doc_entry["annotations"][0]
62 |         fields = annotation["fields"]
63 |         original_doc_entry = generated_dataset.documents[doc_id]
64 |         original_annotation = original_doc_entry["annotations"][0]
65 |         original_fields = original_annotation["fields"]
66 | 
67 |         # Find the homography from the generated doc to the inserted doc
68 |         original_quad = Quadrangle.from_dict(original_annotation["position"])
69 |         target_quad = Quadrangle.from_dict(annotation["position"])
70 |         homography = estimate_doc_homography(original_quad, target_quad)
71 | 
72 |         for side in fields:
73 |             # Only process the right side
74 |             if not doc_id.endswith(side):
75 |                 continue
76 |             for field_name, field_value in fields[side].items():
77 |                 original_field_value = original_fields[side][field_name]
78 |                 original_field_position = original_field_value.get("position")
79 |                 field_value["position"] = original_field_position
80 |                 if original_field_position:
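81 |                     # Project the field's bounding box, expressed in the
82 |                     # generated document image, through the homography to
83 |                     # obtain its quadrangle in the inserted scene image.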
84 |                     original_bbox = BoundingBox.from_dict(original_field_position)
85 |                     field_quad = project_quad_to_target_image(
86 |                         original_bbox.to_quad(), homography
87 |                     )
88 |                     field_value["position"] = field_quad.to_dict()
89 | 
90 |         documents.append(doc_entry)
91 | 
92 |     output_dataset_dict = {
93 |         "__class__": "DocFakerDataset",
94 |         "documents": documents,
95 |         "info": {
96 |             "author": getpass.getuser(),
97 |             "createdAt": datetime.datetime.utcnow().isoformat(),
98 |             "description": inserted_dataset.info().get("description"),
99 |             "name": inserted_dataset.info().get("name")
100 |         }
101 |     }
102 |     output_dataset = DocFakerDataset(output_dataset_dict)
103 |     filename = os.path.join(
104 |         output_directory, os.path.basename(generated_dataset_filename)
105 |     )
106 |     output_dataset.save(filename)
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     transform_field_locations_to_inserted_documents()
111 | 
--------------------------------------------------------------------------------
/scripts/field_recognition_baseline/predict.py:
--------------------------------------------------------------------------------
1 | """Script to run field recognition using Tesseract."""
2 | import logging
3 | import os
4 | 
5 | import click
6 | import tqdm
7 | 
8 | from docxpand.dataset import DocFakerDataset
9 | from docxpand.scene_insertion import rectify_document
10 | from docxpand.geometry import Point, Quadrangle
11 | from docxpand.image import Image
12 | from docxpand.tesseract import Tesseract, PSM
13 | 
14 | logger = logging.getLogger(__name__)
15 | 
16 | ENLARGE_FACTOR = 0.02
17 | 
18 | LANGUAGES_PER_TEMPLATE = {
19 |     "ID_CARD_TD1_A-back": ("deu",),
20 |     "ID_CARD_TD1_A-front": ("deu",),
21 |     "ID_CARD_TD1_B-back": ("eng",),
22 |     "ID_CARD_TD1_B-front": ("eng",),
23 |     "ID_CARD_TD2_A-back": ("fra",),
24 |     "ID_CARD_TD2_A-front": ("fra",),
25 |     "ID_CARD_TD2_B-back": ("spa",),
26 |     "ID_CARD_TD2_B-front": ("spa",),
27 |     "PP_TD3_A-front": ("fra",),
28 |     "PP_TD3_B-front": ("por",),
29 |     "PP_TD3_C-front": ("nld",),
30 |     "RP_CARD_TD1-back": ("nld", "fra", "deu"),
31 |     "RP_CARD_TD1-front": ("nld", "fra", "deu"),
32 |     "RP_CARD_TD2-back": ("ita",),
33 |     "RP_CARD_TD2-front": ("ita",)
34 | }
35 | 
36 | 
37 | @click.command()
38 | @click.option(
39 |     "-td",
40 |     "--test-dataset",
41 |     type=click.Path(dir_okay=False, file_okay=True, readable=True),
42 |     required=True,
43 |     help="Path to the input test dataset, with generated documents and ground-truth values.",
44 | )
45 | @click.option(
46 |     "-di",
47 |     "--document-images",
48 |     type=click.Path(dir_okay=True, file_okay=True, readable=True),
49 |     required=True,
50 |     help="Path to the directory containing the document dataset images.",
51 | )
52 | @click.option(
53 |     "-pd",
54 |     "--prediction-dataset",
55 |     type=click.Path(dir_okay=False, file_okay=True, writable=True),
56 |     required=True,
57 |     help="Path to the output prediction dataset, with field predictions.",
58 | )
59 | def predict(
60 |     test_dataset: str,
61 |     document_images: str,
62 |     prediction_dataset: str,
63 | ) -> None:
64 |     """Run field recognition using Tesseract."""
65 |     input_dataset = DocFakerDataset(
66 |         dataset_input=test_dataset,
67 |         images_dir=document_images
68 |     )
69 |     progress = tqdm.tqdm(input_dataset.documents.values())
70 |     progress.set_description("Recognizing text fields")
71 |     tesseract = Tesseract(psm=PSM.SINGLE_BLOCK)
72 |     for doc_entry in progress:
73 |         filename = os.path.join(document_images, doc_entry["filename"])
74 |         image = Image.read(filename)
75 |         annotations = doc_entry["annotations"][0]
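76 |         # The document template determines which Tesseract language models
77 |         # are used for non-MRZ fields (see LANGUAGES_PER_TEMPLATE above).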
78 |         template = annotations["template"]
79 |         fields = annotations["fields"]
80 |         side = list(fields.keys())[0]
81 |         fields = fields[side]
82 |         for field_name, field in fields.items():
83 |             if (
84 |                 (field["type"] != "text") or
85 |                 ("signature" in field_name) or  # signatures (not evaluated)
86 |                 (field["position"] is None) or  # intermediary fields (used to generate mixed fields)
87 |                 (field["value"] is None)  # empty fields (not evaluated)
88 |             ):
89 |                 continue
90 |             position = Quadrangle.from_dict(field["position"])
91 |             position = Quadrangle(*[
92 |                 Point(pt.x * image.width, pt.y * image.height)
93 |                 for pt in position
94 |             ])
95 |             position = position.enlarge(
96 |                 ENLARGE_FACTOR, viewport=(image.width, image.height)
97 |             )
98 |             field_width = int(position.estimate_length())
99 |             field_image = rectify_document(image, position, field_width)
100 | 
101 |             # Set the right model for Tesseract
102 |             if "mrz" in field_name.lower():
103 |                 tesseract.languages = ("ocrb",)
104 |             else:
105 |                 tesseract.languages = LANGUAGES_PER_TEMPLATE[template]
106 | 
107 |             # Handle rotated field
108 |             if field_image.width < field_image.height:
109 |                 text_scores = {}
110 |                 for angle in [0, 90, 270]:
111 |                     text, score = tesseract.set_input(
112 |                         field_image.rotate90(angle)
113 |                     ).recognize()
114 |                     text_scores[text] = score
115 |                 text = max(text_scores, key=text_scores.__getitem__)
116 |             # Normal field
117 |             else:
118 |                 text, _ = tesseract.set_input(field_image).recognize()
119 | 
120 |             # Store multi-line results as a list of stripped lines
121 |             if "\n" in text:
122 |                 text = [
123 |                     sub_text.strip()
124 |                     for sub_text in text.split("\n")
125 |                     if sub_text.strip()
126 |                 ]
127 | 
128 |             field["prediction"] = text
129 | 
130 |     input_dataset.save(prediction_dataset)
131 | 
132 | 
133 | if __name__ == "__main__":
134 |     predict()
135 | 
--------------------------------------------------------------------------------
/scripts/localization_baseline/plot.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
3 | import matplotlib.pyplot as plt
4 | 
5 | 
6 | with open("/home/qsuser/Work/DocXPand/Expes/unet_v2/results.json", "r") as results_file:
7 |     data = json.load(results_file)
8 | plt.boxplot(
9 |     data,
10 |     patch_artist=True,
11 |     labels=["SDL-Net [9]"]
12 | )
13 | plt.ylabel("IoU")
14 | plt.show()
15 | 
--------------------------------------------------------------------------------
/scripts/lpips/lpips_dirs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import time
4 | 
5 | import cv2
6 | import lpips
7 | import numpy as np
8 | import pandas
9 | 
10 | 
11 | def resize_image_keep_ratio_and_pad(
12 |     img,
13 |     dim: int,
14 | ):
15 |     """Resize an image, keeping the aspect ratio, and pad it to a dim x dim square.
16 | 
17 |     Args:
18 |         img: the input image.
19 |         dim: target size; the output is a dim x dim padded image.
20 | 
21 |     Returns:
22 |         The resized and padded image.
23 |     """
24 |     height, width = img.shape[:2]
25 |     scale = dim / max(height, width)
26 |     # cv2.resize expects the target size as (width, height)
27 |     new_image = cv2.resize(
28 |         img,
29 |         (round(width * scale), round(height * scale)),
30 |         interpolation=cv2.INTER_AREA,
31 |     )
32 |     height, width = new_image.shape[:2]
33 |     top_pad = (dim - height) // 2
34 |     bottom_pad = dim - height - top_pad
35 |     left_pad = (dim - width) // 2
36 |     right_pad = dim - width - left_pad
37 |     padding = [
38 |         (int(top_pad), int(bottom_pad)),
39 |         (int(left_pad), int(right_pad)),
40 |         (0, 0),
41 |     ]
42 |     new_image = np.pad(
43 |         new_image, padding, mode="constant", constant_values=0
44 |     )
45 |     return new_image
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     parser = argparse.ArgumentParser(
50 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
51 |     )
52 |     parser.add_argument("-d0", "--dir0", type=str, required=True)
53 |     parser.add_argument("-d1", "--dir1", type=str, required=True)
54 |     parser.add_argument("-o", "--out", type=str, default="./output.csv")
55 |     parser.add_argument("-v", "--version", type=str, default="0.1")
56 |     parser.add_argument(
57 |         "--use-gpu", action="store_true", help="turn on flag to use GPU"
58 |     )
59 |     parser.add_argument("--dim", type=int, default=512)
60 |     opt = parser.parse_args()
61 |     use_gpu = opt.use_gpu
62 |     dir_sources = opt.dir0
63 |     dir_targets = opt.dir1
64 |     dim = opt.dim
65 |     # Initialize the model
66 |     loss_fn = lpips.LPIPS(net="alex", version=opt.version)
67 |     if opt.use_gpu:
68 |         loss_fn.cuda()
69 |     # Cache of already-loaded target image tensors
70 |     IMGS = {}
71 |     # Crawl directories
72 |     sources_files = [
73 |         f
74 |         for f in os.listdir(dir_sources)
75 |         if f.lower().endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif"))
76 |     ]
77 |     target_files = [
78 |         f
79 |         for f in os.listdir(dir_targets)
80 |         if f.lower().endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif"))
81 |     ]
82 |     begin = time.time()
83 |     all_dists = []
84 |     for source_file in sources_files:
85 |         dists_cur = []
86 |         source_img = os.path.join(dir_sources, source_file)
87 |         assert os.path.exists(source_img)
88 |         min_dist = float("inf")
89 |         min_img_path = ""
90 |         # Load the source image
91 |         img0 = lpips.im2tensor(
92 |             resize_image_keep_ratio_and_pad(cv2.imread(source_img), dim)
93 |         )
94 |         if use_gpu:
95 |             img0 = img0.cuda()
96 |         for target_file in target_files:
97 |             target_img = os.path.join(dir_targets, target_file)
98 |             assert os.path.exists(target_img)
99 |             if target_img not in IMGS:
100 |                 img1 = lpips.im2tensor(
101 |                     resize_image_keep_ratio_and_pad(cv2.imread(target_img), dim)
102 |                 )
103 |                 IMGS[target_img] = img1
104 |             img1 = IMGS[target_img]
105 |             if use_gpu:
106 |                 img1 = img1.cuda()
107 |             # Compute distance
108 |             dist = float(loss_fn.forward(img0, img1))
109 |             if min_dist > dist:
110 |                 min_dist = dist
111 |                 min_img_path = target_img
112 |             dists_cur.append(dist)
113 |         all_dists.append(dists_cur)
114 |         print(f"{source_file}: {min_dist:.3f} / {min_img_path}")
115 |     elapsed = time.time() - begin
116 |     len_src = len(sources_files)
117 |     len_target = len(target_files)
118 |     print(
119 |         f"Total: {elapsed:.1f} s for {len_src} sources and {len_target} targets, "
120 |         f"i.e. {len_target * len_src} comparisons "
121 |         f"({elapsed / (len_target * len_src):.4f} s/pair)."
122 |     )
123 |     matrix = np.array(all_dists)
124 |     df = pandas.DataFrame(matrix, columns=target_files, index=sources_files)
125 |     print(df)
126 |     df.to_csv(opt.out)
127 | 
--------------------------------------------------------------------------------
/scripts/lpips/lpips_metrics.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | 
4 | import matplotlib.pyplot as plt
5 | import pandas
6 | 
7 | 
8 | def load_from_csv(filename):
9 |     return pandas.read_csv(filename, index_col=0)
10 | 
11 | 
12 | if __name__ == "__main__":
13 |     parser = argparse.ArgumentParser(
14 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
15 |     )
16 |     parser.add_argument("-i", "--input-csv", action="append", required=True)
17 |     opt = parser.parse_args()
18 |     inputs_csv = [Path(file) for file in opt.input_csv]
19 |     datas = [load_from_csv(input_csv) for input_csv in inputs_csv]
20 |     # Keep, for each target image, the minimum LPIPS distance over all sources
21 |     mins = [data.to_numpy().min(axis=0) for data in datas]
22 |     plt.boxplot(
23 |         mins,
24 |         patch_artist=True,
25 |         labels=[input_csv.stem.replace("+", "\n") for input_csv in inputs_csv]
26 |     )
27 |     plt.xlabel("Benchmark datasets")
28 |     plt.ylabel("Min-LPIPS")
29 |     plt.show()
30 | 
--------------------------------------------------------------------------------
/stable_diffusion/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10-bullseye
2 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6
3 | RUN mkdir /work
4 | WORKDIR /work
5 | RUN git clone -b v1.5.0 https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
6 | WORKDIR /work/stable-diffusion-webui
7 | ARG EXTRA_COMMANDLINE_ARGS
8 | ENV COMMANDLINE_ARGS="--skip-torch-cuda-test ${EXTRA_COMMANDLINE_ARGS}"
9 | RUN pip install httpx==0.24.1
10 | RUN python -c 'import launch; launch.prepare_environment()'
11 | RUN python -c 'import webui; webui.initialize();'
12 | CMD ["python", "/work/stable-diffusion-webui/launch.py", "--api", "--listen"]
--------------------------------------------------------------------------------
/stable_diffusion/README.md:
--------------------------------------------------------------------------------
1 | # Build the docker container
2 | * With GPU
3 | If you have a compatible GPU available on your server, build the docker image for Stable Diffusion like this:
4 | ```
5 | docker build -t stable_diffusion .
6 | ```
7 | 
8 | The CUDA test is skipped at build time only because GPUs are not available during the build.
9 | 
10 | * Without GPU (not recommended)
11 | If you only have a CPU available on your server, build it like this:
12 | ```
13 | docker build -t stable_diffusion --build-arg EXTRA_COMMANDLINE_ARGS="--precision full --no-half" .
14 | ```
15 | 
16 | # Run the docker container
17 | * With GPU
18 | ```
19 | docker run --gpus all --privileged --rm -it --network host stable_diffusion
20 | ```
21 | 
22 | We have tested inference using an NVIDIA GTX Titan X (12 GB) card, with NVIDIA driver version 510.108.03 and CUDA version 11.6. Please refer to the documentation at https://github.com/AUTOMATIC1111/stable-diffusion-webui in case of difficulty.
23 | 
24 | * Without GPU (not recommended)
25 | ```
26 | docker run --rm -it --network host stable_diffusion
27 | ```
28 | 
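29 | # Check the API
30 | Once the container is running, you can check that the API answers before launching the generation scripts. The snippet below is a minimal sketch, assuming the default AUTOMATIC1111 `/sdapi/v1/txt2img` endpoint on port 7860 and an installed `requests` package (both are assumptions, not part of this repository); adapt the host and prompt to your setup:
31 | ```
32 | import base64
33 | 
34 | import requests
35 | 
36 | # Request a single test image from the txt2img endpoint
37 | response = requests.post(
38 |     "http://localhost:7860/sdapi/v1/txt2img",
39 |     json={"prompt": "identity photo of a person, neutral background", "steps": 20},
40 |     timeout=600,
41 | )
42 | response.raise_for_status()
43 | 
44 | # Generated images are returned as base64-encoded strings
45 | image_bytes = base64.b64decode(response.json()["images"][0])
46 | with open("test.png", "wb") as image_file:
47 |     image_file.write(image_bytes)
48 | ```
--------------------------------------------------------------------------------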