├── .dockerignore ├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ ├── ghcr.yml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── .gitlint ├── .mypy.ini ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONFIGURATION.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MAINTAINING.md ├── MIGRATE.md ├── Makefile ├── README.md ├── USAGE.md ├── docker-compose.override.yml ├── docker-compose.yml ├── document_merge_service ├── __init__.py ├── api │ ├── __init__.py │ ├── apps.py │ ├── authentication.py │ ├── data │ │ ├── 2023.test.test.docx-template.docx │ │ ├── __init__.py │ │ ├── black.png │ │ ├── docx-template-filters.docx │ │ ├── docx-template-image-placeholder-header-footer.docx │ │ ├── docx-template-loopcontrols.docx │ │ ├── docx-template-placeholdercheck.docx │ │ ├── docx-template-syntax.docx │ │ ├── docx-template.docx │ │ ├── invalid-template.xlsx │ │ ├── loadtest │ │ │ ├── 1.doc │ │ │ ├── 2.docx │ │ │ ├── 3.docx │ │ │ └── 4.docx │ │ ├── odt-template.odt │ │ ├── test.txt │ │ ├── xlsx-not-valid.xlsx │ │ ├── xlsx-structure.xlsx │ │ ├── xlsx-syntax.xlsx │ │ └── xlsx-template.xlsx │ ├── engines.py │ ├── factories.py │ ├── file_converter.py │ ├── filters.py │ ├── jinja.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ ├── clean_dangling_files.py │ │ │ ├── dms_encrypt_templates.py │ │ │ └── upload_local_templates.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_template_group.py │ │ ├── 0003_template_meta.py │ │ ├── 0004_cleanup_files.py │ │ ├── 0005_xlsx_template_engine.py │ │ ├── 0006_remove_template_group.py │ │ ├── 0007_template_created_at_template_created_by_group_and_more.py │ │ ├── 0008_alter_template_engine.py │ │ └── __init__.py │ ├── models.py │ ├── pagination.py │ ├── permissions.py │ ├── serializers.py │ ├── tests │ │ ├── __init__.py │ │ ├── __snapshots__ │ │ │ └── test_template.ambr │ │ ├── test_authentication.py │ │ ├── test_clean_dangling_files.py │ │ ├── test_convert.py │ │ ├── 
test_encrypt_templates.py │ │ ├── test_excel.py │ │ ├── test_filters.py │ │ ├── test_jinja.py │ │ ├── test_pagination.py │ │ ├── test_template.py │ │ ├── test_unoconv.py │ │ └── test_upload_local_templates.py │ ├── unoconv.py │ ├── urls.py │ └── views.py ├── conftest.py ├── extensions │ ├── __init__.py │ ├── permissions.py │ └── visibilities.py ├── gunicorn.py ├── sentry.py ├── settings.py ├── tests │ ├── __init__.py │ ├── test_sentry.py │ └── test_settings.py ├── urls.py └── wsgi.py ├── manage.py ├── poetry.lock └── pyproject.toml /.dockerignore: -------------------------------------------------------------------------------- 1 | .cache 2 | .coverage 3 | .coverage.* 4 | docker-compose.* 5 | Dockerfile 6 | .dockerignore 7 | .env 8 | .git 9 | *.pyc 10 | __pycache__ 11 | *.pyd 12 | *.pyo 13 | .pytest_cache 14 | .Python 15 | .python-version 16 | *.swp 17 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | insert_final_newline = true 5 | end_of_line = lf 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | 9 | [*.sql] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [*.py] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | [*.json] 18 | indent_style = space 19 | indent_size = 2 20 | 21 | [*.yml] 22 | indent_style = space 23 | indent_size = 2 24 | 25 | [Makefile] 26 | indent_style = tab 27 | indent_size = 4 28 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | day: friday 8 | time: "12:00" 9 | timezone: "Europe/Zurich" 10 | open-pull-requests-limit: 10 11 | ignore: 12 | - dependency-name: django 13 | versions: 14 | - ">=4.3" 15 | - 
dependency-name: python-semantic-release 16 | versions: 17 | - ">=8" 18 | - package-ecosystem: docker 19 | directory: "/" 20 | schedule: 21 | interval: weekly 22 | day: friday 23 | time: "12:00" 24 | timezone: "Europe/Zurich" 25 | ignore: 26 | - dependency-name: python 27 | versions: 28 | - ">3.13" 29 | - package-ecosystem: github-actions 30 | directory: "/" 31 | schedule: 32 | interval: weekly 33 | day: friday 34 | time: "12:00" 35 | timezone: "Europe/Zurich" 36 | -------------------------------------------------------------------------------- /.github/workflows/ghcr.yml: -------------------------------------------------------------------------------- 1 | name: Container image 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | - published 8 | 9 | # Run build for any PRs - we won't push in those however 10 | pull_request: 11 | branches: 12 | - main 13 | 14 | # Publish `main` as Docker `dev` image. 15 | push: 16 | branches: 17 | - main 18 | 19 | concurrency: 20 | group: build-${{ github.ref }} 21 | cancel-in-progress: true 22 | 23 | env: 24 | REGISTRY: ghcr.io 25 | 26 | jobs: 27 | # Push image to GitHub Packages. 
28 | # See also https://docs.docker.com/build/ci/github-actions/ 29 | container-registry: 30 | runs-on: ubuntu-22.04 31 | permissions: 32 | packages: write 33 | contents: read 34 | 35 | strategy: 36 | matrix: 37 | include: 38 | - variant: slim 39 | suffix: -slim 40 | - variant: full 41 | suffix: 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | 46 | - name: Set up Docker Buildx 47 | uses: docker/setup-buildx-action@v3 48 | 49 | - name: Log in to the Container registry 50 | uses: docker/login-action@v3 51 | if: github.event_name != 'pull_request' 52 | with: 53 | registry: ${{ env.REGISTRY }} 54 | username: ${{ github.actor }} 55 | password: ${{ secrets.GITHUB_TOKEN }} 56 | 57 | - name: Extract metadata (tags, labels) for Docker 58 | id: meta 59 | uses: docker/metadata-action@v5 60 | with: 61 | images: ${{ env.REGISTRY }}/${{ github.repository }} 62 | tags: | 63 | type=raw,value=dev${{ matrix.suffix }},enable={{is_default_branch}} 64 | type=semver,pattern={{version}}${{ matrix.suffix }} 65 | type=semver,pattern={{major}}.{{minor}}${{ matrix.suffix }} 66 | type=semver,pattern={{major}}${{ matrix.suffix }} 67 | labels: | 68 | org.opencontainers.image.title=document-merge-service 69 | org.opencontainers.image.description=${{ github.event.repository.description }} 70 | org.opencontainers.image.url=${{ github.event.repository.html_url }} 71 | org.opencontainers.image.source=${{ github.event.repository.clone_url }} 72 | org.opencontainers.image.revision=${{ github.sha }} 73 | org.opencontainers.image.licenses=${{ github.event.repository.license.spdx_id }} 74 | 75 | - name: Build and push 76 | uses: docker/build-push-action@v6 77 | with: 78 | context: . 
79 | file: ./Dockerfile 80 | push: ${{ github.event_name != 'pull_request' }} 81 | tags: ${{ steps.meta.outputs.tags }} 82 | labels: ${{ steps.meta.outputs.labels }} 83 | cache-from: type=gha 84 | cache-to: type=gha,mode=max 85 | build-args: VARIANT=${{ matrix.variant }} 86 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: PyPI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Install poetry 14 | run: pipx install poetry 15 | 16 | - name: Setup python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.12" 20 | cache: "poetry" 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install -U twine 25 | poetry install 26 | - name: Build package 27 | run: poetry build 28 | 29 | - name: Upload to PyPI 30 | run: twine upload dist/* 31 | env: 32 | TWINE_USERNAME: __token__ 33 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 34 | TWINE_NON_INTERACTIVE: true 35 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: "0 0 * * 0" 12 | 13 | concurrency: 14 | group: tests-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | lint: 19 | name: Lint 20 | runs-on: ubuntu-22.04 21 | env: 22 | ENV: dev 23 | steps: 24 | - uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 27 | - name: Install poetry 28 | run: pipx install poetry 29 | - name: Setup python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: "3.12" 33 | cache: "poetry" 34 | - name: Install dependencies 35 | run: poetry install 
--with dev 36 | - name: Run gitlint 37 | run: poetry run gitlint --contrib contrib-title-conventional-commits 38 | - name: Run ruff check 39 | run: poetry run ruff check . 40 | - name: Run mypy 41 | run: poetry run mypy document_merge_service 42 | - name: Run ruff format 43 | run: poetry run ruff format --check --diff . 44 | - name: Run migration check 45 | run: poetry run python manage.py makemigrations --check --dry-run --no-input 46 | 47 | docker-tests: 48 | name: Docker tests 49 | needs: [lint] 50 | runs-on: ubuntu-22.04 51 | env: 52 | ENV: dev 53 | steps: 54 | - uses: actions/checkout@v4 55 | - name: Set up Docker Buildx 56 | uses: docker/setup-buildx-action@v3 57 | - name: Set UID 58 | run: | 59 | echo "UID=$(id --user)" > .env 60 | echo "ISOLATE_UNOCONV=true" >> .env 61 | - name: Build docker containers 62 | run: docker compose up -d --build 63 | - name: Run pytest 64 | run: docker compose exec -T document-merge-service pytest --no-cov-on-fail --cov --create-db -vv 65 | 66 | compatibility-tests: 67 | name: Compatibility tests 68 | needs: [lint] 69 | runs-on: ubuntu-22.04 70 | strategy: 71 | fail-fast: false 72 | matrix: 73 | version: 74 | - "3.10" 75 | - "3.11" 76 | - "3.12" 77 | - "3.13" 78 | database: 79 | - "sqlite" 80 | - "postgres" 81 | services: 82 | postgres: 83 | image: postgres:alpine 84 | env: 85 | POSTGRES_USER: document-merge-service 86 | POSTGRES_PASSWORD: document-merge-service 87 | options: >- 88 | --health-cmd pg_isready 89 | --health-interval 10s 90 | --health-timeout 5s 91 | --health-retries 5 92 | ports: 93 | - 5432:5432 94 | steps: 95 | - uses: actions/checkout@v4 96 | - name: Install poetry 97 | run: pipx install poetry 98 | - name: Setup python 99 | uses: actions/setup-python@v5 100 | with: 101 | python-version: ${{ matrix.version }} 102 | cache: "poetry" 103 | - name: Prepare directories 104 | run: mkdir -p ${{ runner.temp }}/document-merge-service/data ${{ runner.temp }}/document-merge-service/media/attachments ${{ runner.temp 
}}/document-merge-service/media/__convert__ 105 | - name: Install dependencies 106 | run: | 107 | sudo apt-get update 108 | sudo apt-get install -y --no-install-recommends util-linux unoconv libreoffice-writer libmagic1 109 | poetry install --extras full --with dev 110 | - name: Set environment 111 | run: | 112 | echo "ENV=dev" >> .env 113 | echo "UID=$(id --user)" >> .env 114 | echo "ISOLATE_UNOCONV=true" >> .env 115 | echo "DATABASE_DIR=${{ runner.temp }}/document-merge-service/data" >> .env 116 | echo "MEDIA_ROOT=${{ runner.temp }}/document-merge-service/media" >> .env 117 | - name: Configure postgres 118 | if: ${{ matrix.database == 'postgres' }} 119 | run: | 120 | echo "DATABASE_ENGINE=django.db.backends.postgresql" 121 | echo "DATABASE_HOST=localhost" 122 | echo "DATABASE_PORT=5432" 123 | echo "DATABASE_NAME=document-merge-service" 124 | echo "DATABASE_USER=document-merge-service" 125 | echo "DATABASE_PASSWORD=document-merge-service" 126 | - name: Run tests 127 | run: poetry run pytest --no-cov-on-fail --cov --create-db -vv 128 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Temporary files 7 | .~* 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | .pytest_cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | # Pyenv 69 | .python-version 70 | 71 | # Dotenv 72 | .env 73 | 74 | # Editor swap files 75 | *.swp 76 | 77 | # IDE specific configuration 78 | .idea/ 79 | 80 | /.dmypy.json 81 | -------------------------------------------------------------------------------- /.gitlint: -------------------------------------------------------------------------------- 1 | [general] 2 | ignore=body-is-missing,body-min-length 3 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | exclude = document_merge_service/api/migrations 3 | 4 | [mypy-rest_framework.*] 5 | ignore_missing_imports = True 6 | 7 | [mypy-babel.dates.*] 8 | ignore_missing_imports = True 9 | 10 | [mypy-docx.*] 11 | ignore_missing_imports = True 12 | 13 | [mypy-factory.*] 14 | ignore_missing_imports = True 15 | 16 | [mypy-docxtpl.*] 17 | ignore_missing_imports = True 18 | 19 | [mypy-lxml.*] 20 | ignore_missing_imports = True 21 | 22 | [mypy-jsonpath.*] 23 | ignore_missing_imports = True 24 | 25 | [mypy-psutil.*] 26 | ignore_missing_imports = True 27 | 28 | [mypy-pytest_factoryboy.*] 29 | ignore_missing_imports = True 30 | 31 | [mypy-snapshottest.*] 32 | ignore_missing_imports = True 33 | 34 | [mypy-environ.*] 35 | ignore_missing_imports = True 36 | 37 | [mypy-xltpl.*] 38 | ignore_missing_imports = True 39 | 40 | [mypy-openpyxl.*] 41 | ignore_missing_imports = 
True 42 | 43 | [mypy-generic_permissions.*] 44 | ignore_missing_imports = True 45 | 46 | [mypy-django_filters.*] 47 | ignore_missing_imports = True 48 | 49 | [mypy-tqdm.*] 50 | ignore_missing_imports = True 51 | 52 | [mypy-storages.*] 53 | ignore_missing_imports = True 54 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: ruff-format 5 | stages: [commit] 6 | name: format code 7 | language: system 8 | entry: ruff format . 9 | types: [python] 10 | - id: ruff check 11 | stages: [commit] 12 | name: check format,import 13 | language: system 14 | entry: ruff check --diff . 15 | types: [python] 16 | - id: gitlint 17 | stages: [commit-msg] 18 | name: gitlint 19 | description: Validate commit lint 20 | entry: gitlint --msg-filename 21 | language: system 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 8.0.0 (16. 
May 2025) 4 | 5 | ### Feature 6 | 7 | * **docker:** Add image variants ([`72fb354`](https://github.com/adfinis/document-merge-service/commit/72fb354aef0f2b4dbd9d1249d2cc6d8df4f30aba)) 8 | * **docker:** Replace uwsgi with gunicorn as app server ([`aa3f24b`](https://github.com/adfinis/document-merge-service/commit/aa3f24b7f49c30a4b969ba83f89c522a4779929d)) 9 | * **build:** Use multi stage docker build for smaller images ([`73b9e43`](https://github.com/adfinis/document-merge-service/commit/73b9e4351636da53629acd4fe54eb9fba0c2855a)) 10 | * **docker:** Use python 3.13 for docker image ([`e38f4a4`](https://github.com/adfinis/document-merge-service/commit/e38f4a4fca21be43d93196ac1675ab223cd1bbdf)) 11 | * **deps:** Add python 3.13 to compatibility matrix ([`05e9268`](https://github.com/adfinis/document-merge-service/commit/05e9268dc28d209f9099ad2ea889b0b9b9f14cb9)) 12 | * **engine:** Remove obsolete docx mailmerge engine ([`698da36`](https://github.com/adfinis/document-merge-service/commit/698da3682b5657f3290ed274c4caf312c2e489f7)) 13 | 14 | ### Fix 15 | 16 | * **settings:** Make DOCXTEMPLATE_JINJA_EXTENSIONS setting optional ([`2ec2b63`](https://github.com/adfinis/document-merge-service/commit/2ec2b63b455792e4c586fc51f311f9cbc901d3f0)) 17 | * **deps:** Replace imghdr with python-magic ([`b6a4ca2`](https://github.com/adfinis/document-merge-service/commit/b6a4ca211a2aef19aefac7cc0bc4ce6998f86f0b)) 18 | 19 | ### Breaking 20 | 21 | * Remove dependency on MySQL. If you need to use mysql as a database, you may install the required dependency on top of the standard docker image. ([`08d909d`](https://github.com/adfinis/document-merge-service/commit/08d909db0adf7e32b0ed04fb4c00cc88c302be01)) 22 | * `document-merge-service` now uses Gunicorn as its app server instead of uWSGI. If you are using a custom uWSGI configuration via the `UWSGI_INI` env variable, replace it with a custom Gunicorn configuration as explained in [the documentation](CONFIGURATION.md#gunicorn). 
([`aa3f24b`](https://github.com/adfinis/document-merge-service/commit/aa3f24b7f49c30a4b969ba83f89c522a4779929d)) 23 | * This will remove poetry entirely from the production image. If you customized the command, make sure to remove `poetry run` as the binaries are now globally available without using poetry. ([`73b9e43`](https://github.com/adfinis/document-merge-service/commit/73b9e4351636da53629acd4fe54eb9fba0c2855a)) 24 | * This fully removes the support of the docx mailmerge engine as the used library is quite inactive and there is no current use case for this engine. For more information, check issue #570. ([`698da36`](https://github.com/adfinis/document-merge-service/commit/698da3682b5657f3290ed274c4caf312c2e489f7)) 25 | 26 | ## 7.3.7 (10 April 2025) 27 | 28 | ### Fix 29 | 30 | * **ci:** Create container images also when publishing a draft release ([`fb79d06`](https://github.com/adfinis/document-merge-service/commit/fb79d06490661b06c66049b10d292b5ed92dcf55)) 31 | 32 | ## 7.3.6 (8 April 2025) 33 | 34 | ### Fix 35 | 36 | * **deps:** Fix locked dependency on django-storages ([`bf085a9`](https://github.com/adfinis/document-merge-service/commit/bf085a9808bdb76bb70bb8e834b92b6290384c73)) 37 | * **api:** Delete old file when updating template file ([`f98e38b`](https://github.com/adfinis/document-merge-service/commit/f98e38bdb1c1fff84e9e4f3c82d234250cd44c8d)) 38 | 39 | ## 7.3.5 (13 January 2025) 40 | 41 | ### Fix 42 | 43 | * **deps:** Update dependencies ([`16d4f76`](https://github.com/adfinis/document-merge-service/commit/16d4f767999989f46368f475e15a6335b5c06b28)) 44 | 45 | ## 7.3.4 (12 December 2024) 46 | 47 | ### Fix 48 | 49 | * **cache:** Cast common options for memcached cache ([`59f1727`](https://github.com/adfinis/document-merge-service/commit/59f1727b872f34da51757c894db5385170ff4fb4)) 50 | 51 | ## 7.3.3 (10 December 2024) 52 | 53 | ### Fix 54 | 55 | * **cache:** Add environment variable to configure cache options 
([`6b45365`](https://github.com/adfinis/document-merge-service/commit/6b45365ac6cbc60b1739c75982f54934d57638bc)) 56 | 57 | ## 7.3.2 (29 November 2024) 58 | 59 | ### Fix 60 | 61 | * **deps:** Update dependencies ([`322e9ad`](https://github.com/adfinis/document-merge-service/commit/322e9ad7fb20c05198477f7c9c53598ca0c617e3)) 62 | 63 | ## 7.3.1 (14 November 2024) 64 | 65 | ### Fix 66 | 67 | * **merge:** Fix extension in filename when merging without convert ([`9676dab`](https://github.com/adfinis/document-merge-service/commit/9676dab2a859bfa3da4846d8e795bff09f870fbc)) 68 | 69 | ## 7.3.0 (12 November 2024) 70 | 71 | ### Feature 72 | 73 | * **api:** Accept available placeholders as json list ([`88887c4`](https://github.com/adfinis-sygroup/document-merge-service/commit/88887c464825301c04c7642a69977ed68bb6b512)) 74 | 75 | ## 7.2.0 (11 October 2024) 76 | ### Feature 77 | * **auth:** Make OIDC_USERNAME_CLAIM configurable ([`be82b1e`](https://github.com/luytena/document-merge-service/commit/be82b1ec20c4cc651c3cb61f0e48d71447a3a6c3)) 78 | 79 | ### Fix 80 | * **deps:** Update dependencies ([`4264f66`](https://github.com/luytena/document-merge-service/commit/4264f6657e2b56993a20b0ff1907a099094de03e)) 81 | 82 | ## 7.1.1 (18 September 2024) 83 | ### Fix 84 | 85 | * **sse-c:** Bump django-storages to include fix for head_object ([`b0a9601`](https://github.com/adfinis/document-merge-service/commit/b0a96015974f2f1fe0762b94b2459f10e3006ac8)) 86 | 87 | ## 7.1.0 (18 September 2024) 88 | ### Fix 89 | 90 | * **sse-c:** Bump django-storages to include fix for head_object ([`b0a9601`](https://github.com/adfinis/document-merge-service/commit/b0a96015974f2f1fe0762b94b2459f10e3006ac8)) 91 | 92 | ## 7.1.0 (21 August 2024) 93 | ### Feature 94 | 95 | * **template:** add new template model properties ([`1624693`](https://github.com/adfinis/document-merge-service/commit/16246933a821584ad5d88c65489a9849d8d4904b)) 96 | 97 | ### Fix 98 | 99 | * **template:** Only delete file if it exists 
([`eef256a`](https://github.com/adfinis/document-merge-service/commit/eef255add8a5fff5705ba9ce063acc7b09beef03)) 100 | * **template:** Raise the maximum number of fields for the data upload ([`bbbb82b`](https://github.com/adfinis/document-merge-service/commit/bbbb82ba805a2741ddf9364c697713e31b644ce2)) 101 | 102 | ## 7.0.2 (9 August 2024) 103 | ### Fix 104 | 105 | * **deps:** Update django ([`4f0cee5`](https://github.com/adfinis/document-merge-service/commit/4f0cee592bd5636c696281df0ff1471ea734c09f)) 106 | 107 | ## 7.0.1 (19 July 2024) 108 | ### Fix 109 | 110 | * Ssec download ([`d6e043d`](https://github.com/adfinis/document-merge-service/commit/d6e043d93203c0edcbae7962b1008717dcae9f08)) 111 | 112 | ## 7.0.0 (17 July 2024) 113 | ### Feature 114 | 115 | * **s3:** Add s3 ssec option ([`7829a2c`](https://github.com/adfinis/document-merge-service/commit/7829a2cba627eedc7f3c97bca666973f387e61d6)) 116 | 117 | ### Breaking 118 | 119 | * prefix storage env vars with dms for django-storages to avoid conflicts ([`8fc5649`](https://github.com/adfinis/document-merge-service/commit/8fc564985d9516a71a5acdd9651134ec5add2a86)) 120 | * removed support for python 3.8 and 3.9 ([`8ef1733`](https://github.com/adfinis/document-merge-service/commit/8ef1733ed279594ab415d1a53f2fa926bebfc758)) 121 | 122 | ## 6.6.1 (1 July 2024) 123 | ### Chore 124 | 125 | This is a pure maintenance release. 
Most noteworthy commit is: 126 | 127 | * chore: replace unmaintained docx-mailmerge with docx-mailmerge2 ([`57fcd23`](https://github.com/adfinis/document-merge-service/commit/57fcd230b08557f43128b8df70e23caa629494fb)) 128 | 129 | ## 6.6.0 (18 June 2024) 130 | ### Feature 131 | 132 | * **image:** Allow images to keep their original aspect ratio ([`05ade2b`](https://github.com/adfinis/document-merge-service/commit/05ade2b41f6668d7c92fb9fde9951977a8972753)) 133 | 134 | ## 6.5.2 (12 June 2024) 135 | ### Fix 136 | 137 | * Use a sandboxed environment ([#763](https://github.com/adfinis/document-merge-service/issues/763)) ([`a1edd39`](https://github.com/adfinis/document-merge-service/commit/a1edd39d33d1bdf75c31ea01c317547be90ca074)) 138 | 139 | ## 6.5.1 (3 June 2024) 140 | ### Fix 141 | 142 | * **deps:** Allow caching with memcache ([`2f7c2bc`](https://github.com/adfinis/document-merge-service/commit/2f7c2bc196f56dd5488101c8ad1e639671002005)) 143 | * **deps:** Fix usage with postgres database ([`f29c763`](https://github.com/adfinis/document-merge-service/commit/f29c7635d3fa7759c9c9e98bc6e437da650ebb26)) 144 | 145 | ## 6.5.0 (30 May 2024) 146 | ### Feature 147 | 148 | * **docker:** Update python to v3.12 ([`14c4d7f`](https://github.com/adfinis/document-merge-service/commit/14c4d7f97005ce9651be6dc37eae904125614e29)) 149 | * **deps:** Update dependencies ([`5773d5c`](https://github.com/adfinis/document-merge-service/commit/5773d5c5283543c843c4986daf8b71cc1cafa611)) 150 | * **deps:** Update django to v4.2 LTS ([`5287a3e`](https://github.com/adfinis/document-merge-service/commit/5287a3e4a6b26c12e122026f789e80d2c70f892b)) 151 | 152 | ### Fix 153 | 154 | * **docker:** Install dependencies as dms user ([`6a17a8f`](https://github.com/adfinis/document-merge-service/commit/6a17a8f1cfce053440790e9b62a7d6c21405f580)) 155 | 156 | ## 6.4.6 (12 June 2024) (Backport) 157 | ### Fix 158 | 159 | * Use a sandboxed environment 
([#763](https://github.com/adfinis/document-merge-service/issues/763)) ([`a1edd39`](https://github.com/adfinis/document-merge-service/commit/a1edd39d33d1bdf75c31ea01c317547be90ca074)) 160 | 161 | ## 6.4.5 (25 April 2024) 162 | ### Fix 163 | * **image:** Fix template validation with images in headers / footers ([`eccbb34`](https://github.com/adfinis/document-merge-service/commit/eccbb34ce69cd26a998a8ef15db109e8faf2a1e3)) 164 | 165 | ## 6.4.4 (24 January 2024) 166 | ### Fix 167 | * **settings:** Add s3 ssl settings ([#698](https://github.com/adfinis/document-merge-service/issues/698)) ([`c92b381`](https://github.com/adfinis/document-merge-service/commit/c92b381ce6ebc45c0b96eba828b471f2bd28a169)) 168 | 169 | 170 | ## 6.4.3 (23 October 2023) 171 | ### Fix 172 | * **convert:** Handle file names with multiple dots ([`3ae90f1`](https://github.com/adfinis/document-merge-service/commit/3ae90f1d3e8e2bfce86528be2af142975b6f1a58)) 173 | 174 | ## 6.4.2 (25 September 2023) 175 | ### Fix 176 | 177 | * **deps:** Downgrade urllib to v1 ([`d7ab8ce`](https://github.com/adfinis/document-merge-service/commit/d7ab8ce0159b61d0c661953735bfed5ead370605)) 178 | 179 | ## 6.4.1 (09 August 2023) 180 | ### Fix 181 | 182 | * Improve command to upload local template files to storage backend ([`4589dcb`](https://github.com/adfinis/document-merge-service/commit/4589dcba025f82ffc23726f7284856755268ab10)) 183 | 184 | ## 6.4.0 (03 August 2023) 185 | ### Feature 186 | * Allow conversion of docx and odt files to pdf using a new convert endpoint ([`e00e49e`](https://github.com/adfinis/document-merge-service/commit/e00e49e210b17469457b76eccf306f17b40da43a)) 187 | 188 | ## 6.3.1 (31 July 2023) 189 | ### Fix 190 | 191 | * **auth:** Don't run any authentication logic if auth is disabled ([`564b504`](https://github.com/adfinis/document-merge-service/commit/564b504be673f34677eb6736a3b26dbbfdd3d7ec)) 192 | 193 | ## 6.3.0 (25 July 2023) 194 | ### Feature 195 | 196 | * **extensions:** Add setting for passing 
custom arguments into extensions ([`b76e293`](https://github.com/adfinis/document-merge-service/commit/b76e2930535f15820e449930e57d004c54e1ba2d)) 197 | 198 | ## 6.2.2 (24 July 2023) 199 | ### Fix 200 | 201 | * **template:** Migrate group to meta property before removing ([`4480877`](https://github.com/adfinis/document-merge-service/commit/448087728f3103744b8245ff5400b63201352b19)) 202 | 203 | ## 6.2.1 (19 July 2023) 204 | ### Fix 205 | 206 | * **dgap:** Add env variables to configure permissions and visibilities ([`67fc95a`](https://github.com/adfinis/document-merge-service/commit/67fc95a16f72e7afed37342972c6101c492d529a)) 207 | * Storage generic file cleanup ([`0633fd2`](https://github.com/adfinis/document-merge-service/commit/0633fd20a7a11f00a8d7eb6aa903aa38520fe8b1)) 208 | 209 | ## 6.2.0 (11 July 2023) 210 | ### Feature 211 | * Add django storages and settings for s3 storage ([`6df1a83`](https://github.com/Yelinz/document-merge-service/commit/6df1a83a4befbb8687a951d45fe6910deba83272)) 212 | 213 | ## 6.1.2 (10 May 2023) 214 | Maintenance release only containing dependency updates. 
215 | 216 | ## 6.1.1 (03 May 2023) 217 | ### Fix 218 | * **excel:** Set `sheet_name` and `tpl_name` to load the correct sheet 219 | ([`13a2a07`](https://github.com/adfinis/document-merge-service/commit/13a2a073aa1a7f65cbf7c794210f460db1a2509e)) 220 | 221 | ## 6.1.0 (27 January 2023) 222 | ### Feature 223 | * **filters:** Add template meta filter ([`2daf8ec`](https://github.com/Yelinz/document-merge-service/commit/2daf8ec736a9ff5ee424548ef9eef53362e284e0)) 224 | * Add sentry integration ([`abe37f1`](https://github.com/Yelinz/document-merge-service/commit/abe37f1417554119299acb8aa852892bae823490)) 225 | 226 | ### Fix 227 | * **auth:** Add userinfo to authenticated user ([`21ae809`](https://github.com/Yelinz/document-merge-service/commit/21ae809dd6e08d1b5823373637ef17805640d73a)) 228 | ## 6.0.0 (12 January 2023) 229 | ### Feature 230 | * Add dgap mixins ([`1b9f486`](https://github.com/Yelinz/document-merge-service/commit/1b9f486db20fc8856086f91e22e92801fb4b5079)) 231 | 232 | ### Fix 233 | * Remove oidc group api fetching ([`e64e9d5`](https://github.com/Yelinz/document-merge-service/commit/e64e9d5c563f8ab961b5d35aa87d280c1d6a39ca)) 234 | * **api:** Fix install failing without mysql ([`b984054`](https://github.com/Yelinz/document-merge-service/commit/b9840542058f06895bdcfa19559a115bef9dedb6)) 235 | * **settings:** Enable email settings without email error handler ([`e12480d`](https://github.com/Yelinz/document-merge-service/commit/e12480d58b00e4aa7409ff9c8f68bf6b4cec31d9)) 236 | 237 | ### Breaking 238 | * remove oidc group api fetching ([`e64e9d5`](https://github.com/Yelinz/document-merge-service/commit/e64e9d5c563f8ab961b5d35aa87d280c1d6a39ca)) 239 | * add dgap mixins ([`1b9f486`](https://github.com/Yelinz/document-merge-service/commit/1b9f486db20fc8856086f91e22e92801fb4b5079)) 240 | 241 | ## 5.2.1 (6 January 2023) 242 | 243 | ### Fix 244 | * Fix wrong env variable for server email address 
([`d1006b9`](https://github.com/adfinis/document-merge-service/commit/d1006b9f9aaf74d15d076e4a0856416f3ff9e6aa)) 245 | 246 | ## 5.2.0 (6 January 2023) 247 | 248 | ### Feature 249 | * Add email error handler ([`012a893`](https://github.com/adfinis/document-merge-service/commit/012a893eb5b17b5f90899035d292a005fe118279)) 250 | 251 | ## 5.1.0 (4 October 2022) 252 | 253 | ### Feature 254 | * **api:** Make pagination configurable ([`dd6615f`](https://github.com/adfinis/document-merge-service/commit/dd6615f14b81ec005b697bf43c58d3c74e8d3fe3)) 255 | 256 | ## 5.0.6 (13 September 2022) 257 | 258 | ### Fix 259 | * Also log unshare in formats-call / only test unshare in error-path ([`dd0f22c`](https://github.com/adfinis-sygroup/document-merge-service/commit/dd0f22c0e97139e9f5559da70683d20a6927fb5d)) 260 | 261 | ## 5.0.5 (01 September 2022) 262 | 263 | ### Fix 264 | * **validation:** Fix excel template validation ([`3c6149e`](https://github.com/adfinis-sygroup/document-merge-service/commit/3c6149e15a455539f544c61084a0e372cc74fa7b)) 265 | 266 | ## 5.0.4 (29 August 2022) 267 | 268 | ### Fix 269 | * Allow isolation of unoconv calls to be disabled (default) ([`74834f1`](https://github.com/adfinis-sygroup/document-merge-service/commit/74834f1c820eb258e53697ecd563f1ee353a5e66)) 270 | * Remove security restrictions to make unshare possible ([`5b10cff`](https://github.com/adfinis-sygroup/document-merge-service/commit/5b10cffac5bd365d02c16999748113747d6d36e9)) 271 | 272 | ## 5.0.3 (23 August 2022) 273 | 274 | Important: Be aware that the docker-container needs CAP_SYS_ADMIN since version 4.7.0 275 | 276 | ### Fix 277 | * Log an error if unoconv or unshare fails ([`6e2f54a`](https://github.com/adfinis-sygroup/document-merge-service/commit/6e2f54ad961cdfb0052c0a03823121ccf53b68ae)) 278 | 279 | ## 5.0.2 (22 August 2022) 280 | 281 | ### Fix 282 | * Move temporary path to data directory 283 | 
([`afce2ca`](https://github.com/adfinis-sygroup/document-merge-service/commit/afce2ca3429bf20d6c282fe5f8a1f1201fc278ee)) 284 | 285 | ## 5.0.1 (9 August 2022) 286 | 287 | ### Fix 288 | * **docker:** Fix docker uwsgi command ([`85892f6`](https://github.com/adfinis/document-merge-service/commit/85892f63005fe31b277ba8df623c8a0ec0f1b7ec)) 289 | 290 | ## 5.0.0 (9 August 2022) 291 | 292 | ### Feature 293 | * **license:** Switch license from MIT to GPL-3.0-or-later ([`47c1a84`](https://github.com/adfinis/document-merge-service/commit/47c1a843a9cda105d1640651a8231fbc4c18039f)) 294 | 295 | ### Fix 296 | * **python:** Use python v3.8 ([`920c0bd`](https://github.com/adfinis/document-merge-service/commit/920c0bd2c5c0dfe836b5b215a91691a4077fd63b)) 297 | 298 | ### Breaking 299 | * Drop support for Python v3.7. This should have been done in 29a49ee76b638f0a8fb7b189fb91e61c45d78bde which updated the python version to 3.10 which is too restrictive. We now guarantee support for python versions 3.8 to 3.10. ([`920c0bd`](https://github.com/adfinis/document-merge-service/commit/920c0bd2c5c0dfe836b5b215a91691a4077fd63b)) 300 | * document-merge-service is now released under the GPL-3.0-or-later license. 
([`47c1a84`](https://github.com/adfinis/document-merge-service/commit/47c1a843a9cda105d1640651a8231fbc4c18039f)) 301 | 302 | ## 4.7.0 (26 July 2022) 303 | 304 | ### Feature 305 | * Isolate libreoffice instances ([`9e2db65`](https://github.com/adfinis/document-merge-service/commit/9e2db651a9804c787e7909baa415aa36e551007b)) 306 | * **engines:** Add basic excel validation ([`f396ae8`](https://github.com/adfinis/document-merge-service/commit/f396ae8879475d6c4aca29003f77965945da24fd)) 307 | * **engines:** Render all excel-sheets with thee same data ([`ca54651`](https://github.com/adfinis/document-merge-service/commit/ca54651ac85c5576fac5162ba1caaebff273f87b)) 308 | * **engines:** Test datastructures with excel templates ([`9a5c116`](https://github.com/adfinis/document-merge-service/commit/9a5c116c30c22957c9ee316c0c0f4baa9c3dba98)) 309 | * **engines:** Create template test ([`8c4cad1`](https://github.com/adfinis/document-merge-service/commit/8c4cad1273f35496080473f42ecb150a575b0a2e)) 310 | * **engines:** Fix code for new xlsx library ([`ca4a6a4`](https://github.com/adfinis/document-merge-service/commit/ca4a6a42dfb58196e7a819e1c9b2a4cd9b59dbe8)) 311 | * **engines:** Add xlsx template engine ([`e133c83`](https://github.com/adfinis/document-merge-service/commit/e133c834128916eb26ff7642e6eb31852406743c)) 312 | 313 | ### Fix 314 | * Cleanup thread pool ([`fec982e`](https://github.com/adfinis/document-merge-service/commit/fec982e25209b1088477f868bfa786f087d047bc)) 315 | 316 | ## 4.6.2 (21 January 2022) 317 | 318 | ### Fix 319 | * **cleanup:** Convert cleanup migration to command ([#467](https://github.com/adfinis/document-merge-service/issues/467)) ([`33052ee`](https://github.com/adfinis/document-merge-service/commit/33052eed48dc01a311aa57462d3a64595b74e743)) 320 | * **cleanup-migration:** Fail gracefully in new container ([`8a93339`](https://github.com/adfinis/document-merge-service/commit/8a93339be1218fe79579129483f238f17e67d2e9)) 321 | 322 | ## 4.6.1 323 | 324 | ### Fix 325 
| * **api:** Do not crash in list view ([#458](https://github.com/adfinis/document-merge-service/issues/458)) ([`11b02fd`](https://github.com/adfinis/document-merge-service/commit/11b02fd58a6a0d38d10bb6a67da9999e11f0c07f)) 326 | 327 | ## 4.6.0 328 | 329 | ### Feature 330 | * Add config for deployment under specific URL prefix ([#456](https://github.com/adfinis/document-merge-service/issues/456)) ([`6801024`](https://github.com/adfinis/document-merge-service/commit/680102457b938b33b2ecbc314bcb1897644c519a)) 331 | 332 | ### Fix 333 | * **template:** Make template download url more stable ([`3438a53`](https://github.com/adfinis/document-merge-service/commit/3438a53efbd7bbe46cf3b38659e1bebc4cfe348b)) 334 | * **cleanup:** Delete old files when template is deleted or changed ([#445](https://github.com/adfinis/document-merge-service/issues/445)) ([`26c9570`](https://github.com/adfinis/document-merge-service/commit/26c9570e02dcc981ee523273a06137caa9bf8486)) 335 | * **jinja:** Autoescape data passed to template when merging ([#444](https://github.com/adfinis/document-merge-service/issues/444)) ([`2ac030e`](https://github.com/adfinis/document-merge-service/commit/2ac030e619899a55bc72440f55081258e1ab66ac)) 336 | 337 | ### Documentation 338 | * **readme:** Remove deprecated dependabot badge ([`4173a3b`](https://github.com/adfinis/document-merge-service/commit/4173a3bf6d26d02afd05fc3972492054e3476f5f)) 339 | 340 | 341 | ## 4.5.0 342 | 343 | ### Feature 344 | * Add meta-field to Template ([`27163e8`](https://github.com/adfinis/document-merge-service/commit/27163e8c5b2d1541566e4908ac88e55a3d5bc7b9)) 345 | 346 | ### Fix 347 | * Reduce number of uwsgi processes ([`46f950a`](https://github.com/adfinis/document-merge-service/commit/46f950a7a5ff6ca99c20941a5d1ec0a7cbd1bde0)) 348 | * Add uwsgi config suitable for production use ([`247c5df`](https://github.com/adfinis/document-merge-service/commit/247c5dfbc9e00e29a95b21f2646ad735368cdc21)) 349 | * **jinja:** Replace deprecated 
contextfilter with pass_context ([`3308cd1`](https://github.com/adfinis/document-merge-service/commit/3308cd11aac3332445053b1d9ac4564bb7034a06)) 350 | 351 | ## 4.4.0 352 | 353 | ### Feature 354 | * run subprocesses with timeout and cleanup forks 355 | ([`be092b4`](https://github.com/adfinis/document-merge-service/commit/be092b464d0120ce1d6e9bc8afdf4150cacd2710)) 356 | 357 | ## 4.3.0 358 | 359 | ### Feature 360 | * Allow disabling validation 361 | ([`f371b33`](https://github.com/adfinis/document-merge-service/commit/f371b339d7434b01b2a024308fc58f0806d8a287)) 362 | 363 | ## 4.2.1 364 | 365 | ### Fix 366 | * Fix using same image multiple times in template (35b7ffb9cff7e4577f505823449874361d1557a2) 367 | 368 | ## 4.2.0 369 | 370 | ### Feature 371 | * Handle None for images (fd6f55d61e1877e0203d7ee4212641816119077c) 372 | 373 | ### Fix 374 | * Dont crash when accessing undefined value in template (f2bb378dbb51a61d3d4e1f01afcf2b3efd831aba) 375 | 376 | 377 | ## 4.1.0 378 | ### Feature 379 | * Support inline images for docx-template (37e42724c75a5f5c8ab60ee45a2fd64118cdf407) 380 | 381 | ### Fix 382 | * Correctly validate image placeholders (9617bd71db90901ae0e18c513bc28bb3225b7857) 383 | * Also add template to context in engine validation (639e9c27435873ca8680308684d799ea9da29d6a) 384 | 385 | 386 | ## 4.0.1 387 | 388 | ### Fix 389 | * Don't reject templates with complex syntax (fb56a42aee82f9261596f7546f52f8b9930292de) 390 | 391 | 392 | ## 4.0.0 393 | 394 | ### Breaking 395 | * Remove support for external unoconv listeners 396 | * `UNOCONV_SERVER` and `UNOCONV_PORT` are no longer supported configuration options. Please remove 397 | them from your configuration file. 398 | * By default an unoconv process gets launched within the container. 
399 | 400 | ### Feature 401 | * Check template for available placeholders (2ac9aeb95016665520bef53c7e3ac0310be9f84f) 402 | * Allow to validate docx template on upload (de810446fbec2ffe610cda4f9cb12be34b5bdbb5) 403 | 404 | ### Fix 405 | * Make sure port is always printed as string (dd8f34b93a9f3b279fa8e99b1b8ba3d8e1d582fb) 406 | 407 | ### Documentation 408 | * Extended user guide (09f0393ec7fe40513fcd47473272a09cf0a294d3) 409 | 410 | 411 | ## 3.0.0 412 | 413 | ### Fix 414 | * Revert automatic conversion, add filter instead (4e91c50a5938ab641a90cb84fabd56ff992c757c) 415 | 416 | ### Breaking 417 | * Replace tfk-api-unoconv service with unoconv listener (f12f0a221b64fb22665ac4609e4f52e34ff767f2) 418 | * `UNOCONV_LOCAL` and `UNOCONV_URL` are no longer supported configuration options. Please remove 419 | them from your configuration file. 420 | * By default an unoconv listener gets launched within the container. To use a different listener 421 | you can specify `UNOCONV_SERVER` and `UNOCONV_PORT`. 422 | 423 | * After gathering some practical experience with the new automatic "Listing"-conversion for 424 | multiline we noticed that this feature is a little bit too "clever" and breaks many advanced 425 | use-cases. (4e91c50a5938ab641a90cb84fabd56ff992c757c) 426 | -------------------------------------------------------------------------------- /CONFIGURATION.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | Document Merge Service is a [12factor app](https://12factor.net/) which means that configuration is stored in environment variables. 4 | Different environment variable types are explained at [django-environ](https://github.com/joke2k/django-environ#supported-types). 5 | 6 | ## Common 7 | 8 | - `SECRET_KEY`: A secret key used for cryptography. This needs to be a random string of a certain length. See [more](https://docs.djangoproject.com/en/2.1/ref/settings/#std:setting-SECRET_KEY). 
9 | - `ALLOWED_HOSTS`: A list of hosts/domains your service will be served from. See [more](https://docs.djangoproject.com/en/2.1/ref/settings/#allowed-hosts). 10 | 11 | ## Database 12 | 13 | Per default [Sqlite3](https://sqlite.org/) is used as database for simple deployment and stored at `/var/lib/document-merge-service/data/sqlite3.db`. Create a volume to make it persistent. 14 | 15 | To scale the service a different database storage is needed. Any database supported by [Django](https://docs.djangoproject.com/en/2.1/ref/settings/#std:setting-DATABASE-ENGINE) can be used. 16 | 17 | - `DATABASE_ENGINE`: Database backend to use. 18 | - `DATABASE_HOST`: Host to use when connecting to database 19 | - `DATABASE_PORT`: Port to use when connecting to database 20 | - `DATABASE_NAME`: Name of database to use 21 | - `DATABASE_USER`: Username to use when connecting to the database 22 | - `DATABASE_PASSWORD`: Password to use when connecting to database 23 | 24 | ## Unoconv 25 | 26 | - `UNOCONV_ALLOWED_TYPES`: List of types allowed to convert to. 
See `unoconv --show` (default: ['pdf']) 27 | - `UNOCONV_PYTHON`: String, defaults to "/usr/bin/python3.5" 28 | - `UNOCONV_PATH`: String, defaults to "/usr/bin/unoconv" 29 | 30 | ## python-docx-template 31 | 32 | - `DOCXTEMPLATE_JINJA_EXTENSIONS`: list of [jinja2 extensions](http://jinja.pocoo.org/docs/2.10/extensions/) to load 33 | 34 | In python-docx-template, the following additional custom filters are implemented: 35 | 36 | - multiline(value) - wraps the value in a [Listing](https://docxtpl.readthedocs.io/en/latest/#escaping-newline-new-paragraph-listing) for multiline support 37 | - datetimeformat(value, format, locale) 38 | - dateformat(value, format, locale) 39 | - timeformat(value, format, locale) 40 | - getwithdefault(value, default) - converts None to empty string (or provided default value) or leaves strings as is 41 | - emptystring(value) - converts None to empty string or leaves strings as is (deprecated in favor of getwithdefault) 42 | - image(width, height, keep_aspect_ratio) - Creates an [inline image](https://docxtpl.readthedocs.io/en/latest/) from provided file with the same name. `width` and `height` are optional and represent millimetres. If `keep_aspect_ratio` is `True` the image will be scaled keeping its original aspect ratio and the width/height parameters become a size limit instead. `keep_aspect_ratio` has no effect if `width` and `height` are not given. 43 | 44 | For formatting use babel and its Unicode-compatible [format](http://babel.pocoo.org/en/latest/dates.html#date-fields). 45 | 46 | ## Authentication / Authorization 47 | 48 | By default, no authentication is needed. To protect the API, integrate 49 | it with your [IAM](https://en.wikipedia.org/wiki/Identity_management) 50 | supporting Open ID Connect. If not available, you might consider using 51 | [Keycloak](https://www.keycloak.org/). 
52 | 53 | - `REQUIRE_AUTHENTICATION`: Force authentication to be required (default: False) 54 | - `OIDC_USERINFO_ENDPOINT`: Url of userinfo endpoint as [described](https://openid.net/specs/openid-connect-core-1_0.html#UserInfo) 55 | - `OIDC_VERIFY_SSL`: Verify ssl certificate of oidc userinfo endpoint (default: True) 56 | - `OIDC_GROUPS_CLAIM`: Name of claim to be used to define group membership (default: document_merge_service_groups) 57 | - `OIDC_USERNAME_CLAIM`: Name of claim to be used to define user (default: sub) 58 | - `OIDC_BEARER_TOKEN_REVALIDATION_TIME`: Time in seconds before bearer token validity is verified again. For best security token is validated on each request per default. It might be helpful though in case of slow Open ID Connect provider to cache it. It uses [cache](#cache) mechanism for memorizing userinfo result. Number has to be lower than access token expiration time. (default: 0) 59 | 60 | ## Permissions / Visibilities 61 | 62 | Document Merge Service uses [dgap](https://github.com/adfinis/django-generic-api-permissions) 63 | to handle permissions and visibilities. It can be configured using the following 64 | environment variables: 65 | 66 | - `DMS_VISIBILITY_CLASSES`: List of classes that handle [dgap visibilities](https://github.com/adfinis/django-generic-api-permissions#visibilities) 67 | - `DMS_PERMISSION_CLASSES`: List of classes that handle [dgap permissions](https://github.com/adfinis/django-generic-api-permissions#permissions) 68 | - `EXTENSIONS_ARGUMENTS`: Custom arguments from the app to be used in the 69 | visibility and permission classes. This is expected to be a `dict`, e.g. 
70 | `EXTENSIONS_ARGUMENTS=foo=bar` could then be used in the extension classes as 71 | `settings.EXTENSIONS_ARGUMENTS["foo"]` 72 | 73 | ## Cache 74 | 75 | - `CACHE_BACKEND`: [cache backend](https://docs.djangoproject.com/en/4.2/ref/settings/#backend) to use (default: django.core.cache.backends.locmem.LocMemCache) 76 | - `CACHE_LOCATION`: [location](https://docs.djangoproject.com/en/4.2/ref/settings/#location) of cache to use 77 | - `CACHE_OPTIONS`: [options](https://docs.djangoproject.com/en/4.2/ref/settings/#options) for cache library 78 | 79 | ## CORS 80 | 81 | - `CORS_ORIGIN_ALLOW_ALL`: [allow all](https://github.com/ottoyiu/django-cors-headers#cors_origin_allow_all) 82 | - `CORS_ORIGIN_REGEX_WHITELIST`: List of [whitelist regexes](https://github.com/ottoyiu/django-cors-headers#cors_origin_regex_whitelist) defaults to "^(https?://)?127\.0\.0\.1:\d{4}$" 83 | 84 | Users of nginx/apache must ensure to have matching CORS configurations. 85 | 86 | ## Pagination 87 | 88 | - `PAGINATION_ENABLED`: whether the pagination is enabled (default: `True`) 89 | - `PAGINATION_DEFAULT_PAGE_SIZE`: the default page size if no query param (`page_size`) is given (default: `100`) 90 | - `PAGINATION_MAX_PAGE_SIZE`: the max value of the page size query param (`page_size`) (default: `1000`) 91 | 92 | ## Email 93 | 94 | - `SERVER_EMAIL`: the email address that error messages come from 95 | - `DEFAULT_FROM_EMAIL`: default email address to use for various automated correspondence. This doesn’t include error messages sent to `ADMINS`. 
96 | - `EMAIL_HOST`: the host to use for sending email (default: `localhost`) 97 | - `EMAIL_PORT`: port to use for the SMTP server (default: `25`) 98 | - `EMAIL_HOST_USER`: username for the SMTP server (default: "") 99 | - `EMAIL_HOST_PASSWORD`: password for the SMTP server user (default: "") 100 | - `EMAIL_USE_TLS`: whether to use a TLS (secure) connection when talking to the SMTP server (default: `False`) 101 | 102 | If either `EMAIL_HOST_USER` or `EMAIL_HOST_PASSWORD` is empty, Django won't attempt authentication. 103 | 104 | ## Email error handler 105 | 106 | - `ENABLE_ADMIN_EMAIL_LOGGING`: enable Django to send email to admins on errors (default: `False`) 107 | - `ADMINS`: list of people who will get code error notifications. Items in the list should follow this example: `Test Example <test@example.com>,Test2 <test2@example.com>` 108 | 109 | ## Sentry 110 | 111 | - `SENTRY_DSN`: identifier (data source name) for where to send events to. If no value is provided, sentry won't be activated (default: "") 112 | - `SENTRY_ENVIRONMENT`: which app environment sent an event to sentry (default: `development`) 113 | - `SENTRY_TRACES_SAMPLE_RATE`: percentage chance a given transaction will be sent to Sentry (default: `1.0`) 114 | - `SENTRY_SEND_DEFAULT_PII`: enable sending PII data that associates users to errors (default: `True`) 115 | 116 | ## Template storage 117 | 118 | - `FILE_STORAGE`: Django file storage backend (default: `django.core.files.storage.FileSystemStorage`) 119 | - `MEDIA_ROOT`: Absolute filesystem path to the directory that will hold user-uploaded files. (default: "") 120 | - `MEDIA_URL`: URL that handles the media served from MEDIA_ROOT, used for managing stored files. When using buckets this needs to be changed. 
(default: `api/v1/template/`) 121 | 122 | ### [django-storages](https://django-storages.readthedocs.io/en/latest/backends/amazon-S3.html) S3 settings 123 | 124 | Refer to for example [Digital Ocean](https://django-storages.readthedocs.io/en/latest/backends/s3_compatible/digital-ocean-spaces.html) configuration if using a S3 compatible storage which isn't AWS. 125 | 126 | Required to use S3 storage: 127 | 128 | - `DMS_S3_ACCESS_KEY_ID`: AWS access key id 129 | - `DMS_S3_SECRET_ACCESS_KEY`: AWS secret access key 130 | - `DMS_STORAGE_BUCKET_NAME`: Storage bucket name 131 | 132 | Optional: 133 | 134 | - `DMS_S3_ENDPOINT_URL`: Custom S3 URL to use when connecting to S3, including scheme. (default: "") 135 | - `DMS_S3_REGION_NAME`: Region of the storage (default: "") 136 | - `DMS_LOCATION`: A path prefix that will be prepended to all uploads (default: "") 137 | - `DMS_S3_FILE_OVERWRITE`: If `True` Files with the same name will overwrite each other. Otherwise extra characters are appended. (default: `False`) 138 | - `DMS_S3_SIGNATURE_VERSION`: S3 signature version to use (default: `s2`) 139 | - `DMS_S3_USE_SSL`: Whether or not to use SSL when connecting to S3 (default: `True`) 140 | - `DMS_S3_VERIFY`: Whether or not to verify the connection to S3. Can be set to False to not verify SSL/TLS certificates. (default: `None`) 141 | - `DMS_ENABLE_AT_REST_ENCRYPTION`: Whether to use SSEC to encrypt files uploaded to S3 (default: `False`) 142 | - `DMS_S3_STORAGE_SSEC_SECRET`: Secret key for SSEC encryption, has to be 32 bytes long (default: `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`) 143 | 144 | ## Gunicorn 145 | 146 | Document Merge Service uses [Gunicorn](https://gunicorn.org/) as its app server. 
147 | A reasonable configuration is included in the docker image and can be customized 148 | using the following environment variables: 149 | 150 | - `GUNICORN_HOST`: IP address to bind gunicorn to (default `0.0.0.0`) 151 | - `GUNICORN_PORT`: Port to bind gunicorn to (default `8000`) 152 | - `GUNICORN_WORKERS`: Number of workers for handling requests (default `8`) 153 | - `GUNICORN_TIMEOUT`: Number of seconds until worker processing a request is killed and restarted (default `60`) 154 | - `GUNICORN_LIMIT_REQUEST_LINE`: Maximum size of HTTP request line in bytes (default `8190`) 155 | 156 | If you want to further customize your configuration, you may do that by 157 | overwriting the file [`document_merge_service/gunicorn.py`](document_merge_service/gunicorn.py) 158 | and add your own custom settings: 159 | 160 | ```dockerfile 161 | FROM ghcr.io/adfinis/document-merge-service:latest 162 | 163 | COPY my_gunicorn_config.py /app/document_merge_service/gunicorn.py 164 | ``` 165 | 166 | Such a configuration file might look like this: 167 | 168 | ```python 169 | # my_gunicorn_config.py 170 | 171 | wsgi_app = "document_merge_service.wsgi:application" # must not be changed 172 | bind = "0.0.0.0:80" 173 | workers = 16 174 | timeout = 120 175 | loglevel = "debug" 176 | ``` 177 | 178 | For more information on how to customize Gunicorn, please refer to [the official documentation](https://docs.gunicorn.org/en/latest/settings.html). 179 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions to Document Merge Service are very welcome! Best have a look at the open [issues](https://github.com/adfinis/document-merge-service/issues) 4 | and open a [GitHub pull request](https://github.com/adfinis/document-merge-service/compare). See instructions below how to setup development 5 | environment. 
Before writing any code, best discuss your proposed change in a GitHub issue to see if the proposed change makes sense for the project. 6 | 7 | ## Setup development environment 8 | 9 | ### Clone 10 | 11 | To work on Document Merge Service you first need to clone the repository: 12 | 13 | ```bash 14 | git clone https://github.com/adfinis/document-merge-service.git 15 | cd document-merge-service 16 | ``` 17 | 18 | ### Open Shell 19 | 20 | Once it is cloned you can easily open a shell in the docker container to 21 | open a development environment. 22 | 23 | ```bash 24 | # needed for permission handling 25 | # only needs to be run once 26 | echo UID=$UID > .env 27 | # open shell 28 | docker-compose run --rm document-merge-service bash 29 | ``` 30 | 31 | ### Testing 32 | 33 | Once you have shelled into the docker container as described above 34 | you can use common python tooling for formatting, linting, testing 35 | etc. 36 | 37 | ```bash 38 | poetry shell 39 | # linting 40 | ruff check --diff . 41 | # format code 42 | ruff format . 43 | # running tests 44 | pytest 45 | # create migrations 46 | ./manage.py makemigrations 47 | # install debugger or other temporary dependencies 48 | pip install --user pdbpp 49 | ``` 50 | 51 | Writing of code can still happen outside the docker container of course. 52 | 53 | ### Install new requirements 54 | 55 | In case you're adding new requirements you simply need to build the docker container 56 | again for those to be installed and re-open shell. 57 | 58 | ```bash 59 | docker-compose build --pull 60 | ``` 61 | 62 | ### Setup pre commit 63 | 64 | Pre commit hooks is an additional option instead of executing checks in your editor of choice. 
65 | 66 | First create a virtualenv with the tool of your choice before running below commands: 67 | 68 | ```bash 69 | poetry shell 70 | pip install pre-commit 71 | pre-commit install --hook=pre-commit 72 | pre-commit install --hook=commit-msg 73 | ``` 74 | 75 | This will activate commit hooks to validate your code as well as your commit 76 | messages. 77 | 78 | ### Setup commit-msg hook 79 | 80 | If you want to have your commit message automatically linted, execute below commands: 81 | 82 | ```bash 83 | npm install @commitlint/{config-conventional,cli} 84 | ln -s "$(pwd)/commit-msg" .git/hooks/commit-msg 85 | ``` 86 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13 AS build 2 | 3 | ARG ENV=docker 4 | ARG APP_HOME=/app 5 | ARG VARIANT=slim 6 | 7 | ENV PYTHONUNBUFFERED=1 8 | ENV POETRY_VIRTUALENVS_CREATE=false 9 | ENV POETRY_HOME=/opt/poetry 10 | 11 | WORKDIR $APP_HOME 12 | 13 | RUN pip install --no-cache-dir -U poetry 14 | 15 | # Install project dependencies 16 | COPY pyproject.toml poetry.lock $APP_HOME/ 17 | RUN \ 18 | --mount=type=cache,target=.cache/pypoetry \ 19 | poetry install --no-root --extras $VARIANT $(test "$ENV" = "dev" && echo "--with dev") 20 | 21 | # Install project itself 22 | COPY . 
$APP_HOME 23 | RUN \ 24 | --mount=type=cache,target=.cache/pypoetry \ 25 | poetry install --only-root 26 | 27 | FROM python:3.13-slim 28 | 29 | ARG UID=901 30 | ARG APP_HOME=/app 31 | 32 | # Needs to be set for users with manually set UID 33 | ENV HOME=/home/document-merge-service 34 | ENV DJANGO_SETTINGS_MODULE=document_merge_service.settings 35 | ENV MEDIA_ROOT=/var/lib/document-merge-service/media 36 | ENV DATABASE_DIR=/var/lib/document-merge-service/data 37 | 38 | # Suppress noisy warning caused by xltpl: https://github.com/zhangyu836/xltpl/issues/27 39 | ENV PYTHONWARNINGS="ignore:invalid escape sequence:SyntaxWarning" 40 | 41 | RUN mkdir -p $APP_HOME $DATABASE_DIR/tmp $MEDIA_ROOT /var/www/static \ 42 | && useradd -u $UID -r document-merge-service --create-home \ 43 | && mkdir $HOME/.config \ 44 | && chmod -R 770 $DATABASE_DIR $MEDIA_ROOT $HOME /var/www/static \ 45 | # All project specific folders need to be accessible by newly created user but 46 | # also for unknown users (when UID is set manually). Such users are in group 47 | # root. 48 | && chown -R document-merge-service:root $DATABASE_DIR $MEDIA_ROOT $HOME /var/www/static 49 | 50 | WORKDIR $APP_HOME 51 | 52 | RUN \ 53 | --mount=type=cache,target=/var/cache/apt,sharing=locked \ 54 | --mount=type=cache,target=/var/lib/apt,sharing=locked \ 55 | apt-get update && apt-get install -y --no-install-recommends \ 56 | libmagic1 \ 57 | libreoffice-writer \ 58 | unoconv \ 59 | util-linux \ 60 | wait-for-it \ 61 | && rm -rf /var/lib/apt/lists/* 62 | 63 | USER document-merge-service 64 | 65 | COPY --from=build /usr/local/lib/python3.13/site-packages/ /usr/local/lib/python3.13/site-packages/ 66 | COPY --from=build /usr/local/bin/ /usr/local/bin/ 67 | 68 | EXPOSE 8000 69 | 70 | COPY . 
$APP_HOME 71 | 72 | CMD ["/bin/sh", "-c", "./manage.py migrate && gunicorn -c ./document_merge_service/gunicorn.py"] 73 | -------------------------------------------------------------------------------- /MAINTAINING.md: -------------------------------------------------------------------------------- 1 | # Maintainer's Handbook 2 | 3 | ## Make a new release 4 | 5 | We're using `python-semantic-release` to generate a changelog and suggest the next version. 6 | 7 | 1. Checkout `main` branch, ensure you have all tags 8 | 2. Figure out the next version 9 | 3. Update code (CHANGELOG, version info) 10 | 4. Pull Request with the version bump. 11 | 5. Create tag and release on the merge commit with the changelog 12 | 13 | ```bash 14 | # Ensure you're on the current `main` branch and have all release tags 15 | git checkout main 16 | git pull origin --tags 17 | # Figure out the next version 18 | poetry run semantic-release version --noop 19 | # Prepare changelog 20 | poetry run semantic-release changelog --noop --unreleased 21 | ``` 22 | -------------------------------------------------------------------------------- /MIGRATE.md: -------------------------------------------------------------------------------- 1 | # Migration from v5 to v6 2 | 3 | **Warning** 4 | 5 | The `group` attribute will be removed from the Template model. 6 | A suggested migration would be to move the value to `meta` before migrating. 7 | 8 | --- 9 | 10 | The previous pre-defined permission and visibility system was removed in favour of [dgap](https://github.com/adfinis/django-generic-api-permissions). 11 | 12 | The integration of `OIDC_GROUPS_API` and `OIDC_GROUPS_API_JSONPATH` was removed with it. 13 | Because every consuming app can now define its own way to handle the permissions. 
14 | 15 | Example Permissions: 16 | 17 | ```python 18 | import requests 19 | from rest_framework import exceptions 20 | from generic_permissions.permissions import object_permission_for 21 | 22 | from document_merge_service.api.models import Template 23 | 24 | 25 | class CustomPermission: 26 | """ 27 | Current settings and how to refactor them 28 | OIDC_GROUPS_API = "https://example.com/users/{sub}/group" 29 | OIDC_GROUPS_API_JSONPATH = "$$.included[?(@.type=='services')].id" 30 | """ 31 | @object_permission_for(Template) 32 | def has_object_permission_template(self, request, instance): 33 | uri = "https://example.com/users/{sub}/group" 34 | # extract header 35 | token = request.headers["AUTHORIZATION"] 36 | 37 | # previously OIDC_GROUPS_API 38 | groups_api = f"https://example.com/users/{request.user.username}/group" 39 | 40 | response = requests.get( 41 | groups_api, verify=True, headers={"authorization": token} 42 | ) 43 | try: 44 | response.raise_for_status() 45 | except requests.HTTPError as e: 46 | raise exceptions.AuthenticationFailed( 47 | f"Retrieving groups from {uri} " 48 | f"failed with error '{str(e)}'." 
49 | ) 50 | 51 | result = response.json() 52 | 53 | # previously OIDC_GROUPS_API_JSONPATH was used here to extract the group from the response 54 | for data in result["included"]: 55 | if data.type == "services" 56 | groups = data.id 57 | 58 | return instance.meta["group"] in groups 59 | ``` 60 | 61 | After creating the permission configure it as environment variable in your `docker-compose.yml` file: 62 | 63 | ```yaml 64 | services: 65 | document-merge-service: 66 | image: ghcr.io/adfinis/document-merge-service:latest 67 | environment: 68 | - DMS_PERMISSION_CLASSES=document_merge_service.extensions.permissions.CustomPermission 69 | volumes: 70 | - ./permissions.py:/app/document_merge_service/extensions/permissions.py 71 | ``` 72 | 73 | Example Visibility: 74 | 75 | ```python 76 | from django.db.models import Q 77 | from generic_permissions.visibilities import filter_queryset_for 78 | 79 | from document_merge_service.api.models import Template 80 | 81 | 82 | class CustomVisibility: 83 | """Example Visibility class to replicate previous behaviour.""" 84 | 85 | @filter_queryset_for(Template) 86 | def filter_templates(self, queryset, request): 87 | queryset = queryset.filter( 88 | Q(meta__group__in=self.request.user.groups or []) | Q(meta__group__isnull=True) 89 | ) 90 | 91 | return queryset 92 | ``` 93 | 94 | After creating the visibility configure it as environment variable in your `docker-compose.yml` file: 95 | 96 | ```yaml 97 | services: 98 | document-merge-service: 99 | image: ghcr.io/adfinis/document-merge-service:latest 100 | environment: 101 | - DMS_VISIBILITY_CLASSES=document_merge_service.extensions.visibilities.CustomVisibility 102 | volumes: 103 | - ./visibilities.py:/app/document_merge_service/extensions/visibilities.py 104 | ``` 105 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help start test shell format dmypy 2 
| .DEFAULT_GOAL := help 3 | 4 | help: 5 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort -k 1,1 | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 6 | 7 | start: ## Start the development server 8 | @docker compose up -d --build 9 | 10 | test: ## Test the project 11 | @docker compose exec document-merge-service sh -c "ruff format --diff --fix . && ruff check --diff . && mypy document_merge_service && pytest --no-cov-on-fail --cov --create-db" 12 | 13 | shell: ## Shell into document merge service 14 | @docker compose exec document-merge-service bash 15 | 16 | format: ## Format python code with ruff check 17 | @docker compose exec document-merge-service ruff format --diff . 18 | 19 | dmypy: ## Run mypy locally (starts a deamon for performance) 20 | dmypy run document_merge_service 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Document Merge Service 2 | 3 | [![Build Status](https://github.com/adfinis/document-merge-service/actions/workflows/tests.yml/badge.svg)](https://github.com/adfinis/document-merge-service/actions/workflows/tests.yml) 4 | [![Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://docs.astral.sh/ruff/) 5 | [![License: GPL-3.0-or-later](https://img.shields.io/github/license/adfinis/document-merge-service)](https://spdx.org/licenses/GPL-3.0-or-later.html) 6 | 7 | A document template merge service providing an API to manage templates and merge them with given data. It can also be used to convert Docx files to PDF. 
8 | 9 | ## Installation 10 | 11 | **Requirements** 12 | 13 | - docker 14 | - docker-compose 15 | 16 | After installing and configuring those, download [docker-compose.yml](https://raw.githubusercontent.com/adfinis/document-merge-service/master/docker-compose.yml) and run the following command: 17 | 18 | ```bash 19 | docker-compose up -d 20 | ``` 21 | 22 | You can now access the api at [http://localhost:8000/api/v1/](http://localhost:8000/api/v1/) which includes a browsable api. 23 | 24 | ### Workaround LibreOffice lockup 25 | 26 | The workaround has a setting called `ISOLATE_UNOCONV`; it is only enabled in the 27 | development environment. If `ISOLATE_UNOCONV` is enabled the container needs 28 | `CAP_SYS_ADMIN`. See docker-compose.override.yml. 29 | 30 | ```yaml 31 | cap_add: 32 | - CAP_SYS_ADMIN 33 | security_opt: 34 | - apparmor:unconfined 35 | environment: 36 | - ISOLATE_UNOCONV=true 37 | ``` 38 | 39 | ## Getting started 40 | 41 | ### Uploading templates 42 | 43 | Upload templates using the following: 44 | 45 | ```bash 46 | curl --form template=@docx-template.docx --form name="Test Template" --form engine=docx-template http://localhost:8000/api/v1/template/ 47 | ``` 48 | 49 | ### Merging a template 50 | 51 | After uploading successfully, you can merge a template with the following call: 52 | 53 | ```bash 54 | curl -H "Content-Type: application/json" --data '{"data": {"test": "Test Input"}}' http://localhost:8000/api/v1/template/test-template/merge/ > output.docx 55 | ``` 56 | 57 | ### Converting a template 58 | To convert a standalone Docx file the following call can be used: 59 | 60 | ```bash 61 | curl -X POST --form file=@my-test-file.docx --form target_format="pdf" http://localhost:8000/api/v1/convert > example.pdf 62 | ``` 63 | 64 | 65 | ## Further reading 66 | 67 | - [Configuration](CONFIGURATION.md) - Further configuration and how to do a production setup 68 | - [Usage](USAGE.md) - How to use the DMS and its features 69 | - [Contributing](CONTRIBUTING.md) 
- Look here to see how to start with your first 70 | contribution. Contributions are welcome! 71 | 72 | ## License 73 | 74 | Code released under the [GPL-3.0-or-later license](LICENSE). 75 | -------------------------------------------------------------------------------- /USAGE.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | This guide is intended to help you make full use of the DMS. 4 | 5 | All the examples here will use Python 3 with the `requests` module, as it 6 | produces very readable and understandable code. Adaptation into other languages 7 | should be fairly simple. 8 | 9 | ## Basic principles 10 | 11 | ### General operation 12 | 13 | DMS mainly has two distinct operations: Uploading a template, and merging it 14 | with some data to generate a document. 15 | 16 | #### Supported engines 17 | 18 | To be able to render (generate) a document, you first need to upload a 19 | template. DMS currently supports two formats: Mail Merge, and Docx/XlsxTpl, 20 | which in turn uses Jinja2 syntax. You can read more about them here: 21 | 22 | - [xlsx-template](https://github.com/zhangyu836/xltpl/blob/master/README_EN.md) 23 | - [docx-template](https://github.com/elapouya/python-docx-template) 24 | 25 | The DocxTpl format enables you to put the placeholders directly in 26 | text. This has several advantages and disadvantages: 27 | 28 | - Since office formats can put markup anywhere in the text, it may happen that 29 | identifiers or other template syntax gets split up without the user noticing, 30 | leading to hard-to-debug syntax errors. 31 | - However, due to the representation as text, we gain the flexibility to add 32 | loops, conditionals and so on to change the output document depending on the 33 | data that's filled in. 34 | 35 | ### Authentication / Authorization 36 | 37 | By default no authentication is needed. 
To protect the API, integrate it with your 38 | [IAM](https://en.wikipedia.org/wiki/Identity_management) supporting OpenID Connect. If not 39 | already available, you might consider using [Keycloak](https://www.keycloak.org/). 40 | 41 | When you enable authentication, all access must be with a valid OIDC token. 42 | Authenticated users are able to upload and merge templates, but anonymous users 43 | won't be anymore. 44 | 45 | For the full details on how to configure it, see the 46 | [configuration guide](CONFIGURATION.md). 47 | 48 | ### Permissions / Visibility 49 | 50 | [dgap](https://github.com/adfinis/django-generic-api-permissions) is being used for custom permissions and visibilities. Refer to the README over at [dgap](https://github.com/adfinis/django-generic-api-permissions) on how to configure them. 51 | 52 | ## Uploading templates 53 | 54 | Upload templates using the following example code: 55 | 56 | ```python 57 | >>> import requests 58 | >>> import json 59 | >>> resp = requests.post( 60 | ... 'http://localhost:8000/api/v1/template/', 61 | ... data={ 62 | ... 'engine': 'docx-template', 63 | ... 'slug': 'my-template', 64 | ... 'name': 'test template' 65 | ... }, 66 | ... files={ 67 | ... 'template': open('docx-template.docx', 'rb') 68 | ... } 69 | ... ) 70 | >>> print(resp.status_code) 71 | 201 72 | ``` 73 | 74 | The upload is using the `Content-Disposition: form-data` format, commonly used 75 | in traditional forms when uploading files from a browser. 76 | Make sure you pass in all required fields: 77 | 78 | - `engine`: either `docx-template` or `xlsx-template`, 79 | depending on your template type 80 | - `slug`: Identifier of your template. May only be used once and is your 81 | primary key for accessing the templates later on. 82 | - `name`: Display name of the template 83 | - `template`: The actual template file. Make sure you pass it in the 84 | right format that your HTTP library uses. 
85 | 86 | In the following examples, I'm assuming you already did the `import` 87 | statements for the `requests` and `json` modules, so I'm not repeating them. 88 | 89 | ### Validating the template's structure and placeholders 90 | 91 | Most times, you already know the placeholders available in the template when 92 | uploading the template. To ensure that the template will render properly, you 93 | can validate the template's syntax and placeholder usage at the upload stage. 94 | 95 | The DMS provides two ways to do this: Either by providing some sample data, 96 | or by providing a simplified list of variables. 97 | 98 | The list of variables is using the following syntax: 99 | 100 | - For simple variables, just mention the variable name, such as `foo` 101 | - For lists, add square brackets after the variable name, for example `a_list[]` 102 | - For nested objects, use "dot notation": `object.property` 103 | 104 | You may also combine this syntax according to your needs. The following are all 105 | valid examples: [^xlsx-validation] 106 | 107 | - `foo.bar` 108 | - `a_list[].inner_property` 109 | - `a_list[].another_property` 110 | - `list[].nested_list[]` 111 | 112 | [^xlsx-validation]: 113 | The xlsx engine can only validate simple placeholders, not 114 | lists. Frontend developers should use sample data and check the immediate response with a 115 | rendered xlsx. 116 | 117 | The template used here uses a single placeholder named `test`. See what 118 | happens if we enable placeholder validation but tell the DMS that only 119 | some other placeholders are available: 120 | 121 | ```python 122 | >>> resp = requests.post( 123 | ... 'http://localhost:8000/api/v1/template/', 124 | ... data={ 125 | ... 'engine': 'docx-template', 126 | ... 'slug': 'my-validated-template', 127 | ... 'name': 'test template', 128 | ... 'available_placeholders': [ 129 | ... 'foo', 'bar', 'baz' 130 | ... ] 131 | ... }, 132 | ... files={ 133 | ... 'template': open('docx-template.docx', 'rb') 134 | ... 
} 135 | ... ) 136 | >>> print(resp.status_code) 137 | 400 138 | >>> print(resp.json()) 139 | {'non_field_errors': ['Template uses unavailable placeholders: test']} 140 | ``` 141 | 142 | The above example uses the DocxTpl syntax, so the template is validated 143 | for this (See the parameter `"engine": "docx-template"`). 144 | 145 | You can also provide sample data to fulfill the same purpose. If you do this, 146 | the DMS will implicitly generate the list as above from the data, but it will 147 | also try to render the template using the sample data given. Both have to 148 | work correctly for the upload to succeed. 149 | 150 | As the document may have some structure, it needs to be JSON encoded as the 151 | upload is using the `Content-Disposition: form-data` format. 152 | 153 | ```python 154 | >>> resp = requests.post( 155 | ... 'http://localhost:8000/api/v1/template/', 156 | ... data={ 157 | ... 'engine': 'docx-template', 158 | ... 'slug': 'my-validated-template', 159 | ... 'name': 'test template', 160 | ... 'sample_data': json.dumps({ 161 | ... 'foo': 'a value', 162 | ... 'test': 'another value' 163 | ... }) 164 | ... }, 165 | ... files={ 166 | ... 'template': open('docx-template.docx', 'rb') 167 | ... } 168 | ... ) 169 | >>> print(resp.status_code) 170 | 201 171 | >>> print(resp.json()) 172 | {'slug': 'my-validated-template', 'description': '', 'template': 173 | 'http://localhost:8000/api/v1/template/docx-template_uZCLTeY.docx', 'engine': 174 | 'docx-template', 'group': '[]', 'available_placeholders': None, 'sample_data': 175 | None} 176 | ``` 177 | 178 | As you can see, the validation went through this time, as our sample data 179 | covers all placeholders used in the template. Of course, the template 180 | isn't required to use all placeholders available! 181 | 182 | If you use a Template with the DocxTpl syntax that uses [inline images](#inline-images), 183 | you also need to include the corresponding files along the `sample_data`. 
So the `files` 184 | in the example above would become something like: 185 | 186 | ```python 187 | ... files=( 188 | ... ("template", open('docx-template.docx', 'rb')), 189 | ... ("files", ("sunset1.png", open('sunset1.png', 'rb'))), 190 | ... ("files", ("sunset2.png", open('sunset2.png', 'rb'))), 191 | ... ), 192 | ``` 193 | 194 | ### Disabling template validation 195 | 196 | Sometimes, templates contain advanced syntax that cannot be correctly validated 197 | by the automatic mechanism. If you at the same time are also unable to provide 198 | usable sample data, you can disable template validation entirely. 199 | 200 | Please note that in this case, templates will be accepted that may cause errors 201 | when actually used, so make sure to test them after uploading! 202 | 203 | To disable template validation, pass in the additional parameter 204 | `disable_template_validation` with the value `true` on template upload. 205 | 206 | ## Merging templates 207 | 208 | In contrast to uploading templates, requests for merging documents use JSON 209 | as transfer format. Make sure you set the correct HTTP headers and encode your 210 | data. 211 | 212 | ```python 213 | >>> resp = requests.post( 214 | ... 'http://localhost:8000/api/v1/template/my-validated-template/merge/', 215 | ... json={ 216 | ... 'data': { 217 | ... 'foo': 'a value', 218 | ... 'test': 'another value' 219 | ... } 220 | ... } 221 | ... ) 222 | >>> print(resp.status_code) 223 | 200 224 | >>> with open('rendered_document.docx', 'wb') as fh: 225 | ... fh.write(resp.content) 226 | 9673 227 | ``` 228 | 229 | Merging works the same for both engines, so this is basically all you need to 230 | know about how to use the DMS. 231 | 232 | Additionally, you can also convert output to pdf or other types supported by unoconv: 233 | 234 | ```python 235 | >>> resp = requests.post( 236 | ... 'http://localhost:8000/api/v1/template/my-validated-template/merge/', 237 | ... json={ 238 | ... 'data': { 239 | ... 
'foo': 'a value', 240 | ... 'test': 'another value' 241 | ... }, 242 | ... 'convert': 'pdf' 243 | ... } 244 | ... ) 245 | >>> print(resp.status_code) 246 | 200 247 | >>> with open('rendered_document.pdf', 'wb') as fh: 248 | ... fh.write(resp.content) 249 | 5031 250 | ``` 251 | 252 | ## Inline images 253 | 254 | The `docx-template` engine supports including inline images. Here is shown how one can 255 | use this feature. 256 | 257 | 1. Include an image variable with the `image` filter: `{{ 'sunset.png' | image(50, 50) }}` 258 | 2. Include the image files into a multipart request to the `merge` endpoint: 259 | 260 | ```python 261 | >>> resp = requests.post( 262 | ... 'http://localhost:8000/api/v1/template/my-template/merge/', 263 | ... data={ 264 | ... 'data': json.dumps({ 265 | ... 'foo': 'a value', 266 | ... 'test': 'another value' 267 | ... }), 268 | ... 'convert': 'pdf' 269 | ... }, 270 | ... files=( 271 | ... ("files", ("sunset.png", open('sunset.png', 'rb'))), 272 | ... ), 273 | ... ) 274 | ``` 275 | 276 | The value passed to the `image` filter must be identical to the name of a file that has been provided. 277 | 278 | If you want to merge a template with an image placeholder, but you don't want to render 279 | the image, you can add the filename as key to `data` and set it to `None` or `""`. In the 280 | example above, `data` would look like this: 281 | 282 | ```python 283 | ... data={ 284 | ... 'data': json.dumps({ 285 | ... 'foo': 'a value', 286 | ... 'test': 'another value', 287 | ... 'sunset.png': None 288 | ... }), 289 | ... 'convert': 'pdf' 290 | ... }, 291 | ``` 292 | 293 | ## Converting Docx files 294 | 295 | The document merge service can also be used to convert a single Docx file to PDF. 
296 | 297 | If you want to simultaneously merge a template with data and convert it to PDF, use the merge function as explained in [merging templates](#merging-templates). 298 | 299 | To convert a Docx file to PDF using the DMS you can send a `POST` request with the file and the `target_format`. Currently `pdf` is the only possible `target_format`. 300 | 301 | ```python 302 | >>> resp = requests.post( 303 | ... 'http://localhost:8000/api/v1/convert', 304 | ... data={ 305 | ... 'file': file_to_convert, 306 | ... 'target_format': 'pdf' 307 | ... }, 308 | ... ) 309 | ``` 310 | 311 | ## Maintenance / Cleanup 312 | 313 | The DMS allows REST verbs like `PATCH` and `DELETE` for updating and deleting 314 | existing templates: 315 | 316 | ```python 317 | >>> resp = requests.delete( 318 | ... 'http://localhost:8000/api/v1/template/my-validated-template', 319 | ... ) 320 | >>> print(resp.status_code) 321 | 204 322 | ``` 323 | -------------------------------------------------------------------------------- /docker-compose.override.yml: -------------------------------------------------------------------------------- 1 | services: 2 | document-merge-service: 3 | image: ghcr.io/adfinis/document-merge-service:dev 4 | build: 5 | context: . 
6 | args: 7 | - ENV=dev 8 | - UID=$UID 9 | - VARIANT=full 10 | cap_add: 11 | - CAP_SYS_ADMIN 12 | security_opt: 13 | - apparmor:unconfined 14 | user: "${UID:?Set UID env variable to your user id}" 15 | volumes: 16 | - ./:/app 17 | command: 18 | [ 19 | "/bin/sh", 20 | "-c", 21 | "./manage.py migrate && ./manage.py runserver 0.0.0.0:8000" 22 | ] 23 | environment: 24 | - ENV=dev 25 | - ISOLATE_UNOCONV=true 26 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | document-merge-service: 3 | image: ghcr.io/adfinis/document-merge-service:latest 4 | ports: 5 | - "8000:8000" 6 | volumes: 7 | - dbdata:/var/lib/document-merge-service/data 8 | - templatefiles:/var/lib/document-merge-service/media 9 | # Example to include custom extensions 10 | # - ./visibilities.py:/app/document_merge_service/extensions/visibilities.py 11 | # - ./permissions.py:/app/document_merge_service/extensions/permissions.py 12 | environment: [] 13 | # Following options are a must to configure on production system: 14 | # https://docs.djangoproject.com/en/2.1/ref/settings/#std:setting-SECRET_KEY 15 | # - SECRET_KEY= 16 | # https://docs.djangoproject.com/en/2.1/ref/settings/#allowed-hosts 17 | # - ALLOWED_HOSTS= 18 | 19 | 20 | volumes: 21 | dbdata: 22 | templatefiles: 23 | -------------------------------------------------------------------------------- /document_merge_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/__init__.py -------------------------------------------------------------------------------- /document_merge_service/api/__init__.py: -------------------------------------------------------------------------------- 
class AnonymousUser:
    """User object for a request that carries no verified identity.

    Starts out with no username and an empty list of groups.
    """

    def __init__(self):
        self.groups = []
        self.username = None

    @property
    def group(self):
        """Return the first entry of ``groups``.

        When ``groups`` is empty (falsy), ``groups`` itself is returned.
        """
        memberships = self.groups
        if not memberships:
            return memberships
        return memberships[0]

    @property
    def is_authenticated(self):
        """Always ``False`` for this class."""
        return False

    def __str__(self):
        return "AnonymousUser"
class BearerTokenAuthentication(authentication.BaseAuthentication):
    """REST framework authentication backed by an OIDC userinfo endpoint.

    The bearer token from the ``Authorization`` header is sent to
    ``settings.OIDC_USERINFO_ENDPOINT``; the returned userinfo is cached
    per token and wrapped in an ``AuthenticatedUser``.
    """

    header_prefix = "Bearer"

    def get_bearer_token(self, request):
        """Extract the token from the request's Authorization header.

        Returns ``None`` when the header is missing or empty.

        Raises:
            AuthenticationFailed: the scheme is not ``Bearer``, no
                credentials follow the scheme, or the header has more than
                two whitespace-separated parts.
        """
        parts = authentication.get_authorization_header(request).split()
        if not parts:
            return None

        scheme = smart_str(parts[0].lower())
        if scheme != self.header_prefix.lower():
            raise exceptions.AuthenticationFailed(_("No Bearer Authorization header"))

        part_count = len(parts)
        if part_count == 1:
            raise exceptions.AuthenticationFailed(
                _("Invalid Authorization header. No credentials provided")
            )
        if part_count > 2:
            raise exceptions.AuthenticationFailed(
                _(
                    "Invalid Authorization header. Credentials string should "
                    "not contain spaces."
                )
            )

        return parts[1]

    def get_userinfo(self, token):
        """Fetch the userinfo document for ``token`` from the OIDC endpoint.

        Raises:
            AuthenticationFailed: the endpoint answered with an HTTP error
                status.
        """
        auth_headers = {"Authorization": f"Bearer {smart_str(token)}"}
        response = requests.get(
            settings.OIDC_USERINFO_ENDPOINT,
            verify=settings.OIDC_VERIFY_SSL,
            headers=auth_headers,
        )

        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            raise exceptions.AuthenticationFailed(
                f"Retrieving userinfo from {settings.OIDC_USERINFO_ENDPOINT} "
                f"failed with error '{str(e)}'."
            )
        else:
            return response.json()

    def authenticate(self, request):
        """Return ``(AuthenticatedUser, token)``, or ``None`` to skip.

        ``None`` is returned when authentication is not required by the
        settings or when the request carries no Authorization header.
        """
        if not settings.REQUIRE_AUTHENTICATION:
            return None

        token = self.get_bearer_token(request)
        if token is None:
            return None

        # The raw token may be too long for a cache key, so its SHA-256
        # digest is used in the key instead.
        hashsum_token = hashlib.sha256(force_bytes(token)).hexdigest()
        cache_key = f"authentication.userinfo.{hashsum_token}"
        userinfo = cache.get_or_set(
            cache_key,
            functools.partial(self.get_userinfo, token=token),
            timeout=settings.OIDC_BEARER_TOKEN_REVALIDATION_TIME,
        )

        user = AuthenticatedUser(userinfo)
        return (user, token)

    def authenticate_header(self, request):
        """Build the ``WWW-Authenticate`` value from prefix and endpoint."""
        return f"{self.header_prefix} realm={settings.OIDC_USERINFO_ENDPOINT}"
https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/black.png -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template-filters.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template-filters.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template-image-placeholder-header-footer.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template-image-placeholder-header-footer.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template-loopcontrols.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template-loopcontrols.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template-placeholdercheck.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template-placeholdercheck.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template-syntax.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template-syntax.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/docx-template.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/docx-template.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/invalid-template.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/invalid-template.xlsx -------------------------------------------------------------------------------- /document_merge_service/api/data/loadtest/1.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/loadtest/1.doc -------------------------------------------------------------------------------- /document_merge_service/api/data/loadtest/2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/loadtest/2.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/loadtest/3.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/loadtest/3.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/loadtest/4.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/loadtest/4.docx -------------------------------------------------------------------------------- /document_merge_service/api/data/odt-template.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/odt-template.odt -------------------------------------------------------------------------------- /document_merge_service/api/data/test.txt: -------------------------------------------------------------------------------- 1 | Invalid word document 2 | -------------------------------------------------------------------------------- /document_merge_service/api/data/xlsx-not-valid.xlsx: -------------------------------------------------------------------------------- 1 | asdf 2 | -------------------------------------------------------------------------------- /document_merge_service/api/data/xlsx-structure.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/xlsx-structure.xlsx -------------------------------------------------------------------------------- /document_merge_service/api/data/xlsx-syntax.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/xlsx-syntax.xlsx -------------------------------------------------------------------------------- /document_merge_service/api/data/xlsx-template.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/data/xlsx-template.xlsx -------------------------------------------------------------------------------- /document_merge_service/api/engines.py: -------------------------------------------------------------------------------- 1 | import io 2 | import re 3 | import zipfile 4 | from collections.abc import Mapping 5 | 6 | import openpyxl 7 | import xltpl.writerx as writerx 8 | from docx import Document 9 | from docxtpl import DocxTemplate 10 | from jinja2.exceptions import TemplateSyntaxError 11 | from rest_framework import exceptions 12 | 13 | from document_merge_service.api.data import django_file 14 | 15 | from . 
class _MagicPlaceholder(str, Mapping):  # type: ignore
    """String that records every placeholder path derived from it.

    Item access, attribute access and iteration each produce a child
    placeholder whose text is the dotted / ``[]``-suffixed path. All
    placeholders created from the same root share one set of reported
    names, exposed through ``reports``.
    """

    def __new__(cls, parent=None, name=None):
        instance = str.__new__(cls, name or "")
        instance._parent = parent
        # Children share the report set of the placeholder they came from.
        if parent:
            instance._reports = parent._reports
        else:
            instance._reports = set()

        # The unnamed root itself is never reported.
        if instance != "":
            instance._reports.add(instance)
        return instance

    @property
    def reports(self):
        """List of all non-empty placeholder names recorded so far."""
        return list(self._reports)

    def __iter__(self):
        # Iterating yields exactly one element named "<self>[]".
        element = _MagicPlaceholder(parent=self, name=f"{self}[]")
        return iter((element,))

    def __getitem__(self, idx):
        assert isinstance(idx, str)
        child_name = f"{self}.{idx}".strip(".")
        return _MagicPlaceholder(parent=self, name=child_name)

    def __getattr__(self, attr):
        # Only reached for attributes that do not exist on the instance.
        child_name = f"{self}.{attr}".strip(".")
        return _MagicPlaceholder(parent=self, name=child_name)

    def __len__(self):
        return 2

    def __radd__(self, other):
        return str(self) + str(other)
class DocxTemplateEngine(DocxValidator):
    """Engine that validates and merges ``.docx`` templates via docxtpl."""

    # Captures the expression piped into the ``image`` filter, e.g. the
    # ``'sunset.png'`` part of ``{{ 'sunset.png' | image(50, 50) }}``.
    _IMAGE_FILTER_RE = re.compile(r"{{\s?(\S*)\s?\|\s?image\(")

    def __init__(self, template):
        self.template = template

    def _extract_image_placeholders(self, doc):
        """Extract placeholders using the image filter.

        This method extracts all placeholder names that use the image filter so
        we can add a dummy image to the sample data for validation. We
        explicitly need to parse headers and footers as well as the actual
        document body.

        Returns:
            A set with the raw (possibly still quoted) expression of every
            ``| image(`` usage found in the body, headers and footers.
        """

        body_xml = doc.get_xml()
        body_xml = doc.patch_xml(body_xml)

        xmls = [body_xml]

        for _, part in [
            *doc.get_headers_footers(doc.HEADER_URI),
            *doc.get_headers_footers(doc.FOOTER_URI),
        ]:
            part_xml = doc.get_part_xml(part)
            part_xml = doc.patch_xml(part_xml)
            xmls.append(part_xml)

        images = set()

        for xml in xmls:
            # findall collects every usage in the part. An anchored
            # ``re.match(".*...")`` would yield at most one placeholder per
            # part and would not look past the first line break.
            images.update(self._IMAGE_FILTER_RE.findall(xml))

        return images

    def validate_template_syntax(self, available_placeholders=None, sample_data=None):
        """Render the template with stand-in data to validate it.

        The template is first rendered with magic placeholders (plus a dummy
        image for each image placeholder), then, if given, with
        ``sample_data``. Finally the placeholders seen during rendering are
        checked against ``available_placeholders``.

        Note that ``sample_data`` gets a ``"_tpl"`` key assigned in place.

        Raises:
            ValidationError: on a Jinja template syntax error, or when the
                placeholder check fails.
        """
        dummy_images = []
        try:
            doc = DocxTemplate(self.template)
            root = _MagicPlaceholder()
            env = get_jinja_env()
            ph = {
                name: root[name] for name in doc.get_undeclared_template_variables(env)
            }

            for image in self._extract_image_placeholders(doc):
                cleaned_image = image.strip('"').strip("'")
                dummy = django_file("black.png").file
                dummy_images.append(dummy)
                ph[root[cleaned_image]] = dummy

            ph["_tpl"] = doc

            doc.render(ph, env)

            if sample_data:
                sample_data["_tpl"] = doc
                doc.render(sample_data, env)

            self.validate_available_placeholders(
                used_placeholders=root.reports,
                available_placeholders=available_placeholders,
            )

        except TemplateSyntaxError as exc:
            arg_str = ";".join(exc.args)
            raise exceptions.ValidationError(
                f"Syntax error in template: {arg_str}"
            ) from exc

        finally:
            # The dummy images are only needed while rendering; close them so
            # repeated validations do not leak file handles.
            for dummy in dummy_images:
                dummy.close()
            self.template.seek(0)

    def merge(self, data, buf):
        """Render the template with ``data`` and save the result into ``buf``.

        ``data`` gets a ``"_tpl"`` key assigned in place. Returns ``buf``.
        """
        doc = DocxTemplate(self.template)
        data["_tpl"] = doc

        doc.render(data, get_jinja_env(), autoescape=True)
        doc.save(buf)
        return buf
186 | "sheet_name.decode", 187 | ] 188 | 189 | def __init__(self, template): 190 | self.template = template 191 | self.writer = None 192 | 193 | def validate_is_xlsx(self): 194 | try: 195 | openpyxl.load_workbook(self.template) 196 | except (ValueError, zipfile.BadZipfile): 197 | raise exceptions.ParseError("not a valid xlsx file") 198 | 199 | def validate(self, available_placeholders=None, sample_data=None): 200 | self.validate_is_xlsx() 201 | self.validate_template_syntax(available_placeholders, sample_data) 202 | 203 | def _expand_available_placeholders(self, ph_list): 204 | """Expand available placeholder list for (internal) correctness. 205 | 206 | If client gives "foo[].bar", we implicitly also allow "foo[]" and "foo": 207 | 208 | >>> self._expand_available_placeholders(["foo[].bar", "baz.boo"]) 209 | ["foo[]", "foo[].bar", "baz.boo"] 210 | """ 211 | out_list = [] 212 | for ph in ph_list: 213 | pieces = ph.split(".") 214 | for offset in range(len(pieces)): 215 | prefixed = ".".join(pieces[:offset]) 216 | out_list.append(prefixed) 217 | if prefixed.endswith("[]"): 218 | out_list.append(prefixed[:-2]) 219 | out_list.append(ph) 220 | return out_list 221 | 222 | def validate_template_syntax(self, available_placeholders=None, sample_data=None): 223 | # We cannot use jinja to validate because xltpl uses jinja's lexer directly 224 | magic = None 225 | if not sample_data: 226 | sample_data = magic = _MagicPlaceholder() 227 | buf = io.BytesIO() 228 | 229 | try: 230 | self.merge(sample_data, buf, is_test_merge=True) 231 | except TemplateSyntaxError as exc: 232 | arg_str = ";".join(exc.args) 233 | raise exceptions.ValidationError(f"Syntax error in template: {arg_str}") 234 | 235 | if available_placeholders and magic is not None: 236 | missing_set = ( 237 | set(magic.reports) 238 | - set(self._expand_available_placeholders(available_placeholders)) 239 | - set(self.BUILTIN_VARS) 240 | ) 241 | if not missing_set: 242 | return 243 | 244 | missing = "; ".join(missing_set) 
245 | raise exceptions.ValidationError( 246 | f"Placeholders used in template, but not available: {missing}" 247 | ) 248 | 249 | def merge(self, data, buf, is_test_merge=False): 250 | self.writer = writer = writerx.BookWriter(self.template) 251 | self._current_data = data 252 | 253 | writer.jinja_env.filters.update(get_jinja_filters()) 254 | if is_test_merge: 255 | writer.jinja_env.undefined = self._undefined_factory 256 | writer.jinja_env.globals.update(dir=dir, getattr=getattr) 257 | 258 | payloads = [] 259 | sheets = writer.sheet_resource_map.sheet_state_list 260 | for sheet in sheets: 261 | new = dict(data) 262 | new["sheet_name"] = sheet.name 263 | new["tpl_name"] = sheet.name 264 | payloads.append(new) 265 | writer.render_book(payloads=payloads) 266 | writer.save(buf) 267 | return buf 268 | 269 | def _undefined_factory(self, name): 270 | # For test merges, we set a custom "undefined" factory that 271 | # doesn't really do undefined, but just fetches the right value 272 | # from our magic placeholder structure 273 | return self._current_data[name] 274 | 275 | 276 | ENGINES = { 277 | models.Template.DOCX_TEMPLATE: DocxTemplateEngine, 278 | models.Template.XLSX_TEMPLATE: XlsxTemplateEngine, 279 | } 280 | 281 | 282 | def get_engine(engine, template): 283 | return ENGINES[engine](template) 284 | -------------------------------------------------------------------------------- /document_merge_service/api/factories.py: -------------------------------------------------------------------------------- 1 | from factory import Faker 2 | from factory.django import DjangoModelFactory 3 | 4 | from . 
class FileConverter:
    """Convert file contents to another format with unoconv."""

    @staticmethod
    def convert(file_contents, target_format):
        """Convert ``file_contents`` to ``target_format``.

        The bytes are written to a temporary file below
        ``settings.DATABASE_DIR/tmp`` and that file's name is passed to
        ``Unoconv.process``.

        Returns:
            HttpResponse carrying the converter's stdout; status 200 when the
            converter's return code is 0, otherwise 500.
        """
        tmp_dir = Path(settings.DATABASE_DIR, "tmp")
        tmp_dir.mkdir(parents=True, exist_ok=True)

        with NamedTemporaryFile("wb", dir=tmp_dir) as tmp:
            tmp.write(file_contents)
            # The converter is given the file *name*, not this handle, so the
            # buffered bytes must be on disk before it runs.
            tmp.flush()
            unoconv = Unoconv(
                pythonpath=settings.UNOCONV_PYTHON,
                unoconvpath=settings.UNOCONV_PATH,
            )
            result = unoconv.process(tmp.name, target_format)

        status = 200 if result.returncode == 0 else 500

        return HttpResponse(
            content=result.stdout, status=status, content_type=result.content_type
        )
# TODO: refactor into reusable package later
class JSONValueFilter(Filter):
    """Filter a JSON field by one or more JSON-encoded key/value expressions.

    The filter value is a JSON object, or a list of JSON objects, each with a
    "key", a "value" and an optional "lookup".
    """

    def filter(self, qs, value):
        """Apply every expression in ``value`` to ``qs`` and return it.

        Raises:
            ValidationError: if ``value`` is not valid JSON, is not an object
                or list of objects, lacks "key"/"value", or names a lookup
                the field does not support.
        """
        if value in EMPTY_VALUES:
            return qs

        valid_lookups = self._valid_lookups(qs)

        try:
            value = json.loads(value)
        except json.decoder.JSONDecodeError as exc:
            raise ValidationError(
                "JSONValueFilter value needs to be json encoded."
            ) from exc

        if isinstance(value, dict):
            # be a bit more tolerant
            value = [value]

        if not isinstance(value, list):
            # Valid JSON such as `5` or `"abc"` cannot be iterated as a list
            # of expressions; reject it instead of failing with a TypeError.
            raise ValidationError(
                "JSONValueFilter value needs to be a JSON object or a list of "
                "JSON objects."
            )

        for expr in value:
            if expr in EMPTY_VALUES:  # pragma: no cover
                continue
            if not isinstance(expr, dict) or not all(
                ("key" in expr, "value" in expr)
            ):
                raise ValidationError(
                    'JSONValueFilter value needs to have a "key" and "value" and an '
                    'optional "lookup" key.'
                )

            lookup_expr = expr.get("lookup", self.lookup_expr)
            if lookup_expr not in valid_lookups:
                raise ValidationError(
                    f'Lookup expression "{lookup_expr}" not allowed for field '
                    f'"{self.field_name}". Valid expressions: '
                    f"{', '.join(valid_lookups.keys())}"
                )
            # "contains" behaves differently on JSONFields as it does on TextFields.
            # That's why we annotate the queryset with the value.
            # Some discussion about it can be found here:
            # https://code.djangoproject.com/ticket/26511
            if isinstance(expr["value"], str):
                qs = qs.annotate(
                    field_val=Cast(
                        KT(f"{self.field_name}__{expr['key']}"),
                        output_field=TextField(),
                    )
                )
                lookup = {f"field_val__{lookup_expr}": expr["value"]}
            else:
                lookup = {
                    f"{self.field_name}__{expr['key']}__{lookup_expr}": expr["value"]
                }
            qs = qs.filter(**lookup)
        return qs

    def _valid_lookups(self, qs):
        """Return the lookups registered on the model field behind ``field_name``."""
        # We need some traversal magic in case field name is a related lookup
        traversals = self.field_name.split("__")
        actual_field = traversals.pop()

        model = qs.model
        for field in traversals:  # pragma: no cover
            model = model._meta.get_field(field).related_model

        return model._meta.get_field(actual_field).get_lookups()
@pass_context
def image(ctx, img_name, width=None, height=None, keep_aspect_ratio=False):
    """Jinja filter turning the context file named ``img_name`` into an ``InlineImage``.

    ``width`` and ``height`` are converted with ``Mm`` when given. If both are
    set and ``keep_aspect_ratio`` is true, they are shrunk to the image's own
    aspect ratio.

    Returns ``None`` when the context entry is empty.

    Raises:
        ValidationError: if ``img_name`` is not in the context, or the file is
            neither PNG nor JPEG.
    """
    template = ctx["_tpl"]

    if img_name not in ctx:
        raise ValidationError(f'No file for image "{img_name}" provided!')

    image_file = ctx.get(img_name)
    if not image_file:
        # Fallback to no image
        return None

    # Rewind first: the same file may be referenced several times.
    image_file.seek(0)
    mime_type = magic.from_buffer(image_file.read(), mime=True)
    if mime_type not in ["image/png", "image/jpeg"]:
        raise ValidationError("Only png and jpg images are supported!")

    target_width = Mm(width) if width else None
    target_height = Mm(height) if height else None

    if target_width and target_height and keep_aspect_ratio:
        pixel_width, pixel_height = Image.open(image_file).size
        target_width, target_height = get_size_with_aspect_ratio(
            target_width, target_height, pixel_width / pixel_height
        )

    return InlineImage(
        template, image_file, width=target_width, height=target_height
    )
def get_size_with_aspect_ratio(width, height, aspect_ratio):
    """Shrink one side of ``width`` x ``height`` to match ``aspect_ratio``.

    ``aspect_ratio`` is width divided by height. When the requested box is at
    least as wide as that ratio, the width is reduced; otherwise the height
    is. Returns the ``(width, height)`` pair.
    """
    box_is_wider = (width / height) >= aspect_ratio
    if box_is_wider:
        return height * aspect_ratio, height
    return width, width / aspect_ratio
class Command(BaseCommand):
    """Re-save every template file through the encrypting storage."""

    help = "Swaps plain text template content to encrypted content"

    def handle(self, *args, **options):
        """Read each template with the plain storage, write it back encrypted.

        Templates that fail are reported and collected; the command keeps
        going with the remaining ones.

        Raises:
            ImproperlyConfigured: if encryption is enabled but the default
                storage has no ``OPTIONS["object_parameters"]``.
        """
        if not settings.DMS_ENABLE_AT_REST_ENCRYPTION:
            return self.stdout.write(
                self.style.WARNING(
                    "Encryption is not enabled. Skipping encryption of templates."
                )
            )

        failed_templates = []

        # flip between default and encrypted storage to have the correct parameters in the requests
        default_setting = settings.STORAGES["default"]
        encrypted_storage = storages.create_storage(dict(default_setting))
        if (
            "OPTIONS" not in default_setting
            or "object_parameters" not in default_setting["OPTIONS"]
        ):
            raise ImproperlyConfigured(
                "Encryption is enabled but no object_parameters found in the storage settings."
            )
        # Build the unencrypted variant from copies: deleting the key in place
        # would strip "object_parameters" from the live settings.STORAGES.
        unencrypted_options = {
            key: option
            for key, option in default_setting["OPTIONS"].items()
            if key != "object_parameters"
        }
        unencrypted_storage = storages.create_storage(
            {**default_setting, "OPTIONS": unencrypted_options}
        )

        query = Template.objects.all()
        for template in tqdm(query.iterator(50), total=query.count()):
            # get original template content
            template.template.storage = unencrypted_storage
            try:
                content = template.template.open()

                # overwrite with encrypted content
                template.template.storage = encrypted_storage
                template.template.save(template.template.name, content)
            except Exception as e:
                self.stdout.write(
                    self.style.WARNING(f"Error for template {str(template.pk)}: {e}")
                )
                failed_templates.append(str(template.pk))
                continue

        if failed_templates:
            self.stdout.write(
                self.style.WARNING(f"These templates failed:\n{failed_templates}")
            )
        self.stdout.write(self.style.SUCCESS("Encryption finished"))
def handle(self, *args, **options):
    """Upload every local file matching ``--source`` over its template's file.

    A file is matched to a template by its base name. Files without a
    matching template are reported and skipped. With ``--dry-run`` nothing
    is written; the command only reports what it would upload.
    """
    storage = DefaultStorage()
    dry_run = options.get("dry")

    for local_path in glob.iglob(options["source"]):
        filename = os.path.basename(local_path)

        try:
            template = Template.objects.get(template=filename)
        except Template.DoesNotExist:
            self.stdout.write(
                self.style.WARNING(f'No template for filename "{filename}" found')
            )
            continue

        if dry_run:
            self.stdout.write(
                self.style.WARNING(f'Would upload file for template "{template.pk}"')
            )
            continue

        try:
            with open(local_path, "rb") as file:
                # Replace the stored file under its existing name.
                storage.delete(template.template.name)
                storage.save(template.template.name, file)

            self.stdout.write(
                self.style.SUCCESS(f'Uploaded file for template "{template.pk}"')
            )
        except Exception as e:  # pragma: no cover
            self.stdout.write(
                self.style.ERROR(
                    f'Could not upload file for template "{template.pk}": {str(e)}'
                )
            )
initial = True 10 | 11 | dependencies = [] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name="Template", 16 | fields=[ 17 | ("slug", models.SlugField(primary_key=True, serialize=False)), 18 | ("description", models.TextField(default="")), 19 | ("template", models.FileField(max_length=1024, upload_to="")), 20 | ( 21 | "engine", 22 | models.CharField( 23 | choices=[ 24 | ( 25 | "docx-template", 26 | "Docx Template engine (https://github.com/elapouya/python-docx-template)", 27 | ), 28 | ( 29 | "docx-mailmerge", 30 | "Docx MailMerge engine (https://github.com/Bouke/docx-mailmerge)", 31 | ), 32 | ], 33 | max_length=20, 34 | ), 35 | ), 36 | ], 37 | ) 38 | ] 39 | -------------------------------------------------------------------------------- /document_merge_service/api/migrations/0002_template_group.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.18 on 2019-01-08 13:09 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [("api", "0001_initial")] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="template", 14 | name="group", 15 | field=models.CharField( 16 | blank=True, db_index=True, max_length=255, null=True 17 | ), 18 | ) 19 | ] 20 | -------------------------------------------------------------------------------- /document_merge_service/api/migrations/0003_template_meta.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2021-11-01 12:34 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("api", "0002_template_group"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="template", 14 | name="meta", 15 | field=models.JSONField(default=dict), 16 | ), 17 | ] 18 | 
def migrate_group_to_meta(apps, schema_editor):
    """Copy each template's non-null ``group`` into ``meta["group"]`` and save it."""
    template_model = apps.get_model("api", "Template")
    grouped_templates = template_model.objects.filter(group__isnull=False)

    for entry in grouped_templates:
        entry.meta["group"] = entry.group
        entry.save()
migrate_group_to_meta_reverse(apps, schema_editor): 15 | Template = apps.get_model("api", "Template") 16 | 17 | for template in Template.objects.filter(meta__has_key="group"): 18 | template.group = template.meta["group"] 19 | del template.meta["group"] 20 | template.save() 21 | 22 | 23 | class Migration(migrations.Migration): 24 | dependencies = [ 25 | ("api", "0005_xlsx_template_engine"), 26 | ] 27 | 28 | operations = [ 29 | migrations.RunPython(migrate_group_to_meta, migrate_group_to_meta_reverse), 30 | migrations.RemoveField( 31 | model_name="template", 32 | name="group", 33 | ), 34 | ] 35 | -------------------------------------------------------------------------------- /document_merge_service/api/migrations/0007_template_created_at_template_created_by_group_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.15 on 2024-08-19 07:22 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('api', '0006_remove_template_group'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='template', 15 | name='created_at', 16 | field=models.DateTimeField(auto_now_add=True, db_index=True, null=True), 17 | ), 18 | migrations.AddField( 19 | model_name='template', 20 | name='created_by_group', 21 | field=models.CharField(blank=True, max_length=255, null=True), 22 | ), 23 | migrations.AddField( 24 | model_name='template', 25 | name='created_by_user', 26 | field=models.CharField(blank=True, max_length=150, null=True), 27 | ), 28 | migrations.AddField( 29 | model_name='template', 30 | name='modified_at', 31 | field=models.DateTimeField(auto_now=True, db_index=True, null=True), 32 | ), 33 | migrations.AddField( 34 | model_name='template', 35 | name='modified_by_group', 36 | field=models.CharField(blank=True, max_length=255, null=True), 37 | ), 38 | migrations.AddField( 39 | model_name='template', 40 | 
name='modified_by_user', 41 | field=models.CharField(blank=True, max_length=150, null=True), 42 | ), 43 | ] 44 | -------------------------------------------------------------------------------- /document_merge_service/api/migrations/0008_alter_template_engine.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.21 on 2025-05-15 08:31 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('api', '0007_template_created_at_template_created_by_group_and_more'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='template', 15 | name='engine', 16 | field=models.CharField(choices=[('docx-template', 'Docx Template engine (https://github.com/elapouya/python-docx-template)'), ('xlsx-template', 'Xlsx Template engine (https://github.com/zhangyu836/python-xlsx-template)')], max_length=20), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /document_merge_service/api/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/api/migrations/__init__.py -------------------------------------------------------------------------------- /document_merge_service/api/models.py: -------------------------------------------------------------------------------- 1 | from django.core.files.storage import DefaultStorage 2 | from django.core.files.uploadedfile import UploadedFile 3 | from django.db import models 4 | from django.dispatch import receiver 5 | 6 | 7 | class Template(models.Model): 8 | DOCX_TEMPLATE = "docx-template" 9 | XLSX_TEMPLATE = "xlsx-template" 10 | ENGINE_CHOICES_LIST = (DOCX_TEMPLATE, XLSX_TEMPLATE) 11 | ENGINE_CHOICES_TUPLE = ( 12 | ( 13 | DOCX_TEMPLATE, 14 | "Docx Template engine 
@receiver(models.signals.pre_save, sender=Template)
def auto_delete_file_on_change(sender, instance, **kwargs):
    """
    Remove the previously stored file when a `Template` gets a new upload.

    Runs before a `Template` is saved. Nothing happens when the template is
    not in the database yet or has no stored file. The old file is deleted
    only when the incoming file is an `UploadedFile`, so saves that change
    other attributes keep the file.
    """

    try:
        stored = Template.objects.get(pk=instance.pk)
    except Template.DoesNotExist:
        return

    previous_file = stored.template
    if not previous_file:
        return

    replaced_by_upload = isinstance(instance.template.file, UploadedFile)
    if replaced_by_upload:
        DefaultStorage().delete(previous_file.name)
class AvailablePlaceholdersField(serializers.ListField):
    """A list field type that also accepts JSON lists.

    Instead of multiple fields with the same name (traditional
    form-data lists), we also accept a JSON list for the available
    placeholders. This helps reduce the number of fields in the
    request, which WAF, Django, and possibly other server-side
    web components don't appreciate.
    """

    def to_internal_value(self, data):
        """Flatten ``data`` into one list of placeholders.

        Entries starting with "[" are parsed as JSON and their items added;
        all other entries are taken as they are.

        Raises:
            ValidationError: if an entry is not a string, or looks like JSON
                but cannot be parsed.
        """
        data = data if isinstance(data, list) else [data]
        all_values = []
        for value in data:
            if not isinstance(value, str):
                # Would otherwise fail on `.startswith` with an AttributeError.
                raise serializers.ValidationError(
                    "available_placeholders entries need to be strings."
                )
            if value.startswith("["):
                # looks like JSON, parse it
                try:
                    parsed = json.loads(value)
                except json.JSONDecodeError as exc:
                    raise serializers.ValidationError(
                        "available_placeholders contains invalid JSON."
                    ) from exc
                all_values.extend(parsed)
            else:
                all_values.append(value)

        return all_values
def validate(self, data):
    """Stamp audit fields and validate the template's placeholders.

    Sets ``created_by_*`` (only when no instance exists yet) and
    ``modified_by_*`` from the requesting user, then removes the keys
    that only steer validation (``disable_template_validation``,
    ``available_placeholders``, ``sample_data``, ``files``) from ``data``.
    Unless validation was disabled, the template is checked by the engine
    returned from ``engines.get_engine``.

    Returns:
        The cleaned ``data`` dict.

    Raises:
        exceptions.ValidationError: for conflicting or unsupported
            combinations of ``available_placeholders``, ``sample_data``
            and ``files``.
    """
    user = self.context["request"].user

    if self.instance is None:
        data["created_by_user"] = user.username
        data["created_by_group"] = user.group
    data["modified_by_user"] = user.username
    data["modified_by_group"] = user.group

    # Strip the validation-only keys up front. The validating path already
    # removes them before returning; doing it here as well keeps them out
    # of the returned data when validation is skipped (presumably they are
    # not fields of the Template model - verify against models.py).
    skip_validation = data.pop("disable_template_validation", False)
    available_placeholders = data.pop("available_placeholders", None)
    sample_data = data.pop("sample_data", None)
    files = data.pop("files", None)

    if skip_validation:
        # Some template structures cannot be validated automatically,
        # or it would be impossible or too much effort to provide accurate
        # sample data. For those cases, we allow disabling the validation.
        return data

    # Fall back to the stored values when the request does not replace them.
    engine = data.get("engine", self.instance and self.instance.engine)
    template = data.get("template", self.instance and self.instance.template)

    if sample_data and available_placeholders:
        raise exceptions.ValidationError(
            "Only one of available_placeholders and sample_data is allowed"
        )
    elif files and engine != models.Template.DOCX_TEMPLATE:
        raise exceptions.ValidationError(
            f'Files are only accepted with the "{models.Template.DOCX_TEMPLATE}"'
            f" engine"
        )
    elif sample_data:
        if files:
            # Uploaded files become sample values keyed by their file name.
            for file in files:
                sample_data[file.name] = file

        available_placeholders = self._sample_to_placeholders(sample_data)
    elif files:
        raise exceptions.ValidationError(
            "Files are only accepted when also providing sample_data"
        )

    engine = engines.get_engine(engine, template)
    engine.validate(
        available_placeholders=available_placeholders, sample_data=sample_data
    )

    return data
@pytest.mark.parametrize(
    "authentication_header,status_code,error",
    [
        ("", status.HTTP_200_OK, False),
        ("Bearer", status.HTTP_200_OK, True),
        ("Bearer Too many params", status.HTTP_200_OK, True),
        ("Basic Auth", status.HTTP_200_OK, True),
        ("Bearer Token", status.HTTP_200_OK, False),
        ("Bearer Token", status.HTTP_502_BAD_GATEWAY, True),
    ],
)
def test_bearer_token_authentication_authenticate(
    rf, authentication_header, error, requests_mock, settings, status_code
):
    """Authenticate with various Authorization headers against a mocked userinfo endpoint."""
    settings.REQUIRE_AUTHENTICATION = True

    userinfo = {"sub": "1"}
    requests_mock.get(
        settings.OIDC_USERINFO_ENDPOINT,
        status_code=status_code,
        request_headers={"Authorization": authentication_header},
        text=json.dumps(userinfo),
    )

    request = rf.get("/openid", HTTP_AUTHORIZATION=authentication_header)
    authenticator = authentication.BearerTokenAuthentication()

    try:
        result = authenticator.authenticate(request)
    except exceptions.AuthenticationFailed:
        # A failure is only acceptable for cases flagged as erroneous.
        assert error
        return

    if not result:
        return

    user, _auth = result
    assert user.is_authenticated
    # The userinfo is cached under the SHA-256 digest of the token.
    token_digest = hashlib.sha256(b"Token").hexdigest()
    assert cache.get(f"authentication.userinfo.{token_digest}") == userinfo
@pytest.mark.parametrize(
    "filename,target_filename,target_format,response_content_type",
    [
        (
            "docx-template.docx",
            "docx-template.pdf",
            "pdf",
            "application/pdf",
        ),
        (
            "2023.test.test.docx-template.docx",
            "2023.test.test.docx-template.pdf",
            "pdf",
            "application/pdf",
        ),
    ],
)
def test_convert(
    db, client, filename, target_filename, target_format, response_content_type
):
    """Post a file to the convert endpoint and check status and response headers."""
    source = django_file(filename)
    payload = {"file": source.file, "target_format": target_format}

    response = client.post(reverse("convert"), data=payload, format="multipart")

    assert response.status_code == status.HTTP_200_OK

    headers = response.headers
    assert headers.get("Content-Type") == response_content_type
    expected_disposition = f'attachment; filename="{target_filename}"'
    assert headers.get("Content-Disposition") == expected_disposition
def test_encrypt_templates(db, settings, settings_storage, mocker, template_factory):
    """Run ``dms_encrypt_templates`` with mocked S3 storage.

    Expects exactly one ``open`` and one ``save`` call on the storage for
    the single template created here.
    """
    template_factory(template=django_file("docx-template.docx"))

    settings.DMS_ENABLE_AT_REST_ENCRYPTION = True
    settings_storage["default"] = {
        "BACKEND": "storages.backends.s3.S3Storage",
        "OPTIONS": {
            **settings.S3_STORAGE_OPTIONS,
            "object_parameters": {
                "SSECustomerKey": "x" * 32,
                "SSECustomerAlgorithm": "AES256",
            },
        },
    }

    mocker.patch("storages.backends.s3.S3Storage.open")
    mocker.patch("storages.backends.s3.S3Storage.save")
    S3Storage.save.return_value = "name-of-the-file"

    # Keep the stand-in file open only for the duration of the command so
    # the handle is closed deterministically instead of being leaked.
    with open("README.md", "rb") as readme:
        S3Storage.open.return_value = DjangoFile(readme)
        call_command("dms_encrypt_templates")

    assert S3Storage.open.call_count == 1
    assert S3Storage.save.call_count == 1
def test_encrypt_templates_failed(
    db, settings, settings_storage, mocker, template_factory
):
    """Check that the command reports a failure when the storage cannot open a template.

    ``S3Storage.open`` is patched to raise ``FileNotFoundError``; the
    command output must mention "failed" and nothing may be saved.
    """
    template_factory(template=django_file("docx-template.docx"))

    settings.DMS_ENABLE_AT_REST_ENCRYPTION = True
    settings_storage["default"] = {
        "BACKEND": "storages.backends.s3.S3Storage",
        "OPTIONS": {
            **settings.S3_STORAGE_OPTIONS,
            "object_parameters": {
                "SSECustomerKey": "x" * 32,
                "SSECustomerAlgorithm": "AES256",
            },
        },
    }

    # side_effect makes every call raise, so no return value is configured
    # for ``open``; a file opened for that purpose would never be used or
    # closed.
    mocker.patch("storages.backends.s3.S3Storage.open", side_effect=FileNotFoundError)
    mocker.patch("storages.backends.s3.S3Storage.save")
    S3Storage.save.return_value = "name-of-the-file"

    out = StringIO()
    call_command("dms_encrypt_templates", stdout=out)

    assert S3Storage.open.call_count == 1
    assert S3Storage.save.call_count == 0
    assert "failed" in out.getvalue()
def test_render():
    """Merge ``_structure`` into the xlsx template and check each worksheet's cells."""
    engine = XlsxTemplateEngine(django_file("xlsx-structure.xlsx"))

    output = io.BytesIO()
    engine.merge(_structure, output)
    output.seek(0)

    workbook = openpyxl.load_workbook(output)
    for index, sheet in enumerate(workbook.worksheets):
        sheet_name = _expect_name[index]
        assert sheet.title == sheet_name

        # Cell -> expected value, checked in the same order for every sheet.
        expected_cells = [
            ("A1", "xdata0"),
            ("A2", "xdata1"),
            ("A3", sheet_name),
            ("A5", "Item: mixed"),
            ("A6", "Item: list"),
            ("A7", "Subitem: xdata2"),
        ]
        for cell, expected in expected_cells:
            assert sheet[cell].value == expected
@pytest.mark.parametrize(
    "value,status_code",
    [
        (json.dumps([{"key": "foo", "value": "bar"}]), HTTP_200_OK),
        (json.dumps([{"key": "int", "value": 5, "lookup": "gt"}]), HTTP_200_OK),
        (
            json.dumps(
                [{"key": "foo", "value": "bar"}, {"key": "baz", "value": "bla"}]
            ),
            HTTP_200_OK,
        ),
        (
            json.dumps([{"key": "foo", "value": "bar", "lookup": "asdfgh"}]),
            HTTP_400_BAD_REQUEST,
        ),
        (json.dumps([{"key": "foo"}]), HTTP_400_BAD_REQUEST),
        (json.dumps({"key": "foo"}), HTTP_400_BAD_REQUEST),
        ("foo", HTTP_400_BAD_REQUEST),
        ("[{foo, no json)", HTTP_400_BAD_REQUEST),
    ],
)
def test_json_value_filter(db, template_factory, admin_client, value, status_code):
    """Filter the template list by ``meta`` and expect only the matching template."""
    doc = template_factory(meta={"foo": "bar", "baz": "bla", "int": 23})
    template_factory(meta={"foo": "baz"})
    template_factory()

    resp = admin_client.get(reverse("template-list"), {"meta": value})
    assert resp.status_code == status_code

    if status_code != HTTP_200_OK:
        return

    results = resp.json()["results"]
    assert len(results) == 1
    assert results[0]["slug"] == str(doc.pk)
@pytest.mark.parametrize(
    "width,height,keep_aspect_ratio,expected_size",
    [
        (20, 10, False, (20, 10)),
        (20, 10, True, (10, 10)),
        (10, 20, True, (10, 10)),
        (10, None, False, (10, None)),
        (None, 10, False, (None, 10)),
        (10, None, True, (10, None)),
        (None, 10, True, (None, 10)),
    ],
)
def test_image(width, height, keep_aspect_ratio, expected_size):
    """Check the dimensions of the inline image produced by the ``image`` filter."""
    # The size of "black.png" is 32x32 pixels which is an aspect ratio of 1
    context = {"_tpl": None, "black.png": django_file("black.png").file}
    inline_image = image(context, "black.png", width, height, keep_aspect_ratio)

    expected_width, expected_height = expected_size

    assert inline_image.width == (Mm(expected_width) if expected_width else None)
    assert inline_image.height == (Mm(expected_height) if expected_height else None)
@pytest.mark.parametrize(
    "query_params,expected",
    [
        ({"page_size": 10}, 10),
        ({"page_size": 120}, 110),  # max page size reached
        ({}, 100),  # default page size
    ],
)
def test_pagination(db, client, template_factory, query_params, expected, mocker):
    """List 120 templates and check the page length for the given ``page_size``."""
    mocker.patch(
        "document_merge_service.api.pagination.APIPagination.max_page_size", 110
    )
    template_factory.create_batch(120)

    list_url = reverse("template-list")
    response = client.get(list_url, data=query_params)

    assert response.status_code == status.HTTP_200_OK
    payload = response.json()
    assert payload["count"] == 120
    assert len(payload["results"]) == expected
def get_filename_from_response(response):
    """Return the quoted ``filename`` value of the response's Content-Disposition header."""
    disposition = response["Content-Disposition"]
    match = re.search(r'filename="(.*)"', disposition)
    # Subscripting the match yields the first capture group.
    return match[1]
def test_template_download_url(db, client, template):
    """Check that the ``template`` serializer field renders the download URL."""
    source = django_file("docx-template-syntax.docx")
    template.template.save(os.path.basename(source.name), source)
    template.save()

    template_field = serializers.TemplateSerializer(template).fields["template"]
    expected_url = f"/api/v1/template-download/{template.slug}"

    assert template_field.to_representation(template) == expected_url
@pytest.mark.parametrize(
    "exists,status_code,method,url",
    [
        (
            True,
            status.HTTP_200_OK,
            "patch",
            reverse("template-detail", args=["foobar"]),
        ),
        (
            False,
            status.HTTP_201_CREATED,
            "post",
            reverse("template-list"),
        ),
    ],
)
@pytest.mark.parametrize("template__slug", ["foobar"])
def test_created_modified(db, admin_client, exists, template, status_code, method, url):
    """Check the audit fields returned after patching or creating a template."""
    upload = django_file("xlsx-template.xlsx")
    payload = {
        "slug": "test-slug",
        "template": upload.file,
        "engine": models.Template.XLSX_TEMPLATE,
    }

    send = getattr(admin_client, method)

    if not exists:
        template.delete()

    response = send(url, data=payload, format="multipart")
    assert response.status_code == status_code

    body = response.json()

    # created_by_* is only stamped on creation; the patched fixture has none.
    for field in ("created_by_user", "created_by_group"):
        if exists:
            assert body[field] is None
        else:
            assert body[field] == "admin"

    for field in ("modified_by_user", "modified_by_group"):
        assert body[field] == "admin"

    assert body["modified_at"]
    assert body["created_at"]
320 | status.HTTP_400_BAD_REQUEST, 321 | ), 322 | ( 323 | "docx-template-placeholdercheck.docx", 324 | [ 325 | "foo", 326 | "bar", 327 | "baz", 328 | "bar.some_attr", 329 | "list[].attribute", 330 | "black.png", 331 | ], 332 | None, 333 | [], 334 | [], 335 | models.Template.DOCX_TEMPLATE, 336 | status.HTTP_201_CREATED, 337 | ), 338 | ( 339 | "docx-template-placeholdercheck.docx", 340 | [ 341 | "foo", 342 | "bar", 343 | "baz", 344 | "bar.some_attr", 345 | "list[].attribute", 346 | ], 347 | None, 348 | [], 349 | ["black.png"], 350 | models.Template.DOCX_TEMPLATE, 351 | status.HTTP_400_BAD_REQUEST, 352 | ), 353 | ( 354 | "docx-template-placeholdercheck.docx", 355 | None, 356 | { 357 | "foo": "hello", 358 | "bar": { 359 | "some_attr": True, 360 | "list": [{"attribute": "value"}, {"attribute": "value2"}], 361 | }, 362 | "baz": "1234", 363 | "list": [{"attribute": "value"}], 364 | }, 365 | [django_file("black.png").file], 366 | [], 367 | models.Template.DOCX_TEMPLATE, 368 | status.HTTP_201_CREATED, 369 | ), 370 | ( 371 | "docx-template-placeholdercheck.docx", 372 | None, 373 | {}, 374 | [django_file("black.png").file], 375 | [], 376 | models.Template.DOCX_TEMPLATE, 377 | status.HTTP_400_BAD_REQUEST, 378 | ), 379 | ( 380 | "docx-template-placeholdercheck.docx", 381 | None, 382 | {}, 383 | [], 384 | [], 385 | models.Template.DOCX_TEMPLATE, 386 | status.HTTP_201_CREATED, 387 | ), 388 | ( 389 | "docx-template-placeholdercheck.docx", 390 | None, 391 | { 392 | "foo": "hello", 393 | "bar": { 394 | "some_attr": True, 395 | "list": [{"attribute": "value"}, {"attribute": "value2"}], 396 | }, 397 | "baz": "1234", 398 | "list": [{"attribute": "value"}], 399 | }, 400 | [], 401 | ["black.png"], 402 | models.Template.DOCX_TEMPLATE, 403 | status.HTTP_400_BAD_REQUEST, 404 | ), 405 | ( 406 | "docx-template-placeholdercheck.docx", 407 | None, 408 | { 409 | "foo": "hello", 410 | "bar": { 411 | "some_attr": True, 412 | "list": [{"attribute": "value"}, {"attribute": "value2"}], 413 | }, 414 | 
def test_template_create_with_available_placeholders(
    db,
    admin_client,
    engine,
    template_name,
    available_placeholders,
    sample_data,
    files,
    status_code,
    settings,
    use_json,
    expect_missing_placeholders,
):
    """Create a template and check how placeholder validation responds.

    Posts ``template_name`` to the ``template-list`` endpoint together with the
    parametrized ``available_placeholders`` / ``sample_data`` / ``files`` and
    asserts the expected status code.  For a 400 the first non-field error is
    compared with the message matching the parameter combination; for a 201 the
    stored template is fetched again and handed to ``Document``.
    """
    settings.DOCXTEMPLATE_JINJA_EXTENSIONS = ["jinja2.ext.loopcontrols"]
    endpoint = reverse("template-list")
    upload = django_file(template_name)

    # The parametrized file objects are shared between runs: rewind them first.
    for handle in files:
        handle.seek(0)

    payload = {
        "slug": "test-slug",
        "template": upload.file,
        "files": files,
        "engine": engine,
    }
    if sample_data:
        payload["sample_data"] = json.dumps(sample_data)
    if available_placeholders:
        if use_json:
            payload["available_placeholders"] = json.dumps(available_placeholders)
        else:
            payload["available_placeholders"] = available_placeholders

    response = admin_client.post(endpoint, data=payload, format="multipart")
    assert response.status_code == status_code, response.json()

    if status_code == status.HTTP_400_BAD_REQUEST:
        errors = response.json()
        missing = "; ".join(expect_missing_placeholders)
        first_error = errors["non_field_errors"][0]

        if sample_data and available_placeholders:
            # validation only allows one of these two params
            accepted = [
                "Only one of available_placeholders and sample_data is allowed"
            ]
        elif engine == models.Template.XLSX_TEMPLATE and files:
            accepted = ['Files are only accepted with the "docx-template" engine']
        elif not sample_data and files:
            accepted = ["Files are only accepted when also providing sample_data"]
        else:
            # we expect some missing placeholders
            accepted = [
                f"Template uses unavailable placeholders: {missing}",
                f'No file for image "{missing}" provided!',
            ]
        assert first_error in accepted

    if status_code == status.HTTP_201_CREATED:
        created = response.json()
        download = admin_client.get(created["template"])
        assert download.status_code == status.HTTP_200_OK
        Document(io.BytesIO(download.getvalue()))
@pytest.mark.parametrize(
    "template__slug,template__engine,template__template",
    [
        (
            "TestNameTemplate",
            models.Template.DOCX_TEMPLATE,
            django_file("docx-template.docx"),
        ),
    ],
)
def test_template_merge_docx(
    db, client, mock_filefield_name_validation, template, snapshot
):
    """Merge a docx template and compare the resulting body XML with the snapshot.

    Also checks the response content type and that the download is named after
    the template slug.  When the snapshot comparison fails, the merged document
    is written to ``/tmp`` so it can be inspected, and the failure is re-raised.
    """
    docx_mime = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
    merge_url = reverse("template-merge", args=[template.pk])
    payload = {"data": {"test": "Test input"}}

    response = client.post(merge_url, data=payload, format="json")
    assert response.status_code == status.HTTP_200_OK
    assert response.get("content-type") == docx_mime
    assert get_filename_from_response(response) == f"{template.slug}.docx"

    merged = Document(io.BytesIO(response.getvalue()))
    body_xml = etree.tostring(
        merged._element.body, encoding="unicode", pretty_print=True
    )
    try:
        snapshot.assert_match(body_xml)
    except AssertionError:  # pragma: no cover
        # Keep the generated document around for manual inspection.
        with open(f"/tmp/{template.slug}.docx", "wb") as output:
            output.write(response.getvalue())
        print("Template output changed. Check file at %s" % output.name)
        raise
@pytest.mark.parametrize(
    "placeholder,template_content",
    [
        ("{{blah}}", {"blah": "blub"}),
        ('{{NAME and ", represents " + NAME}}', {"NAME": "foo"}),
        ('{{NAME and ", represents " + NAME}}', {"NAME": ""}),
    ],
)
@pytest.mark.parametrize("template__engine", [models.Template.DOCX_TEMPLATE])
def test_validate_expression(
    docx_template_with_placeholder, placeholder, template_content
):
    """Test validation of templates with custom expressions.

    Builds a template containing ``placeholder`` and runs
    ``TemplateSerializer.validate`` on it with ``template_content``; the test
    passes when validation does not raise.
    """
    instance = docx_template_with_placeholder(placeholder)

    # The serializer only needs a request object exposing ``user``.
    Request = namedtuple("Request", ["user"])
    fake_request = Request(AnonymousUser())

    serializer = serializers.TemplateSerializer(context={"request": fake_request})
    serializer.instance = instance

    serializer.validate({"data": template_content})
@pytest.mark.parametrize(
    "missing_file,wrong_mime,status_code",
    [
        (False, False, status.HTTP_200_OK),
        (False, True, status.HTTP_400_BAD_REQUEST),
        (True, False, status.HTTP_400_BAD_REQUEST),
    ],
)
@pytest.mark.parametrize(
    "template__engine,template__template",
    [(models.Template.DOCX_TEMPLATE, django_file("docx-template-filters.docx"))],
)
def test_template_merge_jinja_filters_docx(
    db,
    client,
    mock_filefield_name_validation,
    template,
    snapshot,
    settings,
    tmp_path,
    missing_file,
    wrong_mime,
    status_code,
):
    """Merge the filters template with and without a usable ``black.png`` upload.

    Three scenarios are parametrized: a proper image (200), a file called
    ``black.png`` that actually holds docx content (400) and no file at all
    (400).  On success the merged body XML is compared with the snapshot.
    """
    settings.LANGUAGE_CODE = "de-ch"
    url = reverse("template-merge", args=[template.pk])

    # Couldn't put this into `parametrize`. For some reason, in the second run, the
    # template name is extended with a seemingly random string.
    template.template = django_file("docx-template-filters.docx")
    template.save()

    data = {
        "data": json.dumps(
            {
                "test_date": "1984-09-15",
                "test_time": "23:24",
                "test_datetime": "1984-09-15 23:23",
                "test_datetime2": "23:23-1984-09-15",
                "test_none": None,
                "test_nested": {"multiline": "This is\na test."},
            }
        ),
    }

    if not missing_file:
        upload = django_file("black.png").file
        if wrong_mime:
            # create a file with the correct filename (black.png) but with
            # the contents of a docx.
            # ``write_bytes`` replaces the file on every call, so the content has
            # to be written in one go; writing line by line would leave only the
            # last line of the docx in the file.
            fake_png = tmp_path / "black.png"
            fake_png.write_bytes(b"".join(template.template.file))
            upload = fake_png.open("rb")

        data["files"] = [upload]

    response = client.post(url, data=data, format="multipart")
    assert response.status_code == status_code

    if status_code == status.HTTP_200_OK:
        assert (
            response.get("content-type")
            == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )

        docx = Document(io.BytesIO(response.getvalue()))
        xml = etree.tostring(docx._element.body, encoding="unicode", pretty_print=True)
        snapshot.assert_match(xml)
@pytest.mark.parametrize(
    "sample,expected",
    [
        ({"foo": {"bar": ["foo", "blah"]}}, ["foo", "foo.bar", "foo.bar[]"]),
        (
            {
                "this": {
                    "is": {
                        "a": [
                            {
                                "list": {
                                    "with": {
                                        "a": ["nested", "object", "and", "a", "list"]
                                    }
                                }
                            }
                        ]
                    }
                }
            },
            [
                "this",
                "this.is",
                "this.is.a",
                "this.is.a[]",
                "this.is.a[].list",
                "this.is.a[].list.with",
                "this.is.a[].list.with.a",
                "this.is.a[].list.with.a[]",
            ],
        ),
    ],
)
def test_sample_to_placeholders(sample, expected):
    """Check that sample data is flattened into the sorted placeholder list."""
    serializer = serializers.TemplateSerializer()
    placeholders = serializer._sample_to_placeholders(sample)
    assert placeholders == sorted(expected)
def kill_dms_sleep(dms_test_bin):  # pragma: no cover
    """Kill every process whose name equals ``dms_test_bin.name``.

    Zombie cleanup via ``kill_zombies()`` runs first.  Each matching process is
    killed and waited for.

    :return: ``True`` if at least one matching process was found, else ``False``
    """
    kill_zombies()
    killed_any = False
    for candidate in process_iter(["name"]):
        if candidate.name() != dms_test_bin.name:
            continue
        killed_any = True
        candidate.kill()
        candidate.wait()
    return killed_any
def test_fork_load(capsys, loadtest_data):
    """Convert a shuffled batch of documents concurrently and expect PDFs.

    Every source document is queued ``count`` times and converted through
    ``try_fork_load`` on a pool of eight threads.  Progress dots are written to
    the real stdout (capture disabled).  A worker result that is an exception is
    re-raised; a result whose stdout does not start with ``%PDF`` fails the test.
    """
    count = 8
    sources = [Path(loadtest_data.parent, "docx-template.docx")]
    sources += [
        Path(loadtest_data, name) for name in ("1.doc", "2.docx", "3.docx", "4.docx")
    ]
    # Same layout as repeating each path ``count`` times before shuffling.
    test_files = [source for source in sources for _ in range(count)]
    random.shuffle(test_files)

    # Create the pool before entering ``try``: if the constructor itself fails,
    # ``finally`` must not touch an unbound ``pool`` and hide the real error.
    pool = ThreadPool(8)
    try:
        with capsys.disabled():
            sys.stdout.write(" Loadtest: ")
            sys.stdout.flush()
        for result in pool.imap(try_fork_load, enumerate(test_files)):
            with capsys.disabled():
                sys.stdout.write(".")
                sys.stdout.flush()
            if isinstance(result, Exception):  # pragma: no cover
                raise result
            elif not result.stdout.startswith(b"%PDF"):  # pragma: no cover
                raise ValueError(result)
        with capsys.disabled():
            sys.stdout.write("done")
    finally:
        pool.close()
        pool.join()
def sha256sum(path):
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is hashed in fixed-size chunks so that memory use does not grow
    with the file size.

    :param path: path of the file to hash
    :return: str - hex digest of the file content
    """
    digest = hashlib.sha256()
    with open(path, "rb") as file:
        # ``read`` returns b"" at end of file, which terminates the iterator.
        for chunk in iter(lambda: file.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
-------------------------------------------------------------------------------- /document_merge_service/api/unoconv.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | import signal 5 | from collections import namedtuple 6 | from mimetypes import guess_type 7 | from subprocess import ( 8 | PIPE, 9 | CalledProcessError, 10 | CompletedProcess, 11 | Popen, 12 | TimeoutExpired, 13 | run as srun, 14 | ) 15 | from uuid import uuid4 16 | 17 | from django.conf import settings 18 | from django.core.exceptions import ImproperlyConfigured 19 | 20 | logger = logging.getLogger(__name__) 21 | _unshare = "unshare" 22 | 23 | UnoconvResult = namedtuple( 24 | "UnoconvResult", ["stdout", "stderr", "returncode", "content_type"] 25 | ) 26 | 27 | # in testing 2 seconds is enough 28 | _min_timeout = 2 29 | 30 | # terminate_then_kill() takes 1 second in the worst case, so we have to use three seconds, 31 | # or the timeout won't be triggered before harakiri. 32 | # Increased to 3 seconds to be safe, we have reports of orphan soffice.bin in production. 
def getpgid(proc):
    """Pair ``proc`` with the id of its process group.

    :return: ``(proc, pgid)``; ``pgid`` is ``None`` when the process cannot be
        looked up any more
    """
    try:
        group = os.getpgid(proc.pid)
    except ProcessLookupError:  # pragma: no cover
        group = None
    return (proc, group)


def kill(proc, sig):
    """Send ``sig`` to the process group of a ``(process, group)`` pair.

    Without a known group the signal goes to the process itself, and only while
    it has no return code yet.  A vanished target is ignored.
    """
    process, group = proc
    try:
        if group is not None:
            os.killpg(group, sig)
        elif process.returncode is None:  # pragma: no cover
            os.kill(process.pid, sig)
    except ProcessLookupError:
        pass


def terminate_then_kill(proc):
    """Send SIGTERM, wait up to one second for the process, then send SIGKILL."""
    process, _group = proc
    kill(proc, signal.SIGTERM)
    try:
        process.wait(timeout=1)
    except TimeoutExpired:  # pragma: no cover
        pass
    finally:
        # SIGKILL is sent unconditionally, also when the wait succeeded.
        kill(proc, signal.SIGKILL)


def run_fork_safe(
    *popenargs,
    input=None,
    capture_output=False,
    timeout=None,
    check=False,
    **kwargs,
):
    """Run command with arguments and return a CompletedProcess instance.

    Works like `subprocess.run`, but puts the subprocess and its children in a new
    process group, so orphan forks can be terminated, too.

    :raises ValueError: if ``input`` is combined with ``stdin``, or
        ``capture_output`` with ``stdout``/``stderr``
    :raises CalledProcessError: if ``check`` is set and the return code is non-zero
    """
    if input is not None:  # pragma: no cover
        if kwargs.get("stdin") is not None:
            raise ValueError("stdin and input arguments may not both be used.")
        kwargs["stdin"] = PIPE

    if capture_output:  # pragma: no cover
        if any(kwargs.get(stream) is not None for stream in ("stdout", "stderr")):
            raise ValueError(
                "stdout and stderr arguments may not be used with capture_output."
            )
        kwargs.update(stdout=PIPE, stderr=PIPE)

    with Popen(*popenargs, start_new_session=True, **kwargs) as process:
        # Remember the group before the process can exit.
        target = getpgid(process)
        try:
            stdout, stderr = process.communicate(input, timeout=timeout)
        finally:
            # Runs on success and on failure, so the whole group is signalled.
            terminate_then_kill(target)
        retcode = process.poll()
        if check and retcode:  # pragma: no cover
            raise CalledProcessError(
                retcode, process.args, output=stdout, stderr=stderr
            )
        return CompletedProcess(process.args, retcode, stdout, stderr)
class Unoconv:
    """Thin wrapper that runs the unoconv command line tool through ``run``."""

    def __init__(self, pythonpath, unoconvpath):
        """
        Convert documents with unoconv command-line utility.

        :param pythonpath: str() - path to the python interpreter
        :param unoconvpath: str() - path to the unoconv binary
        """
        self.cmd = f"{pythonpath} {unoconvpath}"
        self.unshare = settings.ISOLATE_UNOCONV

    def get_formats(self):
        """
        Return the formats unoconv reports as supported.

        Runs ``unoconv --show`` and collects the format names from the lines of
        its stderr output that are indented by two spaces.

        :return: set() of format names
        :raises Exception: if unoconv exits with a non-zero return code
        :raises ImproperlyConfigured: if a type listed in
            ``settings.UNOCONV_ALLOWED_TYPES`` is not among the reported formats
        """
        from django.conf import settings

        cmd = f"{self.cmd} --show"
        p = run(cmd, unshare=self.unshare)
        if p.returncode != 0:  # pragma: no cover
            raise Exception("Failed to fetch the formats from unoconv!")

        formats = set()
        for line in p.stderr.decode("utf-8").split("\n"):
            if line.startswith(" "):
                # The group must be named: it is read back by name below.
                match = re.match(r"^\s\s(?P<format>[a-z]*)\s", line)
                if match:
                    formats.add(match.group("format"))

        not_supported = set(settings.UNOCONV_ALLOWED_TYPES) - formats
        if not_supported:
            # Sorted so the message does not depend on set ordering.
            raise ImproperlyConfigured(
                f"Unoconv doesn't support types {', '.join(sorted(not_supported))}."
            )

        return formats

    def process(self, filename, convert):
        """
        Convert a file.

        On failure (non-zero return code) the problem is logged and ``stdout`` of
        the result holds a short diagnostic string instead of the converted data.

        :param filename: str()
        :param convert: str() - target format. e.g. "pdf"
        :return: UnoconvResult()
        """
        from shlex import quote

        # unoconv MUST be running with the same python version as libreoffice
        pipe = str(uuid4())
        # The command is interpreted by a shell, so the caller supplied parts are
        # quoted; plain single quotes would break on a quote inside the value.
        cmd = (
            f"{self.cmd} --timeout 10 --pipe {pipe} "
            f"--format {quote(str(convert))} --stdout {quote(str(filename))}"
        )
        p = run(cmd, unshare=self.unshare)
        stdout = p.stdout
        if p.returncode != 0:  # pragma: no cover
            stdout = f"unoconv returncode: {p.returncode}"
            try:
                # Probe whether unsharing works at all to give a useful hint.
                srun([_unshare, "true"], check=True)
            except CalledProcessError:
                logger.error(
                    "Could not unshare, this process needs CAP_SYS_ADMIN to unshare."
                )
            else:
                logger.error(
                    f"unoconv failed with returncode: {p.returncode} stderr: {p.stderr}"
                )
                if p.stdout:
                    logger.error(f"unoconv failed with stdout: {p.stdout}")

        # Only the extension matters for guessing the content type.
        content_type, _ = guess_type(f"something.{convert}")

        result = UnoconvResult(
            stdout=stdout,
            stderr=p.stderr,
            returncode=p.returncode,
            content_type=content_type,
        )

        return result
# Router providing the standard routes for the template viewset.
r = DefaultRouter()

r.register("template", views.TemplateView)

urlpatterns = [
    re_path(
        # The captured value is passed to the view as the ``pk`` keyword argument,
        # so the group has to carry that name.
        r"^template-download/(?P<pk>.+)$",
        views.DownloadTemplateView.as_view(),
        name="template-download",
    ),
    re_path(
        r"^convert$",
        views.ConvertView.as_view(),
        name="convert",
    ),
]

urlpatterns.extend(r.urls)
import engines, filters, models, serializers 15 | from .file_converter import FileConverter 16 | 17 | 18 | class TemplateView(VisibilityViewMixin, PermissionViewMixin, viewsets.ModelViewSet): 19 | queryset = models.Template.objects 20 | serializer_class = serializers.TemplateSerializer 21 | filterset_class = filters.TemplateFilterSet 22 | ordering_fields = ("slug", "description") 23 | ordering = ("slug",) 24 | 25 | @action( 26 | methods=["post"], 27 | detail=True, 28 | serializer_class=serializers.TemplateMergeSerializer, 29 | ) 30 | def merge(self, request, pk=None): 31 | template = self.get_object() 32 | engine = engines.get_engine(template.engine, template.template) 33 | 34 | content_type, _ = mimetypes.guess_type(template.template.name) 35 | response = HttpResponse( 36 | content_type=content_type or "application/force-download" 37 | ) 38 | 39 | serializer = self.get_serializer(data=request.data) 40 | serializer.is_valid(raise_exception=True) 41 | 42 | data = serializer.data["data"] 43 | files = serializer.data.get("files") 44 | 45 | if files is not None: 46 | for file in files: 47 | data[file.name] = file 48 | 49 | try: 50 | response = engine.merge(serializer.data["data"], response) 51 | except jinja2.UndefinedError as exc: 52 | raise exceptions.ValidationError( 53 | f"Placeholder from template not found in data: {exc}" 54 | ) 55 | 56 | convert = serializer.data.get("convert") 57 | 58 | if convert: 59 | response = FileConverter.convert(response.content, convert) 60 | 61 | extension = mimetypes.guess_extension(response.headers["Content-Type"]) 62 | filename = f"{template.slug}{extension}" 63 | response["Content-Disposition"] = f'attachment; filename="{filename}"' 64 | return response 65 | 66 | 67 | class DownloadTemplateView(RetrieveAPIView): 68 | queryset = models.Template.objects 69 | lookup_field = "pk" 70 | 71 | def retrieve(self, request, **kwargs): 72 | template = self.get_object() 73 | 74 | return FileResponse( 75 | template.template.file, 76 | 
filename=f"{template.slug}{Path(template.template.name).suffix}", 77 | ) 78 | 79 | 80 | class ConvertView(APIView): 81 | def post(self, request, **kwargs): 82 | serializer = serializers.ConvertSerializer(data=request.data) 83 | serializer.is_valid(raise_exception=True) 84 | 85 | file = serializer.data["file"] 86 | target_format = serializer.data["target_format"] 87 | 88 | content_type, foo = mimetypes.guess_type(file.name) 89 | 90 | if content_type not in [ 91 | "application/vnd.oasis.opendocument.text", 92 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 93 | ]: 94 | raise exceptions.ValidationError( 95 | "Incorrect file format. Only docx and odt files are supported for conversion." 96 | ) 97 | 98 | response = FileConverter.convert(file.read(), target_format) 99 | 100 | filename = f"{splitext(file.name)[0]}.{target_format}" 101 | response["Content-Disposition"] = f'attachment; filename="{filename}"' 102 | return response 103 | -------------------------------------------------------------------------------- /document_merge_service/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from io import BytesIO 3 | from pathlib import Path 4 | 5 | import pytest 6 | import sentry_sdk 7 | from django.core.cache import cache 8 | from django.core.files.storage import DefaultStorage 9 | from pytest_factoryboy import register 10 | from rest_framework.test import APIClient 11 | 12 | from document_merge_service.api import engines, factories, models 13 | from document_merge_service.api.data import django_file 14 | 15 | from .api.authentication import AnonymousUser 16 | from .sentry import sentry_init 17 | 18 | register(factories.TemplateFactory) 19 | 20 | 21 | class TestUser(AnonymousUser): 22 | def __init__(self, username=None, groups=None): 23 | self.username = username if username else "admin" 24 | self.groups = groups or [] 25 | 26 | @property 27 | def is_authenticated(self): 28 | 
return True 29 | 30 | 31 | @pytest.fixture(scope="function", autouse=True) 32 | def _autoclear_cache(): 33 | cache.clear() 34 | 35 | 36 | @pytest.fixture 37 | def admin_groups(): 38 | return ["admin"] 39 | 40 | 41 | @pytest.fixture 42 | def admin_user(admin_groups): 43 | return TestUser(groups=admin_groups) 44 | 45 | 46 | @pytest.fixture 47 | def client(): 48 | return APIClient() 49 | 50 | 51 | @pytest.fixture 52 | def admin_client(db, admin_user): 53 | client = APIClient() 54 | client.force_authenticate(user=admin_user) 55 | return client 56 | 57 | 58 | @pytest.fixture 59 | def mock_filefield_name_validation(mocker): 60 | mocker.patch( 61 | "django.db.models.fields.files.validate_file_name", 62 | side_effect=lambda name, *args, **kwargs: name, 63 | ) 64 | 65 | 66 | @pytest.fixture 67 | def docx_template_with_placeholder(admin_client, template): 68 | """Return a factory function to build a docx template with a given placeholder.""" 69 | template.engine = models.Template.DOCX_TEMPLATE 70 | template.template = django_file("docx-template.docx") 71 | template.save() 72 | 73 | def make_template(placeholder): 74 | engine = engines.get_engine(template.engine, template.template) 75 | binary = BytesIO() 76 | engine.merge({"test": placeholder}, binary) 77 | binary.seek(0) 78 | template.template.save("foo.docx", binary) 79 | template.save() 80 | DefaultStorage().save(template.template.name, binary) 81 | return template 82 | 83 | return make_template 84 | 85 | 86 | @pytest.fixture 87 | def dms_test_bin(): 88 | sleep_path = Path(shutil.which("sleep")) 89 | test_path = Path(Path(__file__).parent.absolute(), "tmpb5nw53v5") 90 | with test_path.open("wb") as f, open(sleep_path, "rb") as g: 91 | f.write(g.read()) 92 | test_path.chmod(0o755) 93 | yield test_path 94 | test_path.unlink() 95 | 96 | 97 | @pytest.fixture 98 | def loadtest_data(): 99 | base = Path(__file__).parent.absolute() 100 | return Path(base, "api", "data", "loadtest") 101 | 102 | 103 | @pytest.fixture 104 | def 
sentry_mock(mocker): 105 | sentry_init("https://SomePublicKey@0.ingest.sentry.io/0", "test", 0.01, False) 106 | 107 | with sentry_sdk.isolation_scope() as scope: 108 | client = scope.get_client() 109 | transport = mocker.MagicMock() 110 | 111 | mocker.patch.object(client, "transport", new=transport) 112 | 113 | return transport 114 | -------------------------------------------------------------------------------- /document_merge_service/extensions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/extensions/__init__.py -------------------------------------------------------------------------------- /document_merge_service/extensions/permissions.py: -------------------------------------------------------------------------------- 1 | # To be overwritten for permission extensions point 2 | -------------------------------------------------------------------------------- /document_merge_service/extensions/visibilities.py: -------------------------------------------------------------------------------- 1 | # To be overwritten for validation extensions point 2 | -------------------------------------------------------------------------------- /document_merge_service/gunicorn.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | wsgi_app = "document_merge_service.wsgi:application" 4 | bind = f"{os.environ.get('GUNICORN_HOST', '0.0.0.0')}:{os.environ.get('GUNICORN_PORT', 8000)}" 5 | workers = os.environ.get("GUNICORN_WORKERS", 8) 6 | proc_name = "document-merge-service" 7 | timeout = os.environ.get("GUNICORN_TIMEOUT", 60) 8 | limit_request_line = os.environ.get("GUNICORN_LIMIT_REQUEST_LINE", 8190) 9 | -------------------------------------------------------------------------------- /document_merge_service/sentry.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import sentry_sdk 4 | from sentry_sdk.integrations.django import DjangoIntegration 5 | from sentry_sdk.integrations.logging import LoggingIntegration 6 | 7 | 8 | def sentry_init(dsn, env, traces_sample_rate, send_default_pii): 9 | sentry_sdk.init( 10 | dsn=dsn, 11 | environment=env, 12 | send_default_pii=send_default_pii, 13 | traces_sample_rate=traces_sample_rate, 14 | integrations=[ 15 | DjangoIntegration(), 16 | LoggingIntegration(level=logging.INFO, event_level=logging.ERROR), 17 | # the `level` kwarg defaults to INFO and instructs sentry to include log messages of that level or higher in 18 | # the message sent to sentry when triggered by an event of level specified in event_level kwarg as 19 | # breadcrumbs. 20 | ], 21 | ) 22 | -------------------------------------------------------------------------------- /document_merge_service/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import environ 5 | 6 | from .sentry import sentry_init 7 | 8 | env = environ.Env() 9 | django_root = environ.Path(__file__) - 2 10 | 11 | ENV_FILE = env.str("ENV_FILE", default=django_root(".env")) 12 | if os.path.exists(ENV_FILE): # pragma: no cover 13 | environ.Env.read_env(ENV_FILE) 14 | 15 | # per default production is enabled for security reasons 16 | # for development create .env file with ENV=development 17 | ENV = env.str("ENV", "production") 18 | 19 | 20 | def default(default_dev=env.NOTSET, default_prod=env.NOTSET): 21 | """Environment aware default.""" 22 | return default_prod if ENV == "production" else default_dev 23 | 24 | 25 | # Unoconv 26 | ISOLATE_UNOCONV = env.bool("ISOLATE_UNOCONV", default=False) 27 | SECRET_KEY = env.str("SECRET_KEY", default=default("uuuuuuuuuu")) 28 | DEBUG = env.bool("DEBUG", default=default(True, False)) 29 | ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", 
default=default(["*"])) 30 | 31 | 32 | # Database 33 | # https://docs.djangoproject.com/en/1.11/ref/settings/#databases 34 | 35 | DATABASE_DIR = env.str("DATABASE_DIR", default="/var/lib/document-merge-service/data") 36 | DATABASES = { 37 | "default": { 38 | "ENGINE": env.str("DATABASE_ENGINE", default="django.db.backends.sqlite3"), 39 | "NAME": env.str("DATABASE_NAME", default=f"{DATABASE_DIR}/sqlite3.db"), 40 | "USER": env.str("DATABASE_USER", default=""), 41 | "PASSWORD": env.str("DATABASE_PASSWORD", default=""), 42 | "HOST": env.str("DATABASE_HOST", default=""), 43 | "PORT": env.str("DATABASE_PORT", default=""), 44 | "OPTIONS": env.dict("DATABASE_OPTIONS", default={}), 45 | } 46 | } 47 | 48 | 49 | # Application definition 50 | 51 | INSTALLED_APPS = [ 52 | "django.contrib.messages", 53 | "django.contrib.staticfiles", 54 | "rest_framework", 55 | "django_filters", 56 | "document_merge_service.api.apps.DefaultConfig", 57 | "corsheaders", 58 | "generic_permissions.apps.GenericPermissionsConfig", 59 | ] 60 | 61 | if "postgresql" in DATABASES["default"]["ENGINE"]: # pragma: no cover 62 | INSTALLED_APPS.append("django.contrib.postgres") 63 | 64 | MIDDLEWARE = [ 65 | "django.middleware.security.SecurityMiddleware", 66 | "corsheaders.middleware.CorsMiddleware", 67 | "django.middleware.common.CommonMiddleware", 68 | "django.middleware.locale.LocaleMiddleware", 69 | "whitenoise.middleware.WhiteNoiseMiddleware", 70 | ] 71 | 72 | ROOT_URLCONF = "document_merge_service.urls" 73 | WSGI_APPLICATION = "document_merge_service.wsgi.application" 74 | 75 | TEMPLATES = [ 76 | { 77 | "BACKEND": "django.template.backends.django.DjangoTemplates", 78 | "DIRS": [], 79 | "APP_DIRS": True, 80 | "OPTIONS": { 81 | "context_processors": [ 82 | "django.template.context_processors.debug", 83 | "django.template.context_processors.request", 84 | "django.contrib.messages.context_processors.messages", 85 | ] 86 | }, 87 | } 88 | ] 89 | 90 | 91 | # Cache 92 | # 
https://docs.djangoproject.com/en/1.11/ref/settings/#caches 93 | 94 | CACHES = { 95 | "default": { 96 | "BACKEND": env.str( 97 | "CACHE_BACKEND", default="django.core.cache.backends.locmem.LocMemCache" 98 | ), 99 | "LOCATION": env.str("CACHE_LOCATION", default=""), 100 | "OPTIONS": env.dict( 101 | "CACHE_OPTIONS", 102 | default={}, 103 | cast={ 104 | "value": str, 105 | # Common options for PyMemcacheCache which need to have the 106 | # proper datatype in order to work 107 | "cast": { 108 | "connect_timeout": float, 109 | "timeout": float, 110 | "no_delay": bool, 111 | "ignore_exc": bool, 112 | }, 113 | }, 114 | ), 115 | } 116 | } 117 | 118 | 119 | # CORS 120 | CORS_ORIGIN_ALLOW_ALL = env.bool("CORS_ORIGIN_ALLOW_ALL", False) 121 | CORS_ORIGIN_REGEX_WHITELIST = [r"^(https?://)?127\.0\.0\.1:\d{4}$"] 122 | CORS_ORIGIN_REGEX_WHITELIST += env.list( 123 | "CORS_ORIGIN_REGEX_WHITELIST", default=[r"^(https?://)?127\.0\.0\.1:\d{4}$"] 124 | ) 125 | 126 | 127 | # Internationalization 128 | # https://docs.djangoproject.com/en/1.11/topics/i18n/ 129 | 130 | LANGUAGE_CODE = env.str("LANGUAGE_CODE", "en-us") 131 | TIME_ZONE = env.str("TIME_ZONE", "UTC") 132 | USE_I18N = True 133 | USE_TZ = True 134 | 135 | 136 | def parse_admins(admins): 137 | """ 138 | Parse env admins to django admins. 
139 | 140 | Example of ADMINS environment variable: 141 | Test Example <test@example.com>,Test2 <test2@example.com> 142 | """ 143 | result = [] 144 | for admin in admins: 145 | match = re.search(r"(.+) \<(.+@.+)\>", admin) 146 | if not match: # pragma: no cover 147 | raise environ.ImproperlyConfigured( 148 | 'In ADMINS admin "{0}" is not in correct ' 149 | '"Firstname Lastname <email@example.com>"'.format(admin) 150 | ) 151 | result.append((match.group(1), match.group(2))) 152 | return result 153 | 154 | 155 | ADMINS = parse_admins(env.list("ADMINS", default=[])) 156 | 157 | 158 | # Static files (CSS, JavaScript, Images) 159 | # https://docs.djangoproject.com/en/1.11/howto/static-files/ 160 | 161 | STATIC_URL = "/static/" 162 | STATIC_ROOT = os.path.join(django_root, "staticfiles") 163 | 164 | # Media files 165 | 166 | STORAGES = { 167 | "default": { 168 | "BACKEND": env.str( 169 | "FILE_STORAGE", default="django.core.files.storage.FileSystemStorage" 170 | ) 171 | }, 172 | "staticfiles": { 173 | "BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage", 174 | }, 175 | } 176 | MEDIA_ROOT = env.str("MEDIA_ROOT", "") 177 | # TODO: This should be removed in favor of storing the files in a bucket 178 | # https://code.djangoproject.com/ticket/32991 179 | MEDIA_URL = env.str("MEDIA_URL", "api/v1/template/") 180 | 181 | # django-storages S3 settings 182 | DMS_ENABLE_AT_REST_ENCRYPTION = env.bool("DMS_ENABLE_AT_REST_ENCRYPTION", False) 183 | S3_STORAGE_OPTIONS = { 184 | "access_key": env.str("DMS_S3_ACCESS_KEY_ID", "minio"), 185 | "secret_key": env.str("DMS_S3_SECRET_ACCESS_KEY", "minio123"), 186 | "bucket_name": env.str("DMS_STORAGE_BUCKET_NAME", "dms-media"), 187 | "endpoint_url": env.str("DMS_S3_ENDPOINT_URL", "http://minio:9000"), 188 | "region_name": env.str("DMS_S3_REGION_NAME", None), 189 | "location": env.str("DMS_LOCATION", ""), 190 | "file_overwrite": env.bool("DMS_S3_FILE_OVERWRITE", False), 191 | "signature_version": env.str("DMS_S3_SIGNATURE_VERSION", "v2"), 192 | "use_ssl": env.bool("DMS_S3_USE_SSL", 
default=True), 193 | "verify": env.bool("DMS_S3_VERIFY", default=None), 194 | } 195 | 196 | if DMS_ENABLE_AT_REST_ENCRYPTION: # pragma: no cover 197 | S3_STORAGE_OPTIONS["object_parameters"] = { 198 | "SSECustomerKey": env.str( 199 | "DMS_S3_STORAGE_SSEC_SECRET", 200 | default=default("x" * 32), 201 | ), 202 | "SSECustomerAlgorithm": "AES256", 203 | } 204 | 205 | if ( 206 | STORAGES["default"]["BACKEND"] == "storages.backends.s3.S3Storage" 207 | ): # pragma: no cover 208 | STORAGES["default"]["OPTIONS"] = S3_STORAGE_OPTIONS 209 | 210 | # unoconv 211 | UNOCONV_ALLOWED_TYPES = env.list("UNOCOV_ALLOWED_TYPES", default=["pdf"]) 212 | UNOCONV_PYTHON = env.str("UNOCONV_PYTHON", default="/usr/bin/python3") 213 | UNOCONV_PATH = env.str("UNOCONV_PATH", default="/usr/bin/unoconv") 214 | 215 | 216 | # Jinja2 217 | DOCXTEMPLATE_JINJA_EXTENSIONS = env.list("DOCXTEMPLATE_JINJA_EXTENSIONS", default=[]) 218 | 219 | # Authentication 220 | 221 | REQUIRE_AUTHENTICATION = env.bool("REQUIRE_AUTHENTICATION", False) 222 | 223 | OIDC_USERINFO_ENDPOINT = env.str("OIDC_USERINFO_ENDPOINT", default=None) 224 | OIDC_VERIFY_SSL = env.bool("OIDC_VERIFY_SSL", default=True) 225 | OIDC_GROUPS_CLAIM = env.str("OIDC_GROUPS_CLAIM", default="") 226 | OIDC_USERNAME_CLAIM = env.str("OIDC_USERNAME_CLAIM", default="sub") 227 | OIDC_BEARER_TOKEN_REVALIDATION_TIME = env.int( 228 | "OIDC_BEARER_TOKEN_REVALIDATION_TIME", default=0 229 | ) 230 | 231 | # Rest framework 232 | # https://www.django-rest-framework.org/api-guide/settings/ 233 | 234 | REST_FRAMEWORK = { 235 | "DEFAULT_PERMISSION_CLASSES": [ 236 | "document_merge_service.api.permissions.AsConfigured", 237 | ], 238 | "DEFAULT_AUTHENTICATION_CLASSES": [ 239 | "document_merge_service.api.authentication.BearerTokenAuthentication" 240 | ], 241 | "UNAUTHENTICATED_USER": "document_merge_service.api.authentication.AnonymousUser", 242 | "DEFAULT_FILTER_BACKENDS": ( 243 | "rest_framework.filters.OrderingFilter", 244 | 
"django_filters.rest_framework.DjangoFilterBackend", 245 | "rest_framework.filters.SearchFilter", 246 | ), 247 | "TEST_REQUEST_DEFAULT_FORMAT": "json", 248 | } 249 | 250 | PAGINATION_ENABLED = env.bool("PAGINATION_ENABLED", True) 251 | PAGINATION_DEFAULT_PAGE_SIZE = env.int("PAGINATION_DEFAULT_PAGE_SIZE", 100) 252 | PAGINATION_MAX_PAGE_SIZE = env.int("PAGINATION_MAX_PAGE_SIZE", 1000) 253 | 254 | if PAGINATION_ENABLED: 255 | REST_FRAMEWORK.update( 256 | { 257 | "DEFAULT_PAGINATION_CLASS": "document_merge_service.api.pagination.APIPagination", 258 | "PAGE_SIZE": PAGINATION_DEFAULT_PAGE_SIZE, 259 | } 260 | ) 261 | 262 | # Logging 263 | ENABLE_ADMIN_EMAIL_LOGGING = env.bool("ENABLE_ADMIN_EMAIL_LOGGING", False) 264 | 265 | LOGGING = { 266 | "version": 1, 267 | "disable_existing_loggers": False, 268 | "handlers": { 269 | "console": { 270 | "level": "WARNING", 271 | "filters": None, 272 | "class": "logging.StreamHandler", 273 | }, 274 | "mail_admins": { 275 | "level": "ERROR", 276 | "filters": None, 277 | "class": "django.utils.log.AdminEmailHandler", 278 | }, 279 | }, 280 | "loggers": {"django": {"handlers": ["console"], "level": "WARNING"}}, 281 | } 282 | 283 | URL_PREFIX = env.str("URL_PREFIX", default="") 284 | 285 | # Email settings 286 | SERVER_EMAIL = env.str("SERVER_EMAIL", default="root@localhost") 287 | DEFAULT_FROM_EMAIL = env.str("DEFAULT_FROM_EMAIL", default="webmaster@localhost") 288 | EMAIL_HOST = env.str("EMAIL_HOST", default="localhost") 289 | EMAIL_PORT = env.int("EMAIL_PORT", default=25) 290 | EMAIL_HOST_USER = env.str("EMAIL_HOST_USER", default="") 291 | EMAIL_HOST_PASSWORD = env.str("EMAIL_HOST_PASSWORD", default="") 292 | EMAIL_USE_TLS = env.bool("EMAIL_USE_TLS", default=False) 293 | 294 | # Email error handler 295 | if ENABLE_ADMIN_EMAIL_LOGGING: # pragma: no cover 296 | LOGGING["loggers"]["django"]["handlers"].append("mail_admins") # type: ignore 297 | 298 | # Sentry error tracking 299 | SENTRY_DSN = env.str("SENTRY_DSN", default="") 300 | 
SENTRY_ENVIRONMENT = env.str("SENTRY_ENVIRONMENT", default="development") 301 | SENTRY_TRACES_SAMPLE_RATE = env.float("SENTRY_TRACES_SAMPLE_RATE", default=0.01) 302 | SENTRY_SEND_DEFAULT_PII = env.bool("SENTRY_SEND_DEFAULT_PII", default=False) 303 | 304 | if SENTRY_DSN: # pragma: no cover 305 | sentry_init( 306 | SENTRY_DSN, 307 | SENTRY_ENVIRONMENT, 308 | SENTRY_TRACES_SAMPLE_RATE, 309 | SENTRY_SEND_DEFAULT_PII, 310 | ) 311 | 312 | # https://github.com/adfinis/django-generic-api-permissions 313 | GENERIC_PERMISSIONS_PERMISSION_CLASSES = env.list("DMS_PERMISSION_CLASSES", default=[]) 314 | GENERIC_PERMISSIONS_VISIBILITY_CLASSES = env.list("DMS_VISIBILITY_CLASSES", default=[]) 315 | 316 | # App specific arguments for the extension classes 317 | EXTENSIONS_ARGUMENTS = env.dict("EXTENSIONS_ARGUMENTS", default={}) 318 | 319 | # DMS potentially uses a large number of fields when creating templates due to 320 | # the possibility to validate a template against available placeholders. 321 | # https://docs.djangoproject.com/en/5.1/ref/settings/#data-upload-max-number-fields 322 | DATA_UPLOAD_MAX_NUMBER_FIELDS = env.int("DATA_UPLOAD_MAX_NUMBER_FIELDS", default=1000) 323 | -------------------------------------------------------------------------------- /document_merge_service/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adfinis/document-merge-service/f380907eba4224a2041b19a840dfaa2019450da6/document_merge_service/tests/__init__.py -------------------------------------------------------------------------------- /document_merge_service/tests/test_sentry.py: -------------------------------------------------------------------------------- 1 | import sentry_sdk 2 | 3 | 4 | def test_sentry(sentry_mock): 5 | assert len(sentry_mock.method_calls) == 0 6 | sentry_sdk.capture_exception(Exception("test_sentry_exc")) 7 | assert len(sentry_mock.method_calls) == 1 8 | 
sentry_mock.record_lost_event.assert_not_called() 9 | assert ( 10 | sentry_mock.method_calls[0] 11 | .args[0] 12 | .items[0] 13 | .get_event()["exception"]["values"][0]["value"] 14 | == "test_sentry_exc" 15 | ) 16 | -------------------------------------------------------------------------------- /document_merge_service/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from django.conf import settings 3 | from django.core.exceptions import ImproperlyConfigured 4 | 5 | from ..api.unoconv import Unoconv 6 | 7 | 8 | def test_get_unoconv_formats(): 9 | uno = Unoconv(pythonpath=settings.UNOCONV_PYTHON, unoconvpath=settings.UNOCONV_PATH) 10 | formats = uno.get_formats() 11 | assert "pdf" in formats 12 | 13 | 14 | def test_get_unoconv_formats_invalid_format(monkeypatch): 15 | monkeypatch.setattr(settings, "UNOCONV_ALLOWED_TYPES", ["invalid"]) 16 | uno = Unoconv(pythonpath=settings.UNOCONV_PYTHON, unoconvpath=settings.UNOCONV_PATH) 17 | 18 | with pytest.raises(ImproperlyConfigured): 19 | uno.get_formats() 20 | -------------------------------------------------------------------------------- /document_merge_service/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | from django.urls import include, re_path 3 | 4 | urlpatterns = [ 5 | re_path( 6 | f"^{settings.URL_PREFIX}api/v1/", include("document_merge_service.api.urls") 7 | ) 8 | ] 9 | -------------------------------------------------------------------------------- /document_merge_service/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for the document_merge_service project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "document_merge_service.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import, unicode_literals 4 | 5 | import os 6 | import sys 7 | 8 | if __name__ == "__main__": 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "document_merge_service.settings") 10 | from django.core.management import execute_from_command_line 11 | 12 | execute_from_command_line(sys.argv) 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "document-merge-service" 3 | version = "8.0.0" 4 | description = "Merge Document Template Service" 5 | license = "GPL-3.0-or-later" 6 | authors = ["Adfinis AG "] 7 | homepage = "https://github.com/adfinis/document-merge-service" 8 | repository = "https://github.com/adfinis/document-merge-service" 9 | documentation = "https://github.com/adfinis/document-merge-service/blob/main/README.md" 10 | readme = "README.md" 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Framework :: Django :: 4", 20 | "Framework :: Django :: 4.2", 21 | ] 22 | include 
= ["CHANGELOG.md"] 23 | exclude = ["document-merge-service/**/tests"] 24 | 25 | [tool.poetry.dependencies] 26 | python = ">=3.10.0,<3.14" 27 | boto3 = { version = "^1.34.143", optional = true } 28 | Babel = "^2.15.0" 29 | Django = "~4.2.15" 30 | django-cors-headers = "^4.4.0" 31 | django-environ = ">=0.11.2" 32 | django-filter = ">=24.2" 33 | django-generic-api-permissions = "^0.4.6" 34 | django-storages = "^1.14.6" 35 | djangorestframework = "^3.15.2" 36 | docxtpl = ">=0.17,<0.21" 37 | gunicorn = "^23.0.0" 38 | Jinja2 = "^3.1.4" 39 | pillow = ">=10.3,<12" 40 | psycopg = { version = "^3.1.19", optional = true, extras = ["binary"] } 41 | pymemcache = { version = "^4.0.0", optional = true } 42 | python-dateutil = "^2.9.0" 43 | python-magic = "^0.4.27" 44 | python-memcached = "^1.59" 45 | requests = "^2.32.3" 46 | sentry-sdk = ">=2.7,<2.29" 47 | tqdm = "^4.66.4" 48 | urllib3 = "^2.2.1" 49 | whitenoise = "^6.9.0" 50 | xltpl = "~0.21" 51 | poetry = "^2.0.0" 52 | 53 | [tool.poetry.group.dev] 54 | optional = true 55 | 56 | [tool.poetry.group.dev.dependencies] 57 | django-stubs = "5.2.0" 58 | factory-boy = "3.3.3" 59 | gitlint = "0.19.1" 60 | mypy = "1.15.0" 61 | pdbpp = "0.11.6" 62 | psutil = "7.0.0" 63 | pytest = "8.3.5" 64 | pytest-cov = "6.1.1" 65 | pytest-django = "4.11.1" 66 | pytest-env = "1.1.5" 67 | pytest-factoryboy = "2.7.0" 68 | pytest-mock = "3.14.0" 69 | pytest-randomly = "3.16.0" 70 | python-semantic-release = "7.34.6" 71 | requests-mock = "1.12.1" 72 | ruff = "0.11.10" 73 | syrupy = "4.9.1" 74 | types-python-dateutil = "2.9.0.20250516" 75 | types-requests = "2.32.0.20250515" 76 | types-setuptools = "80.7.0.20250516" 77 | types-toml = "0.10.8.20240310" 78 | 79 | [tool.poetry.extras] 80 | full = ["psycopg", "pymemcache", "boto3"] 81 | slim = [] 82 | 83 | [build-system] 84 | requires = ["poetry-core>=1.0.0"] 85 | build-backend = "poetry.core.masonry.api" 86 | 87 | [tool.ruff] 88 | exclude = ["migrations", "snapshots", ".venv"] 89 | line-length = 88 90 | 91 | 
[tool.ruff.lint] 92 | select = ["C9", "D", "E", "F", "W", "I"] 93 | ignore = [ 94 | "D100", 95 | "D101", 96 | "D102", 97 | "D103", 98 | "D104", 99 | "D105", 100 | "D106", 101 | "D107", 102 | "D202", 103 | "E501", 104 | "D212", # Multi-line docstring summary should start at the first line 105 | "D213", # Multi-line docstring summary should start at the second line 106 | "D407", # Missing dashed underline after section 107 | "D406", # Section name should end with a newline 108 | "D203", # one-blank-line-before-class (docstring) 109 | ] 110 | 111 | [tool.ruff.lint.mccabe] 112 | max-complexity = 11 113 | 114 | [tool.ruff.lint.isort] 115 | combine-as-imports = true 116 | 117 | [tool.pytest.ini_options] 118 | addopts = "--reuse-db --randomly-seed=1521188766 --randomly-dont-reorganize" 119 | DJANGO_SETTINGS_MODULE = "document_merge_service.settings" 120 | filterwarnings = [ 121 | "error::DeprecationWarning", 122 | "error::PendingDeprecationWarning", 123 | "ignore:invalid escape sequence", # xltpl 124 | "ignore:pkg_resources is deprecated as an API:DeprecationWarning", # docxtpl 125 | ] 126 | env = [ 127 | "ADMINS=Test Example <test@example.com>,Test2 <test2@example.com>", 128 | "OIDC_USERINFO_ENDPOINT=mock://document-merge-service.github.com/openid/userinfo", 129 | "OIDC_BEARER_TOKEN_REVALIDATION_TIME=60", 130 | ] 131 | 132 | [tool.coverage.run] 133 | source = ["."] 134 | 135 | [tool.coverage.report] 136 | fail_under = 100 137 | 138 | exclude_lines = [ 139 | "pragma: no cover", 140 | "pragma: todo cover", 141 | "def __str__", 142 | "def __unicode__", 143 | "def __repr__", 144 | ] 145 | omit = [ 146 | "*/migrations/*", 147 | "*/apps.py", 148 | "manage.py", 149 | "setup.py", 150 | "document_merge_service/settings_*.py", 151 | "document_merge_service/wsgi.py", 152 | "document_merge_service/document_merge_service_metadata.py", 153 | ] 154 | show_missing = true 155 | 156 | [tool.semantic_release] 157 | version_source = "tag" 158 | version_toml = ["pyproject.toml:tool.poetry.version"] 159 | 
--------------------------------------------------------------------------------