├── .devcontainer ├── compose.yaml └── devcontainer.json ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ ├── build-and-publish.yaml │ ├── check_pull_request.yaml │ ├── cron.yaml │ ├── open-release-pr.yaml │ ├── tag-release.yaml │ └── workflow.yaml ├── .gitignore ├── .sync.yml ├── .vscode └── tasks.json ├── CHANGELOG.md ├── Dockerfile ├── HISTORY.md ├── LICENSE ├── README.md ├── compose.yaml ├── django_scrubber ├── __init__.py ├── admin.py ├── apps.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── scrub_data.py │ │ └── scrub_validation.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── scrubbers.py ├── services │ ├── __init__.py │ └── validator.py └── tests │ ├── __init__.py │ ├── services │ ├── __init__.py │ └── test_validator.py │ ├── test_models.py │ ├── test_scrub_data.py │ ├── test_scrub_validator.py │ ├── test_scrubbers.py │ └── test_settings.py ├── example ├── __init__.py ├── apps.py ├── models.py └── settings.py ├── manage.py ├── pyproject.toml ├── requirements-ci.txt ├── requirements-test.txt ├── system_dependencies.txt └── version /.devcontainer/compose.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | services: 7 | app: 8 | entrypoint: "" 9 | command: sleep infinity 10 | build: 11 | context: . 
12 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------- 2 | // Managed by modulesync - DO NOT EDIT 3 | // ------------------------------------------------- 4 | 5 | { 6 | // name of the devcontainer 7 | "name": "django-scrubber", 8 | // define the docker compose file to use for the devcontainer 9 | "dockerComposeFile": [ 10 | "../compose.yaml", 11 | "./compose.yaml" 12 | ], 13 | // define which services from the compose file to start and stop 14 | "runServices": ["app"], 15 | // define the docker-compose service to use for the dev container 16 | "service": "app", 17 | // define the workspace folder our app is located in 18 | "workspaceFolder": "/app", 19 | // set the remote user to connect as 20 | "remoteUser": "app", 21 | // features to be installed in the dev container 22 | "features": { 23 | "ghcr.io/devcontainers/features/common-utils:2": {}, 24 | "ghcr.io/devcontainers/features/git:1": {} 25 | }, 26 | // configure vscode 27 | "customizations": { 28 | // Configure properties specific to VS Code. 
29 | "vscode": { 30 | "settings": { 31 | // terminal settings 32 | "terminal.integrated.profiles.linux": { 33 | "bash": { 34 | "path": "/bin/bash" 35 | } 36 | }, 37 | "terminal.integrated.defaultProfile.linux": "bash", 38 | // language specific editor settings 39 | "[python]": { 40 | "editor.defaultFormatter": "charliermarsh.ruff" 41 | }, 42 | "[django-html]": { 43 | "editor.defaultFormatter": "monosans.djlint" 44 | }, 45 | "[html]": { 46 | "editor.defaultFormatter": "monosans.djlint" 47 | }, 48 | "[markdown]": { 49 | "files.trimTrailingWhitespace": false 50 | }, 51 | // allow tasks to run on editor startup 52 | "task.allowAutomaticTasks": "on", 53 | // python environment 54 | "python.defaultInterpreterPath": "/home/app/venv/bin/python", 55 | "python.analysis.extraPaths": [ 56 | "/home/app/venv/lib/python3.12/site-packages/" 57 | ], 58 | "python.analysis.useImportHeuristic": true, 59 | "python.analysis.autoSearchPaths": true, 60 | "python.analysis.autoImportCompletions": true, 61 | "python.analysis.indexing": true, 62 | "python.analysis.packageIndexDepths": [ 63 | { 64 | "name": "", 65 | "depth": 10, 66 | "includeAllSymbols": true 67 | } 68 | ], 69 | // don't activate the virtual environment every time as we're using the env binary 70 | "python.terminal.activateEnvironment": false, 71 | "python.terminal.activateEnvInCurrentTerminal": true, 72 | // used for autocomplete etc 73 | "python.languageServer": "Pylance", 74 | // editor settings 75 | "editor.formatOnPaste": true, 76 | "editor.formatOnSave": true, 77 | "editor.codeActionsOnSave": { 78 | "source.fixAll": "always", 79 | "source.organizeImports": "always" 80 | }, 81 | "editor.rulers": [ 82 | 88, 83 | 120 84 | ], 85 | // shows the nested current scopes during the scroll at the top of the editor 86 | "editor.stickyScroll.enabled": true, 87 | // file formatting options 88 | "files.trimTrailingWhitespace": true, 89 | "files.insertFinalNewline": true, 90 | "files.associations": { 91 | "**/*.html": "html", 92 | 
"**/templates/*": "django-html", 93 | "**/requirements{/**,*}.{txt,in}": "pip-requirements" 94 | }, 95 | "emmet.includeLanguages": { 96 | "django-html": "html" 97 | }, 98 | // files to exclude from search results 99 | "search.exclude": { 100 | "**/__pycache__": true, 101 | "**/.bash_aliases": true, 102 | "**/.git": true, 103 | "**/.ipython": true, 104 | "**/.mypy_cache": true, 105 | "**/logs": true, 106 | "**/node_modules": true, 107 | "**/tmp": true 108 | }, 109 | // files to exclude from all checks 110 | "files.exclude": { 111 | "**/*.pyc": true, 112 | "**/.git": false, 113 | "**/migrations/*": false 114 | }, 115 | // gitlens settings 116 | "gitlens.codeLens.enabled": false, 117 | "gitlens.advanced.blame.customArguments": [ 118 | "--ignore-revs-file", 119 | ".git-blame-ignore-revs" 120 | ], 121 | // copilot settings 122 | "github.copilot.editor.enableAutoCompletions": true, 123 | "github.copilot.enable": { 124 | "*": true, 125 | "plaintext": false, 126 | "markdown": false, 127 | "scminput": false 128 | } 129 | }, 130 | // list all extensions that should be installed when the container is created 131 | "extensions": [ 132 | // --------------------------------------- 133 | // CODING SUPPORT 134 | // --------------------------------------- 135 | // Visual Studio IntelliCode - AI-assisted development 136 | // https://marketplace.visualstudio.com/items?itemName=VisualStudioExptTeam.vscodeintellicode 137 | "visualstudioexptteam.vscodeintellicode", 138 | // --------------------------------------- 139 | // PYTHON 140 | // --------------------------------------- 141 | // Python extension for Visual Studio Code 142 | // https://marketplace.visualstudio.com/items?itemName=ms-python.python 143 | "ms-python.python", 144 | // Pylance - A performant, feature-rich language server for Python in VS Code 145 | // https://marketplace.visualstudio.com/items?itemName=ms-python.vscode-pylance 146 | "ms-python.vscode-pylance", 147 | // Python docstring generator 148 | // 
https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring 149 | "njpwerner.autodocstring", 150 | // Proper indentation for Python 151 | // https://marketplace.visualstudio.com/items?itemName=KevinRose.vsc-python-indent 152 | "KevinRose.vsc-python-indent", 153 | // Visually highlight indentation depth 154 | // https://marketplace.visualstudio.com/items?itemName=oderwat.indent-rainbow 155 | "oderwat.indent-rainbow", 156 | // Code comment highlights 157 | // https://marketplace.visualstudio.com/items?itemName=aaron-bond.better-comments 158 | "aaron-bond.better-comments", 159 | // Linting with ruff 160 | // https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff 161 | "charliermarsh.ruff@2025.22.0", 162 | // Linting with mypy 163 | // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker 164 | "ms-python.mypy-type-checker", 165 | // --------------------------------------- 166 | // GIT 167 | // --------------------------------------- 168 | // View git log, file history, compare branches or commits 169 | // https://marketplace.visualstudio.com/items?itemName=donjayamanne.githistory 170 | "donjayamanne.githistory", 171 | // Supercharge the Git capabilities built into Visual Studio Code 172 | // https://marketplace.visualstudio.com/items?itemName=eamodio.gitlens 173 | "eamodio.gitlens", 174 | // GitLab Workflow 175 | // https://marketplace.visualstudio.com/items?itemName=GitLab.gitlab-workflow 176 | "GitLab.gitlab-workflow", 177 | // create / apply git patches 178 | // https://marketplace.visualstudio.com/items?itemName=paragdiwan.gitpatch 179 | "paragdiwan.gitpatch", 180 | // --------------------------------------- 181 | // FILE TYPE SUPPORT 182 | // --------------------------------------- 183 | // Support for dotenv file syntax 184 | // https://marketplace.visualstudio.com/items?itemName=mikestead.dotenv 185 | "mikestead.dotenv", 186 | // Syntax highlighting for .po files 187 | // 
https://marketplace.visualstudio.com/items?itemName=mrorz.language-gettext 188 | "mrorz.language-gettext", 189 | // Duplicate translation error marking for .po files 190 | // https://marketplace.visualstudio.com/items?itemName=ovcharik.gettext-duplicate-error 191 | "ovcharik.gettext-duplicate-error", 192 | // Formatter and linter for Jinja2 templates 193 | // https://marketplace.visualstudio.com/items?itemName=monosans.djlint 194 | "monosans.djlint", 195 | // YAML language support 196 | // https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml 197 | "redhat.vscode-yaml", 198 | // TOML language support 199 | // https://marketplace.visualstudio.com/items?itemName=tamasfe.even-better-toml 200 | "tamasfe.even-better-toml", 201 | // --------------------------------------- 202 | // DJANGO 203 | // --------------------------------------- 204 | // Django template support 205 | // https://marketplace.visualstudio.com/items?itemName=batisteo.vscode-django 206 | "batisteo.vscode-django" 207 | ] 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.{py,rst,ini}] 12 | indent_style = space 13 | indent_size = 4 14 | 15 | [*.{html,css,scss,json,yml}] 16 | indent_style = space 17 | indent_size = 2 18 | 19 | [*.md] 20 | trim_trailing_whitespace = false 21 | 22 | [Makefile] 23 | indent_style = tab 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * Django Scrubber version: 2 | * Django version: 3 | * Python version: 4 | * Operating System: 5 | 6 | ### Description 7 | 8 | Describe what you 
were trying to get done. 9 | Tell us what happened, what went wrong, and what you expected to happen. 10 | 11 | ### What I Did 12 | 13 | ``` 14 | Paste the command(s) you ran and the output. 15 | If there was a crash, please include the traceback here. 16 | ``` 17 | -------------------------------------------------------------------------------- /.github/workflows/build-and-publish.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | name: Build and publish 7 | 8 | on: 9 | push: 10 | tags: 11 | - "**" 12 | 13 | jobs: 14 | # build package, make release on github and upload to pypi when a new tag is pushed 15 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/build-and-publish.yaml 16 | build-and-release: 17 | name: Build and publish 18 | permissions: 19 | contents: write 20 | id-token: write 21 | uses: RegioHelden/github-reusable-workflows/.github/workflows/build-and-publish.yaml@v2.2.1 22 | with: 23 | python-version: "3.12" 24 | 25 | # must be defined in the repo as trusted publishing does not work with reusable workflows yet 26 | # see https://github.com/pypi/warehouse/issues/11096 27 | publish-pypi: 28 | name: Publish on PyPI 29 | runs-on: ubuntu-latest 30 | permissions: 31 | contents: read 32 | id-token: write 33 | needs: 34 | - build-and-release 35 | steps: 36 | - name: Set up Python 37 | uses: actions/setup-python@v5 38 | with: 39 | python-version: "3.12" 40 | 41 | - name: Install the latest version of uv 42 | uses: astral-sh/setup-uv@v6 43 | 44 | - name: Download the distribution packages 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: python-package-distributions 48 | path: dist/ 49 | 50 | - name: Publish 51 | run: uv publish --trusted-publishing always 52 | 
-------------------------------------------------------------------------------- /.github/workflows/check_pull_request.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | name: Check pull request 7 | 8 | on: 9 | # when labels are added/removed or draft status gets changed to ready for review 10 | pull_request: 11 | types: [opened, synchronize, reopened, labeled, unlabeled, ready_for_review] 12 | 13 | jobs: 14 | # make sure the pull request matches our guidelines like having at least one label assigned 15 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/check-pull-request.yaml 16 | check-pull-request: 17 | name: Check pull request 18 | permissions: 19 | issues: write 20 | pull-requests: write 21 | uses: RegioHelden/github-reusable-workflows/.github/workflows/check-pull-request.yaml@v2.2.1 22 | -------------------------------------------------------------------------------- /.github/workflows/cron.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | name: Cron actions 7 | 8 | on: 9 | workflow_dispatch: 10 | schedule: 11 | - cron: "0 0 * * *" 12 | 13 | jobs: 14 | # synchronize labels from central definition at https://github.com/RegioHelden/.github/blob/main/labels.yaml 15 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/sync-labels.yaml 16 | update-labels: 17 | name: Update labels 18 | permissions: 19 | issues: write 20 | uses: RegioHelden/github-reusable-workflows/.github/workflows/sync-labels.yaml@v2.2.1 21 | 
-------------------------------------------------------------------------------- /.github/workflows/open-release-pr.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | name: Open release PR 7 | 8 | on: 9 | workflow_dispatch: 10 | 11 | jobs: 12 | # trigger creation of a release pull request with version increase and changelog update 13 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/release-pull-request.yaml 14 | open-release-pr: 15 | name: Open release PR 16 | permissions: 17 | contents: write 18 | pull-requests: write 19 | uses: RegioHelden/github-reusable-workflows/.github/workflows/release-pull-request.yaml@v2.2.1 20 | secrets: 21 | personal-access-token: "${{ secrets.COMMIT_KEY }}" 22 | -------------------------------------------------------------------------------- /.github/workflows/tag-release.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | name: Tag release 7 | 8 | on: 9 | push: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | # create a new git tag when a version update was merged to main branch 15 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/tag-release.yaml 16 | tag-release: 17 | name: Create tag 18 | permissions: 19 | contents: write 20 | uses: RegioHelden/github-reusable-workflows/.github/workflows/tag-release.yaml@v2.2.1 21 | with: 22 | python-version: "3.12" 23 | secrets: 24 | personal-access-token: "${{ secrets.COMMIT_KEY }}" 25 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yaml: 
-------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | # code pushed to pull request branch 5 | push: 6 | branches-ignore: 7 | - main 8 | # when draft state is removed (needed as automatically created PRs are not triggering this action) 9 | pull_request: 10 | types: [ready_for_review] 11 | 12 | env: 13 | GITHUB_WORKFLOW: true 14 | COVERAGE_PYTHON_VERSION: 3.12 15 | COVERAGE_DJANGO_VERSION: 5.2 16 | COVERAGE_DATABASE: postgres 17 | 18 | jobs: 19 | # lint code for errors 20 | # see https://github.com/RegioHelden/github-reusable-workflows/blob/main/.github/workflows/python-ruff.yaml 21 | lint: 22 | name: Lint 23 | permissions: 24 | contents: read 25 | uses: RegioHelden/github-reusable-workflows/.github/workflows/python-ruff.yaml@v2.1.0 26 | with: 27 | ruff-version: "0.11.5" 28 | python-version: "3.12" 29 | 30 | test: 31 | name: Test and coverage 32 | needs: 33 | - lint 34 | runs-on: ubuntu-24.04 35 | 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | python-version: 40 | - "3.9" 41 | - "3.10" 42 | - "3.11" 43 | - "3.12" 44 | - "3.13" 45 | django: 46 | - "4.2" 47 | - "5.0" 48 | - "5.1" 49 | - "5.2" 50 | database: 51 | - sqlite 52 | - mysql 53 | - postgres 54 | exclude: 55 | - django: "4.2" 56 | python-version: "3.13" 57 | - django: "5.0" 58 | python-version: "3.9" 59 | - django: "5.0" 60 | python-version: "3.13" 61 | - django: "5.1" 62 | python-version: "3.9" 63 | - django: "5.2" 64 | python-version: "3.9" 65 | 66 | services: 67 | # postgres service 68 | postgres: 69 | image: postgres:17-alpine 70 | env: 71 | POSTGRES_USER: postgres 72 | POSTGRES_PASSWORD: postgres 73 | POSTGRES_DB: postgres 74 | ports: 75 | - 5432:5432 76 | options: >- 77 | --health-cmd="pg_isready" 78 | --health-interval=10s 79 | --health-timeout=5s 80 | --health-retries=5 81 | 82 | # mysql service 83 | mysql: 84 | image: mysql:9 85 | env: 86 | MYSQL_ALLOW_EMPTY_PASSWORD: yes 87 | MYSQL_DATABASE: test 88 | ports: 89 | - 3306:3306 90 | 
options: >- 91 | --health-cmd="mysqladmin ping" 92 | --health-interval=10s 93 | --health-timeout=5s 94 | --health-retries=5 95 | 96 | steps: 97 | - uses: actions/checkout@v4 98 | with: 99 | persist-credentials: false 100 | 101 | - name: Set up Python ${{ matrix.python-version }} 102 | uses: actions/setup-python@v5 103 | with: 104 | python-version: ${{ matrix.python-version }} 105 | 106 | - name: Install the latest version of uv 107 | uses: astral-sh/setup-uv@v5 108 | 109 | - name: Install requirements 110 | run: uv pip install --system -r requirements-ci.txt 111 | 112 | - name: Install Django ${{ matrix.django }} 113 | run: uv pip install --system "Django~=${{ matrix.django }}" 114 | - name: Install MySQL libs 115 | if: matrix.database == 'mysql' 116 | run: uv pip install --system mysqlclient>=2.2.7 django-mysql>=4.16.0 117 | - name: Install postgres libs 118 | if: matrix.database == 'postgres' 119 | run: uv pip install --system psycopg>=3.2.6 120 | 121 | - name: Install package 122 | run: uv pip install --system -e . 
123 | 124 | - name: Run tests 125 | run: python manage.py test 126 | env: 127 | DATABASE_ENGINE: ${{ matrix.database }} 128 | 129 | - name: Coverage 130 | if: | 131 | matrix.python-version == env.COVERAGE_PYTHON_VERSION 132 | && 133 | matrix.django == env.COVERAGE_DJANGO_VERSION 134 | && 135 | matrix.database == env.COVERAGE_DATABASE 136 | && 137 | github.repository == 'RegioHelden/django-scrubber' 138 | && 139 | ( 140 | github.event_name == 'push' 141 | || 142 | ( 143 | github.event_name == 'pull_request' 144 | && 145 | github.head_ref == 'master' 146 | ) 147 | ) 148 | run: uv pip install --system coverage coveralls && coverage run --source=django_scrubber manage.py test && coveralls 149 | env: 150 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 151 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 152 | DATABASE_ENGINE: ${{ matrix.database }} 153 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | dist 12 | build 13 | eggs 14 | parts 15 | bin 16 | var 17 | sdist 18 | develop-eggs 19 | .installed.cfg 20 | lib 21 | lib64 22 | 23 | # Installer logs 24 | pip-log.txt 25 | 26 | # Unit test / coverage reports 27 | .coverage 28 | .tox 29 | nosetests.xml 30 | htmlcov 31 | 32 | # Translations 33 | *.mo 34 | 35 | # dev env 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | .mypy_cache 40 | .ruff_cache 41 | .ipython 42 | .bash_history 43 | compose.override.yaml 44 | 45 | # Language specific 46 | __pycache__ 47 | *.pyc 48 | *.py[cod] 49 | *.sw* 50 | 51 | # Pycharm/Intellij 52 | .idea 53 | .pycharm_helpers 54 | 55 | # Complexity 56 | output/*.html 57 | output/*/index.html 58 | 59 | # Sphinx 60 | docs/_build 
61 | 62 | # OS 63 | .bash_history 64 | 65 | # database 66 | db.sqlite3 67 | -------------------------------------------------------------------------------- /.sync.yml: -------------------------------------------------------------------------------- 1 | --- 2 | :global: 3 | python_min_version: "3.9" 4 | python_supported_versions: ["3.9", "3.10", "3.11", "3.12", "3.13"] 5 | module_rootname: "django_scrubber" 6 | module_description: "Data anonymizer for Django" 7 | module_keywords: 8 | ["django", "data protection", "scrubber", "scrub", "anonymize", "gdpr"] 9 | dependencies: ["Faker>=20.0.0", "Django>=4.2,<6.0"] 10 | max_line_length: 119 11 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // ------------------------------------------- 2 | // Managed by modulesync - DO NOT EDIT 3 | // ------------------------------------------- 4 | 5 | // Defines standard actions that can be executed using the `Tasks: Run Task` command 6 | // See https://go.microsoft.com/fwlink/?LinkId=733558 for documentation 7 | // ------------------------------------------- 8 | { 9 | "version": "2.0.0", 10 | "problemMatcher": [], 11 | "presentation": { 12 | "reveal": "always", 13 | "panel": "new" 14 | }, 15 | "type": "shell", 16 | "tasks": [ 17 | { 18 | "label": "Test", 19 | "dependsOn": [ 20 | "Django: Run tests" 21 | ], 22 | // mark as the default build task so cmd/ctrl+shift+b will trigger it 23 | "group": { 24 | "kind": "test", 25 | "isDefault": true 26 | } 27 | }, 28 | { 29 | "label": "Django: Run tests", 30 | "command": "${config:python.defaultInterpreterPath}", 31 | "args": [ 32 | "manage.py", 33 | "test", 34 | "--no-input", 35 | "--parallel=2" 36 | ], 37 | "group": "test" 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v5.0.0](https://github.com/RegioHelden/django-scrubber/tree/v5.0.0) (2025-05-26) 4 | 5 | [Full Changelog](https://github.com/RegioHelden/django-scrubber/compare/v4.2.0...v5.0.0) 6 | 7 | **Breaking changes:** 8 | 9 | - Only fail validation with a non-zero exit code if strict mode is active [\#88](https://github.com/RegioHelden/django-scrubber/pull/88) (@lociii) 10 | 11 | **Merged pull requests:** 12 | 13 | - Update uv to 0.7.8 [\#91](https://github.com/RegioHelden/django-scrubber/pull/91) (@regiohelden-dev) 14 | - Update uv to 0.7.7 [\#90](https://github.com/RegioHelden/django-scrubber/pull/90) (@regiohelden-dev) 15 | - Update uv to 0.7.6 [\#89](https://github.com/RegioHelden/django-scrubber/pull/89) (@regiohelden-dev) 16 | - Update uv to 0.7.5 [\#87](https://github.com/RegioHelden/django-scrubber/pull/87) (@regiohelden-dev) 17 | - Update uv to 0.7.4 [\#86](https://github.com/RegioHelden/django-scrubber/pull/86) (@regiohelden-dev) 18 | - Update uv to 0.7.3 [\#85](https://github.com/RegioHelden/django-scrubber/pull/85) (@regiohelden-dev) 19 | - Update uv to 0.7.0 [\#84](https://github.com/RegioHelden/django-scrubber/pull/84) (@regiohelden-dev) 20 | - Update github-reusable-workflows to 2.2.1 [\#83](https://github.com/RegioHelden/django-scrubber/pull/83) (@regiohelden-dev) 21 | - Updates from modulesync [\#82](https://github.com/RegioHelden/django-scrubber/pull/82) (@regiohelden-dev) 22 | - Update github-reusable-workflows to 2.2.0 and uv to 0.6.17 [\#81](https://github.com/RegioHelden/django-scrubber/pull/81) (@regiohelden-dev) 23 | - Update github-reusable-workflows to 2.1.1 [\#80](https://github.com/RegioHelden/django-scrubber/pull/80) (@regiohelden-dev) 24 | - Update ruff VSCode integration to 2025.22.0, remove classifiers for unsupported Python versions [\#79](https://github.com/RegioHelden/django-scrubber/pull/79) (@regiohelden-dev) 25 | - Remove
dependabot integration, set Python version for GitHub actions [\#77](https://github.com/RegioHelden/django-scrubber/pull/77) (@regiohelden-dev) 26 | - Add supported python versions to sync config [\#76](https://github.com/RegioHelden/django-scrubber/pull/76) (@lociii) 27 | - Make dependabot set a proper label [\#75](https://github.com/RegioHelden/django-scrubber/pull/75) (@regiohelden-dev) 28 | - Bump regiohelden/github-reusable-workflows from 2.0.0 to 2.1.0 [\#74](https://github.com/RegioHelden/django-scrubber/pull/74) (@dependabot[bot]) 29 | - Integrate with modulesync [\#73](https://github.com/RegioHelden/django-scrubber/pull/73) (@regiohelden-dev) 30 | - Set python min version for modulesync [\#72](https://github.com/RegioHelden/django-scrubber/pull/72) (@lociii) 31 | - Align test setup with other libraries, prepare for modulesync rollout [\#71](https://github.com/RegioHelden/django-scrubber/pull/71) (@lociii) 32 | 33 | ## [4.1.0] - 2025-03-04 34 | 35 | **Fixed bugs:** 36 | 37 | - Restore output in `scrub_validation` command 38 | - Move back to annotation, instead of Subquery for performance reasons 39 | 40 | ## [4.0.0] - 2025-02-19 41 | 42 | **Breaking changes:** 43 | 44 | - Removed support for Python 3.8 45 | 46 | **Implemented enhancements:** 47 | 48 | - Added support for Python 3.13 - Thanks @GitRon 49 | - Improved documentation on concatenation of different field types 50 | - Removed outdated sqlite workaround 51 | - Switch linting and formatting to ruff 52 | - Add devcontainer setup for VSCode 53 | - Updates to GitHub actions 54 | 55 | ## [3.0.0] - 2024-09-10 56 | 57 | **Breaking changes:** 58 | 59 | - Removed `SCRUBBER_VALIDATION_WHITELIST` in favour of `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @GitRon 60 | 61 | **Implemented enhancements:** 62 | 63 | - Added Django test model `db.TestModel` to default whitelist of `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @GitRon 64 | - Removed support for the `mock` package in unit tests 65 | - 
Adjusted some default settings 66 | 67 | ## [2.1.1] - 2024-08-20 68 | 69 | **Fixed bugs:** 70 | 71 | - Fixed an issue where the management command `scrub_validation` could fail even though all models were skipped - Thanks @GitRon 72 | 73 | ## [2.1.0] - 2024-08-20 74 | 75 | **Implemented enhancements:** 76 | 77 | - Added support for `Django` version `5.1` - Thanks @GitRon 78 | - Added `SCRUBBER_VALIDATION_WHITELIST` and excluded Django core test model - Thanks @GitRon 79 | 80 | ## [2.0.0] - 2024-06-27 81 | 82 | **Breaking changes:** 83 | 84 | - Remove support for `Django` below version `4.2` 85 | - Remove support for `Python` below version `3.8` 86 | - Minimum required `Faker` version is now `20.0.0`, released 11/2023 87 | 88 | **Implemented enhancements:** 89 | 90 | - Added support for `Django` version `5.0` 91 | - Added support for `Python` version `3.12` 92 | - Add docker compose setup to run tests 93 | 94 | ## [1.3.0] - 2024-06-05 95 | 96 | **Implemented enhancements:** 97 | 98 | - Add support for regular expressions in `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @fbinz 99 | 100 | ## [1.2.2] - 2023-11-04 101 | 102 | **Implemented enhancements:** 103 | 104 | - Set `default_auto_field` for `django-scrubber` to `django.db.models.AutoField` to prevent overrides from Django settings - Thanks @GitRon 105 | 106 | ## [1.2.1] - 2023-11-03 107 | 108 | - Yanked 109 | 110 | ## [1.2.0] - 2023-04-01 111 | 112 | **Implemented enhancements:** 113 | 114 | - Added scrubber validation - Thanks @GitRon 115 | - Added strict mode - Thanks @GitRon 116 | 117 | ## [1.1.0] - 2022-07-11 118 | 119 | **Implemented enhancements:** 120 | 121 | - Invalid fields on scrubbers will no longer raise exception but just trigger warnings now 122 | - Author list completed 123 | 124 | ## [1.0.0] - 2022-07-11 125 | 126 | **Implemented enhancements:** 127 | 128 | - Meta data for python package improved - Thanks @GitRon 129 | 130 | ## [0.9.0] - 2022-06-27 131 | 132 | **Implemented enhancements:** 
133 | 134 | - Add functionality to scrub third party models like the Django user model, see https://github.com/RegioHelden/django-scrubber#scrubbing-third-party-models - Thanks @GitRon 135 | - Add tests for Python 3.10 - Thanks @costela 136 | 137 | ## [0.8.0] - 2022-05-01 138 | 139 | **Implemented enhancements:** 140 | 141 | - Add `keep-sessions` argument to scrub_data command. Will NOT truncate all (by definition critical) session data. Thanks @GitRon 142 | - Add `remove-fake-data` argument to scrub_data command. Will truncate the database table storing preprocessed data for the Faker library. Thanks @GitRon 143 | - Add Django 3.2 and 4.0 to test matrix 144 | 145 | **Breaking changes:** 146 | 147 | - Remove Python 3.6 from test matrix 148 | - Remove Django 2.2 and 3.1 from test matrix 149 | 150 | ## [0.7.0] - 2022-02-24 151 | 152 | **Implemented enhancements:** 153 | 154 | - Remove upper boundary for Faker as they release non-breaking major upgrades way too often, please pin a working release in your own app 155 | 156 | ## [0.6.2] - 2022-02-08 157 | 158 | **Implemented enhancements:** 159 | 160 | - Support faker 12.x 161 | 162 | ## [0.6.1] - 2022-01-25 163 | 164 | **Implemented enhancements:** 165 | 166 | - Support faker 11.x 167 | 168 | ## [0.6.0] - 2021-10-18 169 | 170 | **Implemented enhancements:** 171 | 172 | - Add support to override Faker locale in scrubber settings 173 | - Publish coverage only on main repository 174 | 175 | ## [0.5.6] - 2021-10-08 176 | 177 | **Implemented enhancements:** 178 | 179 | - Pin psycopg2 in CI to 2.8.6 as 2.9+ is incompatible with Django 2.2 180 | 181 | ## [0.5.5] - 2021-10-08 182 | 183 | **Implemented enhancements:** 184 | 185 | - Support faker 9.x 186 | 187 | ## [0.5.4] - 2021-04-13 188 | 189 | **Implemented enhancements:** 190 | 191 | - Support faker 8.x 192 | 193 | ## [0.5.3] - 2021-02-04 194 | 195 | **Implemented enhancements:** 196 | 197 | - Support faker 6.x 198 | 199 | ## [0.5.2] - 2021-01-12 200 | 201 | **Implemented 
enhancements:** 202 | 203 | - Add tests for Python 3.9 204 | - Add tests for Django 3.1 205 | - Support faker 5.x 206 | - Update dev package requirements 207 | 208 | ## [0.5.1] - 2020-10-16 209 | 210 | **Implemented enhancements:** 211 | 212 | - Fix travis setup 213 | 214 | ## [0.5.0] - 2020-10-16 215 | 216 | **Implemented enhancements:** 217 | 218 | - Support for django-model-utils 4.x.x 219 | 220 | **Breaking changes:** 221 | 222 | - Add compatibility for Faker 3.x.x, remove support for Faker < 0.8.0 223 | - Remove support for Python 2.7 and 3.5 224 | - Remove support for Django 1.x 225 | 226 | ## [0.4.4] - 2019-12-11 227 | 228 | **Implemented enhancements:** 229 | 230 | - add the same version restrictions on faker to setup.py 231 | 232 | ## [0.4.3] - 2019-12-04 233 | 234 | **Implemented enhancements:** 235 | 236 | - add empty and null scrubbers 237 | - make `Lorem` scrubber lazy, matching README 238 | 239 | **Fixed bugs:** 240 | 241 | - set more stringent version requirements (faker >= 3 breaks builds) 242 | 243 | ## [0.4.1] - 2019-11-16 244 | 245 | **Fixed bugs:** 246 | 247 | - correctly clear fake data model to fix successive calls to `scrub_data` (thanks [Benedikt Bauer](https://github.com/mastacheata)) 248 | 249 | ## [0.4.0] - 2019-11-13 250 | 251 | **Implemented enhancements:** 252 | 253 | - `Faker` scrubber now supports passing arbitrary arguments to faker providers and also non-text fields (thanks [Benedikt Bauer](https://github.com/mastacheata) and [Ronny Vedrilla](https://github.com/GitRon)) 254 | 255 | ## [0.3.1] - 2018-09-10 256 | 257 | **Fixed bugs:** 258 | 259 | - [#9](https://github.com/RegioHelden/django-scrubber/pull/9) `Hash` scrubber choking on fields with `max_length=None` - Thanks to [Charlie Denton](https://github.com/meshy) 260 | 261 | ## [0.3.0] - 2018-09-06 262 | 263 | **Implemented enhancements:** 264 | 265 | - Finally added some basic tests (thanks [Marco De Felice](https://github.com/md-f)) 266 | - `Hash` scrubber can now also be used 
on sqlite 267 | - Expand tests to include python 3.7 and django 2.1 268 | 269 | **Breaking changes:** 270 | 271 | - Scrubbers that are lazily initialized now receive `Field` instances as parameters, instead of field 272 | names. If you have custom scrubbers depending on the previous behavior, these should be updated. Accessing the 273 | field's name from the object instance is trivial: `field_instance.name`. E.g.: if you have `some_field = MyCustomScrubber` 274 | in any of your models' `Scrubbers`, this class must accept a `Field` instance as first parameter. 275 | Note that explicitly initializing any of the built-in scrubbers with field names is still supported, so if you were 276 | just using built-in scrubbers, you should not be affected by this change. 277 | - related to the above, `FuncField` derived classes can now do connection-based setup by implementing the 278 | `connection_setup` method. This is mostly useful for doing different things based on the DB vendor, and is used to 279 | implement `MD5()` on sqlite (see added feature above) 280 | - Ignore proxy models when scrubbing (thanks [Marco De Felice](https://github.com/md-f)) 281 | 282 | ## [0.2.1] - 2018-08-14 283 | 284 | **Implemented enhancements:** 285 | 286 | - Option to scrub only one model from the management command 287 | - Support loading additional faker providers by config setting SCRUBBER\_ADDITIONAL\_FAKER\_PROVIDERS 288 | - Switched changelog format to the one proposed on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 289 | 290 | ## [0.2.0] - 2018-08-13 291 | 292 | **Implemented enhancements:** 293 | 294 | - scrubbers.Concat to make simple concatenation of scrubbers possible 295 | 296 | ## [0.1.4] - 2018-08-13 297 | 298 | **Implemented enhancements:** 299 | 300 | - Make our README look beautiful on PyPI 301 | 302 | ## [0.1.3] - 2018-08-13 303 | 304 | **Fixed bugs:** 305 | 306 | - [#1](https://github.com/RegioHelden/django-scrubber/pull/1) badly timed import - Thanks to [Charlie
Denton](https://github.com/meshy) 307 | 308 | ## [0.1.2] - 2018-06-22 309 | 310 | **Implemented enhancements:** 311 | 312 | - Use bumpversion and travis to make new releases 313 | 314 | **Breaking changes:** 315 | 316 | - rename project: django\_scrubber → django-scrubber 317 | 318 | ## [0.1.0] - 2018-06-22 319 | 320 | **Implemented enhancements:** 321 | 322 | - Initial release 323 | 324 | 325 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | FROM python:3.12-slim-bookworm 6 | 7 | ARG DEBIAN_FRONTEND=noninteractive 8 | ENV PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=x LC_ALL=C.UTF-8 UV_COMPILE_BYTECODE=0 9 | 10 | COPY system_dependencies.txt /app/ 11 | 12 | RUN sys_deps=$(grep -v '^#' system_dependencies.txt | tr '\n' ' '); \ 13 | apt -y update && \ 14 | apt -y --no-install-recommends install pipx $sys_deps && \ 15 | apt clean && \ 16 | find /usr/share/man /usr/share/locale /usr/share/doc -type f -delete && \ 17 | rm -rf /var/lib/apt/lists/* 18 | 19 | WORKDIR /app 20 | 21 | RUN grep -q -w 1000 /etc/group || groupadd --gid 1000 app && \ 22 | id -u app >/dev/null 2>&1 || useradd --gid 1000 --uid 1000 -m app && \ 23 | chown app:app /app 24 | 25 | USER app 26 | 27 | COPY --chown=app requirements* /app/ 28 | 29 | ENV PATH=/home/app/.local/bin:/home/app/venv/bin:${PATH} DJANGO_SETTINGS_MODULE=example.settings 30 | 31 | RUN pipx install --force uv==0.7.8 && uv venv ~/venv && \ 32 | uv pip install --no-cache --upgrade --requirements /app/requirements-test.txt && \ 33 | uv cache clean 34 | 35 | EXPOSE 8000 36 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | ## 
[4.1.0] - 2025-03-04 2 | 3 | **Fixed bugs:** 4 | 5 | - Restore output in `scrub_validation` command 6 | - Move back to annotation, instead of Subquery for performance reasons 7 | 8 | ## [4.0.0] - 2025-02-19 9 | 10 | **Breaking changes:** 11 | 12 | - Removed support for Python 3.8 13 | 14 | **Implemented enhancements:** 15 | 16 | - Added support for Python 3.13 - Thanks @GitRon 17 | - Improved documentation on concatenation of different field types 18 | - Removed outdated sqlite workaround 19 | - Switch linting and formatting to ruff 20 | - Add devcontainer setup for VSCode 21 | - Updates to GitHub actions 22 | 23 | ## [3.0.0] - 2024-09-10 24 | 25 | **Breaking changes:** 26 | 27 | - Removed `SCRUBBER_VALIDATION_WHITELIST` in favour of `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @GitRon 28 | 29 | **Implemented enhancements:** 30 | 31 | - Added Django test model `db.TestModel` to default whitelist of `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @GitRon 32 | - Removed support for the `mock` package in unit tests 33 | - Adjusted some default settings 34 | 35 | ## [2.1.1] - 2024-08-20 36 | 37 | **Fixed bugs:** 38 | 39 | - Fixed an issue where the management command `scrub_validation` could fail even though all models were skipped - Thanks @GitRon 40 | 41 | ## [2.1.0] - 2024-08-20 42 | 43 | **Implemented enhancements:** 44 | 45 | - Added support for `Django` version `5.1` - Thanks @GitRon 46 | - Added `SCRUBBER_VALIDATION_WHITELIST` and excluded Django core test model - Thanks @GitRon 47 | 48 | ## [2.0.0] - 2024-06-27 49 | 50 | **Breaking changes:** 51 | 52 | - Remove support for `Django` below version `4.2` 53 | - Remove support for `Python` below version `3.8` 54 | - Minimum required `Faker` version is now `20.0.0`, released 11/2023 55 | 56 | **Implemented enhancements:** 57 | 58 | - Added support for `Django` version `5.0` 59 | - Added support for `Python` version `3.12` 60 | - Add docker compose setup to run tests 61 | 62 | ## [1.3.0] - 2024-06-05 63 | 64 
| **Implemented enhancements:** 65 | 66 | - Add support for regular expressions in `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST` - Thanks @fbinz 67 | 68 | ## [1.2.2] - 2023-11-04 69 | 70 | **Implemented enhancements:** 71 | 72 | - Set `default_auto_field` for `django-scrubber` to `django.db.models.AutoField` to prevent overrides from Django settings - Thanks @GitRon 73 | 74 | ## [1.2.1] - 2023-11-03 75 | 76 | - Yanked 77 | 78 | ## [1.2.0] - 2023-04-01 79 | 80 | **Implemented enhancements:** 81 | 82 | - Added scrubber validation - Thanks @GitRon 83 | - Added strict mode - Thanks @GitRon 84 | 85 | ## [1.1.0] - 2022-07-11 86 | 87 | **Implemented enhancements:** 88 | 89 | - Invalid fields on scrubbers will no longer raise exception but just trigger warnings now 90 | - Author list completed 91 | 92 | ## [1.0.0] - 2022-07-11 93 | 94 | **Implemented enhancements:** 95 | 96 | - Meta data for python package improved - Thanks @GitRon 97 | 98 | ## [0.9.0] - 2022-06-27 99 | 100 | **Implemented enhancements:** 101 | 102 | - Add functionality to scrub third party models like the Django user model, see https://github.com/RegioHelden/django-scrubber#scrubbing-third-party-models - Thanks @GitRon 103 | - Add tests for Python 3.10 - Thanks @costela 104 | 105 | ## [0.8.0] - 2022-05-01 106 | 107 | **Implemented enhancements:** 108 | 109 | - Add `keep-sessions` argument to scrub_data command. Will NOT truncate all (by definition critical) session data. Thanks @GitRon 110 | - Add `remove-fake-data` argument to scrub_data command. Will truncate the database table storing preprocessed data for the Faker library. 
Thanks @GitRon 111 | - Add Django 3.2 and 4.0 to test matrix 112 | 113 | **Breaking changes:** 114 | 115 | - Remove Python 3.6 from test matrix 116 | - Remove Django 2.2 and 3.1 from test matrix 117 | 118 | ## [0.7.0] - 2022-02-24 119 | 120 | **Implemented enhancements:** 121 | 122 | - Remove upper boundary for Faker as they release non-breaking major upgrades way too often, please pin a working release in your own app 123 | 124 | ## [0.6.2] - 2022-02-08 125 | 126 | **Implemented enhancements:** 127 | 128 | - Support faker 12.x 129 | 130 | ## [0.6.1] - 2022-01-25 131 | 132 | **Implemented enhancements:** 133 | 134 | - Support faker 11.x 135 | 136 | ## [0.6.0] - 2021-10-18 137 | 138 | **Implemented enhancements:** 139 | 140 | - Add support to override Faker locale in scrubber settings 141 | - Publish coverage only on main repository 142 | 143 | ## [0.5.6] - 2021-10-08 144 | 145 | **Implemented enhancements:** 146 | 147 | - Pin psycopg2 in CI to 2.8.6 as 2.9+ is incompatible with Django 2.2 148 | 149 | ## [0.5.5] - 2021-10-08 150 | 151 | **Implemented enhancements:** 152 | 153 | - Support faker 9.x 154 | 155 | ## [0.5.4] - 2021-04-13 156 | 157 | **Implemented enhancements:** 158 | 159 | - Support faker 8.x 160 | 161 | ## [0.5.3] - 2021-02-04 162 | 163 | **Implemented enhancements:** 164 | 165 | - Support faker 6.x 166 | 167 | ## [0.5.2] - 2021-01-12 168 | 169 | **Implemented enhancements:** 170 | 171 | - Add tests for Python 3.9 172 | - Add tests for Django 3.1 173 | - Support faker 5.x 174 | - Update dev package requirements 175 | 176 | ## [0.5.1] - 2020-10-16 177 | 178 | **Implemented enhancements:** 179 | 180 | - Fix travis setup 181 | 182 | ## [0.5.0] - 2020-10-16 183 | 184 | **Implemented enhancements:** 185 | 186 | - Support for django-model-utils 4.x.x 187 | 188 | **Breaking changes:** 189 | 190 | - Add compatibility for Faker 3.x.x, remove support for Faker < 0.8.0 191 | - Remove support for Python 2.7 and 3.5 192 | - Remove support for Django 1.x 193 | 194 | 
## [0.4.4] - 2019-12-11 195 | 196 | **Implemented enhancements:** 197 | 198 | - add the same version restrictions on faker to setup.py 199 | 200 | ## [0.4.3] - 2019-12-04 201 | 202 | **Implemented enhancements:** 203 | 204 | - add empty and null scrubbers 205 | - make `Lorem` scrubber lazy, matching README 206 | 207 | **Fixed bugs:** 208 | 209 | - set more stringent version requirements (faker >= 3 breaks builds) 210 | 211 | ## [0.4.1] - 2019-11-16 212 | 213 | **Fixed bugs:** 214 | 215 | - correctly clear fake data model to fix successive calls to `scrub_data` (thanks [Benedikt Bauer](https://github.com/mastacheata)) 216 | 217 | ## [0.4.0] - 2019-11-13 218 | 219 | **Implemented enhancements:** 220 | 221 | - `Faker` scrubber now supports passing arbitrary arguments to faker providers and also non-text fields (thanks [Benedikt Bauer](https://github.com/mastacheata) and [Ronny Vedrilla](https://github.com/GitRon)) 222 | 223 | ## [0.3.1] - 2018-09-10 224 | 225 | **Fixed bugs:** 226 | 227 | - [#9](https://github.com/RegioHelden/django-scrubber/pull/9) `Hash` scrubber choking on fields with `max_length=None` - Thanks to [Charlie Denton](https://github.com/meshy) 228 | 229 | ## [0.3.0] - 2018-09-06 230 | 231 | **Implemented enhancements:** 232 | 233 | - Finally added some basic tests (thanks [Marco De Felice](https://github.com/md-f)) 234 | - `Hash` scrubber can now also be used on sqlite 235 | - Expand tests to include python 3.7 and django 2.1 236 | 237 | **Breaking changes:** 238 | 239 | - Scrubbers that are lazily initialized now receive `Field` instances as parameters, instead of field 240 | names. If you have custom scrubbers depending on the previous behavior, these should be updated. Accessing the 241 | field's name from the object instance is trivial: `field_instance.name`. E.g.: if you have `some_field = MyCustomScrubber` 242 | in any of your models' `Scrubbers`, this class must accept a `Field` instance as first parameter. 
243 | Note that explicitly initializing any of the built-in scrubbers with field names is still supported, so if you were 244 | just using built-in scrubbers, you should not be affected by this change. 245 | - related to the above, `FuncField` derived classes can now do connection-based setup by implementing the 246 | `connection_setup` method. This is mostly useful for doing different things based on the DB vendor, and is used to 247 | implement `MD5()` on sqlite (see added feature above) 248 | - Ignore proxy models when scrubbing (thanks [Marco De Felice](https://github.com/md-f)) 249 | 250 | ## [0.2.1] - 2018-08-14 251 | 252 | **Implemented enhancements:** 253 | 254 | - Option to scrub only one model from the management command 255 | - Support loading additional faker providers by config setting SCRUBBER\_ADDITIONAL\_FAKER\_PROVIDERS 256 | - Switched changelog format to the one proposed on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 257 | 258 | ## [0.2.0] - 2018-08-13 259 | 260 | **Implemented enhancements:** 261 | 262 | - scrubbers.Concat to make simple concatenation of scrubbers possible 263 | 264 | ## [0.1.4] - 2018-08-13 265 | 266 | **Implemented enhancements:** 267 | 268 | - Make our README look beautiful on PyPI 269 | 270 | ## [0.1.3] - 2018-08-13 271 | 272 | **Fixed bugs:** 273 | 274 | - [#1](https://github.com/RegioHelden/django-scrubber/pull/1) badly timed import - Thanks to [Charlie Denton](https://github.com/meshy) 275 | 276 | ## [0.1.2] - 2018-06-22 277 | 278 | **Implemented enhancements:** 279 | 280 | - Use bumpversion and travis to make new releases 281 | 282 | **Breaking changes:** 283 | 284 | - rename project: django\_scrubber → django-scrubber 285 | 286 | ## [0.1.0] - 2018-06-22 287 | 288 | **Implemented enhancements:** 289 | 290 | - Initial release 291 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | #
------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------- 4 | 5 | The MIT License (MIT) 6 | Copyright (c) RegioHelden GmbH 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Django Scrubber 2 | 3 | [![Build Status](https://github.com/RegioHelden/django-scrubber/workflows/Build/badge.svg)](https://github.com/RegioHelden/django-scrubber/actions) 4 | [![PyPI](https://img.shields.io/pypi/v/django-scrubber.svg)](https://pypi.org/project/django-scrubber/) 5 | [![Downloads](https://pepy.tech/badge/django-scrubber)](https://pepy.tech/project/django-scrubber) 6 | 7 | `django_scrubber` is a django app meant to help you anonymize your project's database data. 
It destructively alters data 8 | directly on the DB and therefore **should not be used on production**. 9 | 10 | The main use case is providing developers with realistic data to use during development, without having to distribute 11 | your customers' or users' potentially sensitive information. 12 | To accomplish this, `django_scrubber` should be plugged in as a step during the creation of your database dumps. 13 | 14 | Simply mark the fields you want to anonymize and call the `scrub_data` management command. Data will be replaced based 15 | on different *scrubbers* (see below), which define how the anonymous content will be generated. 16 | 17 | If you want to be sure that you don't forget any fields in the ongoing development process, you can use the 18 | management command `scrub_validation` in your CI/CD pipeline to check for any missing fields. 19 | 20 | ## Installation 21 | 22 | Simply run: 23 | 24 | ``` 25 | pip install django-scrubber 26 | ``` 27 | 28 | And add `django_scrubber` to your django `INSTALLED_APPS`. I.e.: in `settings.py` add: 29 | 30 | ``` 31 | INSTALLED_APPS = [ 32 | ... 33 | 'django_scrubber.apps.DjangoScrubberConfig', 34 | ... 35 | ] 36 | ``` 37 | 38 | ## Scrubbing data 39 | 40 | In order to scrub data, i.e.: to replace DB data with anonymized versions, `django-scrubber` must know which models and 41 | fields it should act on, and how the data should be replaced. 42 | 43 | There are a few different ways to select which data should be scrubbed, namely: explicitly per model field; or globally 44 | per name or field type.
45 | 46 | Adding scrubbers directly to model, matching scrubbers to fields by name: 47 | 48 | ```python 49 | class MyModel(Model): 50 | somefield = CharField() 51 | 52 | class Scrubbers: 53 | somefield = scrubbers.Hash('somefield') 54 | ``` 55 | 56 | Adding scrubbers globally, either by field name or field type: 57 | 58 | ```python 59 | # (in settings.py) 60 | 61 | SCRUBBER_GLOBAL_SCRUBBERS = { 62 | 'name': scrubbers.Hash, 63 | EmailField: scrubbers.Hash, 64 | } 65 | ``` 66 | 67 | Model scrubbers override field-name scrubbers, which in turn override field-type scrubbers. 68 | 69 | To disable global scrubbing in some specific model, simply set the respective field scrubber to `None`. 70 | 71 | Scrubbers defined for non-existing fields will raise a warning but not fail the scrubbing process. 72 | 73 | Which mechanism will be used to scrub the selected data is determined by using one of the provided scrubbers 74 | in `django_scrubber.scrubbers`. See below for a list. 75 | Alternatively, values may be anything that can be used as a value in a `QuerySet.update()` call (like `Func` instances, 76 | string literals, etc), or any `callable` that returns such an object when called with a `Field` object as argument. 77 | 78 | By default, `django_scrubber` will affect all models from all registered apps. This may lead to issues with third-party 79 | apps if the global scrubbers are too general. This can be avoided with the `SCRUBBER_APPS_LIST` setting. Using this, you 80 | might for instance split your `INSTALLED_APPS` into multiple `SYSTEM_APPS` and `LOCAL_APPS`, then 81 | set `SCRUBBER_APPS_LIST = LOCAL_APPS`, to scrub only your own apps. 82 | 83 | Finally just run `./manage.py scrub_data` to **destructively** scrub the registered fields. 84 | 85 | ### Arguments to the scrub_data command 86 | 87 | `--model` Scrub only a single model (format `<app_label>.<model_name>`) 88 | 89 | `--keep-sessions` Will NOT truncate all (by definition critical) session data.
90 | 91 | `--remove-fake-data` Will truncate the database table storing preprocessed data for the Faker library. 92 | 93 | ## Built-In scrubbers 94 | 95 | ### Empty/Null 96 | 97 | The simplest scrubbers: replace the field's content with the empty string or `NULL`, respectively. 98 | 99 | ```python 100 | class Scrubbers: 101 | somefield = scrubbers.Empty 102 | someother = scrubbers.Null 103 | ``` 104 | 105 | These scrubbers have no options. 106 | 107 | ### Keeper 108 | 109 | When running the validation or working in strict mode, you may want to actively decide to keep certain data 110 | instead of scrubbing it. In this case, you can just define `scrubbers.Keep`. 111 | 112 | ```python 113 | class Scrubbers: 114 | non_critical_field = scrubbers.Keep 115 | ``` 116 | 117 | This scrubber doesn't have any options. 118 | 119 | ### Hash 120 | 121 | Simple hashing of content: 122 | 123 | ```python 124 | class Scrubbers: 125 | somefield = scrubbers.Hash # will use the field itself as source 126 | someotherfield = scrubbers.Hash('somefield') # can optionally pass a different field name as hashing source 127 | ``` 128 | 129 | Currently, this uses the MD5 hash which is supported in a wide variety of DB engines. Additionally, since security is 130 | not the main objective, a shorter hash length has a lower risk of being longer than whatever field it is supposed to 131 | replace. 132 | 133 | ### Lorem 134 | 135 | Simple scrubber meant to replace `TextField` with a static block of text. Has no options. 136 | 137 | ```python 138 | class Scrubbers: 139 | somefield = scrubbers.Lorem 140 | ``` 141 | 142 | ### Concat 143 | 144 | Wrapper around `django.db.functions.Concat` to enable simple concatenation of scrubbers. This is useful if you want to 145 | ensure a field's uniqueness through composition of, for instance, the `Hash` and `Faker` (see below) scrubbers. 146 | 147 | When using different input field types, make sure to explicitly state an `output_field` type.
148 | 149 | The following will generate random email addresses by hashing the user-part and using `faker` for the domain part: 150 | 151 | ```python 152 | class Scrubbers: 153 | email = scrubbers.Concat(scrubbers.Hash('email'), models.Value('@'), scrubbers.Faker('domain_name'), output_field=models.EmailField()) 154 | ``` 155 | 156 | ### Faker 157 | 158 | Replaces content with the help of [faker](https://pypi.python.org/pypi/Faker). 159 | 160 | ```python 161 | class Scrubbers: 162 | first_name = scrubbers.Faker('first_name') 163 | last_name = scrubbers.Faker('last_name') 164 | past_date = scrubbers.Faker('past_date', start_date="-30d", tzinfo=None) 165 | ``` 166 | 167 | The replacements are done on the database-level and should therefore be able to cope with large amounts of data with 168 | reasonable performance. 169 | 170 | The `Faker` scrubber requires at least one argument: the faker provider used to generate random data. 171 | All [faker providers](https://faker.readthedocs.io/en/latest/providers.html) are supported, and you can also register 172 | your own custom providers.
173 | Any remaining arguments will be passed through to that provider. Please refer to the faker docs if a provider accepts 174 | arguments and what to do with them. 175 | 176 | #### Locales 177 | 178 | Faker will be initialized with the current django `LANGUAGE_CODE` and will populate the DB with localized data. If you 179 | want localized scrubbing, simply set it to some other value. 180 | 181 | #### Idempotency 182 | 183 | By default, the faker instance used to populate the DB uses a fixed random seed, in order to ensure different scrubbings 184 | of the same data generate the same output. This is particularly useful if the scrubbed data is imported as a dump by 185 | developers, since changing data during troubleshooting would otherwise be confusing. 186 | 187 | This behaviour can be changed by setting `SCRUBBER_RANDOM_SEED=None`, which ensures every scrubbing will generate random 188 | source data. 189 | 190 | #### Limitations 191 | 192 | Scrubbing unique fields may lead to `IntegrityError`s, since there is no guarantee that the random content will not be 193 | repeated. Playing with different settings for `SCRUBBER_RANDOM_SEED` and `SCRUBBER_ENTRIES_PER_PROVIDER` may alleviate 194 | the problem. 195 | Unfortunately, for performance reasons, the source data for scrubbing with faker is added to the database, and 196 | arbitrarily increasing `SCRUBBER_ENTRIES_PER_PROVIDER` will significantly slow down scrubbing (besides still not 197 | guaranteeing uniqueness). 198 | 199 | When using `django < 2.1` and working on `sqlite` a bug within django causes field-specific scrubbing ( 200 | e.g. `date_object`) to fail. Please consider using a different database backend or upgrade to the latest django version. 201 | 202 | ## Scrubbing third-party models 203 | 204 | Sometimes you just don't have control over some code, but you still want to scrub the data of a given model. 205 | 206 | A good example is the Django user model. 
It contains sensitive data, and you would have to overwrite the whole model 207 | just to add the scrubber metaclass. 208 | 209 | That's the way to go: 210 | 211 | 1. Define your Scrubber class **somewhere** in your codebase (like a `scrubbers.py`) 212 | 213 | ```python 214 | # scrubbers.py 215 | class UserScrubbers: 216 | scrubbers.Faker('de_DE') 217 | first_name = scrubbers.Faker('first_name') 218 | last_name = scrubbers.Faker('last_name') 219 | username = scrubbers.Faker('uuid4') 220 | password = scrubbers.Faker('sha1') 221 | last_login = scrubbers.Null 222 | email = scrubbers.Concat(first_name, models.Value('.'), last_name, models.Value('@'), 223 | models.Value(settings.SCRUBBER_DOMAIN)) 224 | ```` 225 | 226 | 2. Set up a mapping between your third-party model and your scrubber class 227 | 228 | ```python 229 | # settings.py 230 | SCRUBBER_MAPPING = { 231 | "auth.User": "apps.account.scrubbers.UserScrubbers", 232 | } 233 | ``` 234 | 235 | ## Settings 236 | 237 | ### `SCRUBBER_GLOBAL_SCRUBBERS`: 238 | 239 | Dictionary of global scrubbers. Keys should be either field names as strings or field type classes. Values should be one 240 | of the scrubbers provided in `django_scrubber.scrubbers`. 241 | 242 | Example: 243 | 244 | ```python 245 | SCRUBBER_GLOBAL_SCRUBBERS = { 246 | 'name': scrubbers.Hash, 247 | EmailField: scrubbers.Hash, 248 | } 249 | ``` 250 | 251 | ### `SCRUBBER_RANDOM_SEED`: 252 | 253 | The seed used when generating random content by the Faker scrubber. Setting this to `None` means each scrubbing will 254 | generate different data. 255 | 256 | (default: `42`) 257 | 258 | ### `SCRUBBER_ENTRIES_PER_PROVIDER`: 259 | 260 | Number of entries to use as source for Faker scrubber. Increasing this value will increase the randomness of generated 261 | data, but decrease performance. 262 | 263 | (default: `1000`) 264 | 265 | ### `SCRUBBER_SKIP_UNMANAGED`: 266 | 267 | Do not attempt to scrub models which are not managed by the ORM. 
268 | 269 | (default: `True`) 270 | 271 | ### `SCRUBBER_APPS_LIST`: 272 | 273 | Only scrub models belonging to these specific django apps. If unset, will scrub all installed apps. 274 | 275 | (default: `None`) 276 | 277 | ### `SCRUBBER_ADDITIONAL_FAKER_PROVIDERS`: 278 | 279 | Add additional fake providers to be used by Faker. Must be noted as full dotted path to the provider class. 280 | 281 | (default: `{*()}`, empty set) 282 | 283 | ### `SCRUBBER_FAKER_LOCALE`: 284 | 285 | Set an alternative locale for Faker used during the scrubbing process. 286 | 287 | (default: `None`, falls back to Django's default locale) 288 | 289 | ### `SCRUBBER_MAPPING`: 290 | 291 | Define a class and a mapper which does not have to live inside the given model. Useful, if you have no control over the 292 | models code you'd like to scrub. 293 | 294 | ````python 295 | SCRUBBER_MAPPING = { 296 | "auth.User": "my_app.scrubbers.UserScrubbers", 297 | } 298 | ```` 299 | 300 | (default: `{}`) 301 | 302 | ### `SCRUBBER_STRICT_MODE`: 303 | 304 | When strict mode is activated, you have to define a scrubbing policy for every field of every type defined in 305 | `SCRUBBER_REQUIRED_FIELD_TYPES`. If you have unscrubbed fields and this flag is active, you can't run 306 | `python manage.py scrub_data`. 307 | 308 | (default: `False`) 309 | 310 | ### `SCRUBBER_REQUIRED_FIELD_TYPES`: 311 | 312 | Defaults to all text-based Django model fields. Usually, privacy-relevant data is only stored in text-fields, numbers 313 | and booleans (usually) can't contain sensitive personal data. These fields will be checked when running 314 | `python manage.py scrub_validation`. 315 | 316 | (default: `(models.CharField, models.TextField, models.URLField, models.JSONField, models.GenericIPAddressField, 317 | models.EmailField,)`) 318 | 319 | ### `SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST`: 320 | 321 | Whitelists a list of models which will not be checked during `scrub_validation` and when 322 | activating the strict mode. 
Defaults to the non-privacy-related Django base models. 323 | Items can either be full model names (e.g. `auth.Group`) or regular expression patterns matching 324 | against the full model name (e.g. `re.compile("auth.*")` to whitelist all auth models). 325 | 326 | (default: `('auth.Group', 'auth.Permission', 'contenttypes.ContentType', 'sessions.Session', 'sites.Site', 327 | 'django_scrubber.FakeData', 'db.TestModel',)`) 328 | 329 | (default: {}) 330 | 331 | ## Logging 332 | 333 | Scrubber uses the default django logger. The logger name is ``django_scrubber.scrubbers``. 334 | So if you want to log - for example - to the console, you could set up the logger like this: 335 | 336 | ```` 337 | LOGGING['loggers']['django_scrubber'] = { 338 | 'handlers': ['console'], 339 | 'propagate': True, 340 | 'level': 'DEBUG', 341 | } 342 | ```` 343 | 344 | ## Making a new release 345 | 346 | This project makes use of [RegioHelden's reusable GitHub workflows](https://github.com/RegioHelden/github-reusable-workflows). \ 347 | Make a new release by manually triggering the `Open release PR` workflow. 348 | -------------------------------------------------------------------------------- /compose.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | 5 | --- 6 | services: 7 | app: 8 | build: .
9 | user: app 10 | command: /app/manage.py test 11 | volumes: 12 | - .:/app:cached 13 | environment: 14 | SHELL: /bin/bash 15 | IPYTHONDIR: /app/.ipython 16 | HISTFILE: /app/.bash_history 17 | PYTHONPATH: /app # make app available without installation 18 | restart: "no" 19 | -------------------------------------------------------------------------------- /django_scrubber/__init__.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | from django.db import models 3 | 4 | defaults = { 5 | "SCRUBBER_RANDOM_SEED": 42, # we prefer idempotent scrubbing 6 | "SCRUBBER_ENTRIES_PER_PROVIDER": 1000, 7 | "SCRUBBER_GLOBAL_SCRUBBERS": {}, 8 | "SCRUBBER_SKIP_UNMANAGED": True, 9 | "SCRUBBER_APPS_LIST": None, 10 | "SCRUBBER_ADDITIONAL_FAKER_PROVIDERS": {*()}, 11 | "SCRUBBER_FAKER_LOCALE": None, 12 | "SCRUBBER_MAPPING": {}, 13 | "SCRUBBER_STRICT_MODE": False, 14 | "SCRUBBER_REQUIRED_FIELD_TYPES": ( 15 | models.CharField, 16 | models.TextField, 17 | models.URLField, 18 | models.JSONField, 19 | models.GenericIPAddressField, 20 | models.EmailField, 21 | ), 22 | "SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST": ( 23 | "auth.Group", 24 | "auth.Permission", 25 | "contenttypes.ContentType", 26 | "db.TestModel", 27 | "sessions.Session", 28 | "sites.Site", 29 | "django_scrubber.FakeData", 30 | ), 31 | } 32 | 33 | 34 | def settings_with_fallback(key): 35 | return getattr(settings, key, defaults[key]) 36 | 37 | 38 | class ScrubberInitError(Exception): 39 | pass 40 | -------------------------------------------------------------------------------- /django_scrubber/admin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RegioHelden/django-scrubber/65eb2be50c5a4aee82b7759f18eed772026771b7/django_scrubber/admin.py -------------------------------------------------------------------------------- /django_scrubber/apps.py: 
-------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | 5 | class DjangoScrubberConfig(AppConfig): 6 | name = "django_scrubber" 7 | verbose_name = _("Django Scrubber") 8 | default_auto_field = "django.db.models.AutoField" 9 | -------------------------------------------------------------------------------- /django_scrubber/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RegioHelden/django-scrubber/65eb2be50c5a4aee82b7759f18eed772026771b7/django_scrubber/management/__init__.py -------------------------------------------------------------------------------- /django_scrubber/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RegioHelden/django-scrubber/65eb2be50c5a4aee82b7759f18eed772026771b7/django_scrubber/management/commands/__init__.py -------------------------------------------------------------------------------- /django_scrubber/management/commands/scrub_data.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import logging 3 | import warnings 4 | 5 | from django.apps import apps 6 | from django.conf import settings 7 | from django.contrib.sessions.models import Session 8 | from django.core.exceptions import FieldDoesNotExist 9 | from django.core.management.base import BaseCommand, CommandError 10 | from django.db.models import F 11 | from django.db.utils import DataError, IntegrityError 12 | 13 | from django_scrubber import settings_with_fallback 14 | from django_scrubber.models import FakeData 15 | from django_scrubber.scrubbers import Keep 16 | from django_scrubber.services.validator import ScrubberValidatorService 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class 
def add_arguments(self, parser):
    """
    Register the command line options of ``scrub_data``.

    ``--model`` restricts scrubbing to a single model, ``--keep-sessions`` skips the deletion of session
    rows and ``--remove-fake-data`` deletes the stored Faker rows once scrubbing is done.
    """
    parser.add_argument(
        "--model",
        type=str,
        required=False,
        help="Scrub only a single model (format <app_label>.<model_name>)",
    )
    parser.add_argument(
        "--keep-sessions",
        action="store_true",
        required=False,
        help="Will NOT truncate all (by definition critical) session data",
    )
    parser.add_argument(
        "--remove-fake-data",
        action="store_true",
        required=False,
        help="Will truncate the database table storing preprocessed data for the Faker library. "
        "If you want to do multiple iterations of scrubbing, it will save you time to keep "
        "them. If not, you will add a huge bunch of data to your dump size.",
    )


def handle(self, *args, **kwargs):
    """
    Scrub the selected models, then clean up session and (optionally) Faker data.

    Returns ``False`` without touching any data when ``DEBUG`` is off, or when strict mode is enabled and
    the validator reports unscrubbed fields. Returns ``None`` after a completed run.

    Raises ``CommandError`` when ``--model`` is not a resolvable ``<app_label>.<model_name>`` label.
    """
    if not settings.DEBUG:
        # avoid logger, otherwise we might silently fail if we're on live and logging is being sent somewhere else
        self.stderr.write("This command should only be run with DEBUG=True, to avoid running on live systems")
        return False

    # Check STRICT mode
    if settings_with_fallback("SCRUBBER_STRICT_MODE"):
        service = ScrubberValidatorService()
        non_scrubbed_field_list = service.process()
        if len(non_scrubbed_field_list) > 0:
            self.stderr.write(
                'When "SCRUBBER_STRICT_MODE" is enabled, you have to define a scrubbing policy '
                "for every text-based field.",
            )
            return False

    global_scrubbers = settings_with_fallback("SCRUBBER_GLOBAL_SCRUBBERS")

    model_label = kwargs.get("model")
    if model_label is not None:
        # run only for selected model
        try:
            # a label without a dot fails to unpack (ValueError); an unknown label fails the lookup
            app_label, model_name = model_label.rsplit(".", 1)
            models = [apps.get_model(app_label=app_label, model_name=model_name)]
        except (ValueError, LookupError) as e:
            raise CommandError("--model should be defined as <app_label>.<model_name>") from e
    else:
        # run for all models of all apps
        models = apps.get_models()

    scrubber_apps_list = settings_with_fallback("SCRUBBER_APPS_LIST")
    for model_class in models:
        self._scrub_model(model_class, scrubber_apps_list, global_scrubbers)

    # Truncate session data
    if not kwargs.get("keep_sessions", False):
        Session.objects.all().delete()

    # Truncate Faker data
    if kwargs.get("remove_fake_data", False):
        FakeData.objects.all().delete()

    return None
increase SCRUBBER_ENTRIES_PER_PROVIDER?", 130 | ) from e 131 | except DataError as e: 132 | raise CommandError(f"DataError while scrubbing {model_class} ({e})") from e 133 | 134 | 135 | def _call_callables(d): 136 | """ 137 | Helper to realize lazy scrubbers, like Faker, or global field-type scrubbers 138 | """ 139 | return {k.name: ((callable(v) and v(k)) or v) for k, v in d.items()} 140 | 141 | 142 | def _parse_scrubber_class_from_string(path: str): 143 | """ 144 | Takes a string to a certain scrubber class and returns a python class definition - not an instance. 145 | """ 146 | try: 147 | module_name, class_name = path.rsplit(".", 1) 148 | module = importlib.import_module(module_name) 149 | return getattr(module, class_name) 150 | except (ImportError, ValueError) as e: 151 | raise ImportError(f'Mapped scrubber class "{path}" could not be found.') from e 152 | 153 | 154 | def _get_model_scrubbers(model): 155 | # Get model-scrubber-mapping from settings 156 | scrubber_mapping = settings_with_fallback("SCRUBBER_MAPPING") 157 | 158 | # Initialise scrubber list 159 | scrubbers = {} 160 | 161 | # Check if model has a settings-defined... 162 | if model._meta.label in scrubber_mapping: 163 | scrubber_cls = _parse_scrubber_class_from_string(scrubber_mapping[model._meta.label]) 164 | # If not... 
165 | else: 166 | # Try to get the scrubber metaclass from the given model 167 | try: 168 | scrubber_cls = model.Scrubbers 169 | except AttributeError: 170 | return scrubbers # no model-specific scrubbers 171 | 172 | # Get field mappings from scrubber class 173 | for k, v in _get_fields(scrubber_cls): 174 | try: 175 | field = model._meta.get_field(k) 176 | scrubbers[field] = v 177 | except FieldDoesNotExist: 178 | warnings.warn(f"Scrubber defined for {model.__name__}.{k} but field does not exist", stacklevel=2) 179 | 180 | # Return scrubber-field-mapping 181 | return scrubbers 182 | 183 | 184 | def _get_fields(d): 185 | """ 186 | Helper to get "normal" (i.e.: non-magic and non-dunder) instance attributes. 187 | Returns an iterator of (field_name, field) tuples. 188 | """ 189 | return ((k, v) for k, v in vars(d).items() if not k.startswith("_")) 190 | 191 | 192 | def _filter_out_disabled(d): 193 | """ 194 | Helper to remove Nones (actually any false-like type) from the scrubbers. 195 | This is needed so we can disable global scrubbers in a per-model basis. 
196 | """ 197 | return {k: v for k, v in d.items() if v} 198 | -------------------------------------------------------------------------------- /django_scrubber/management/commands/scrub_validation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from django.core.management.base import BaseCommand 4 | 5 | from django_scrubber import settings_with_fallback 6 | from django_scrubber.services.validator import ScrubberValidatorService 7 | 8 | 9 | class Command(BaseCommand): 10 | def handle(self, *args, **options): 11 | service = ScrubberValidatorService() 12 | non_scrubbed_field_list = service.process() 13 | 14 | found_models = 0 15 | found_fields = 0 16 | 17 | if len(non_scrubbed_field_list): 18 | for model_path, affected_field_list in non_scrubbed_field_list.items(): 19 | self.stdout.write(f"Model {model_path!r}:") 20 | 21 | found_models += 1 22 | for _field in affected_field_list: 23 | self.stdout.write(f"- {_field}") 24 | found_fields += 1 25 | 26 | self.stdout.write("") 27 | 28 | if found_models > 0: 29 | self.stdout.write(f"{found_models} model(s) having {found_fields} unscrubbed field(s) detected.") 30 | 31 | # strict mode should fail with a non-zero exit code 32 | if settings_with_fallback("SCRUBBER_STRICT_MODE"): 33 | sys.exit(1) 34 | 35 | self.stdout.write("However strict mode is deactivated and scrubbing is not enforced.") 36 | sys.exit(0) 37 | 38 | self.stdout.write("No unscrubbed fields detected. 
class Migration(migrations.Migration):
    """Initial schema of the app: creates the ``FakeData`` table together with its index and constraint."""

    initial = True

    dependencies = [
    ]

    operations = [
        # Table with an auto-created integer primary key plus provider, offset and content columns.
        migrations.CreateModel(
            name='FakeData',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('provider', models.CharField(db_index=True, max_length=255, verbose_name='Faker provider')),
                ('provider_offset', models.PositiveSmallIntegerField()),
                ('content', models.CharField(max_length=255, verbose_name='Fake content')),
            ],
        ),
        # Composite index over (provider, provider_offset).
        migrations.AddIndex(
            model_name='fakedata',
            index=models.Index(fields=['provider', 'provider_offset'], name='django_scru_provide_d7f250_idx'),
        ),
        # Each (provider, provider_offset) pair may exist only once.
        migrations.AlterUniqueTogether(
            name='fakedata',
            unique_together=set([('provider', 'provider_offset')]),
        ),
    ]
class FakeData(Model):
    """
    One stored fake value: ``content`` belonging to ``provider`` at position ``provider_offset``.
    """

    # Key identifying the provider the row belongs to (indexed for lookups).
    provider = CharField(max_length=255, verbose_name="Faker provider", db_index=True)
    # Position of this value within its provider.
    provider_offset = PositiveSmallIntegerField()
    # The fake value itself, limited to 255 characters.
    content = CharField(max_length=255, verbose_name="Fake content")

    objects = FakeDataManager()

    class Meta:
        indexes: ClassVar[list[Index]] = [
            Index(fields=["provider", "provider_offset"]),
        ]
        # every offset may be used only once per provider
        unique_together = (("provider", "provider_offset"),)

    def __str__(self):
        return f"{self.provider}: '{self.content}'"
class Hash(FieldFunc):
    """
    Simple md5 hashing of content.
    If initialized with a Field object, will use its max_length attribute to truncate the generated hash.
    Otherwise, if initialized with a field name as string, will use the full hash length.
    """

    template = "NULL"  # replaced during __init__
    arity = 1

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # "max_length" only ends up in self.extra when a Field object was passed in
        if self.extra.get("max_length") is not None:
            self.template = "SUBSTR(MD5(%(expressions)s), 1, %(max_length)s)"
        else:
            self.template = "MD5(%(expressions)s)"

    def connection_setup(self, db_connection):
        """
        Register a Python implementation of ``MD5`` on sqlite connections; other vendors are left alone.
        """
        if db_connection.vendor != "sqlite":
            return

        # add MD5 support for sqlite; this calls back to python and will probably have a performance impact
        import hashlib
        import sqlite3

        def _md5(value):
            # NULL arrives as None; keep it NULL instead of failing on None.encode()
            if value is None:
                return None
            return hashlib.md5(value.encode("utf8")).hexdigest()  # noqa: S324

        sqlite3.enable_callback_tracebacks(True)  # otherwise errors get ignored
        # the wrapper's raw connection is None until the connection has been opened
        db_connection.ensure_connection()
        db_connection.connection.create_function("MD5", 1, _md5)
class Faker:
    """
    Lazy scrubber that replaces a field's content with pre-generated values of a faker provider.

    The values are generated once per provider/argument combination, stored as ``FakeData`` rows and
    selected per row through the ``mod_pk`` annotation of the outer query.
    """

    # provider keys whose data has already been generated in this process
    INITIALIZED_PROVIDERS: ClassVar[set[str]] = set()

    def __init__(self, provider, *args, **kwargs):
        """
        Remember the faker ``provider`` name and the positional/keyword arguments to call it with.
        """
        import hashlib

        self.provider = provider
        self.provider_args = args
        self.provider_kwargs = kwargs
        # The key must be identical across processes so that rows of an earlier run are found again.
        # Built-in hash() is salted per interpreter run for str, so a digest of the argument repr is
        # used instead. Sorting the kwargs makes the key independent of keyword order, and repr()
        # also accepts unhashable arguments such as lists.
        signature = repr((args, sorted(kwargs.items())))
        args_hash = hashlib.md5(signature.encode("utf8")).hexdigest()  # noqa: S324
        self.provider_key = f"{self.provider} - {args_hash}"

    def _initialize_data(self):
        """
        (Re)generate the ``FakeData`` rows of this provider.

        Raises ``ScrubberInitError`` when an additional faker provider cannot be loaded or when the
        generated rows cannot be stored.
        """
        from .models import FakeData

        # get locale from config and fall back to django's default one
        locale = settings_with_fallback("SCRUBBER_FAKER_LOCALE")
        if not locale:
            locale = to_locale(get_language())
        faker_instance = faker.Faker(locale=locale)

        # load additional faker providers
        for provider_name in settings_with_fallback("SCRUBBER_ADDITIONAL_FAKER_PROVIDERS"):
            # try to load module
            try:
                module_name, class_name = text(provider_name).rsplit(".", 1)
                module = importlib.import_module(module_name)
            except Exception as e:
                raise ScrubberInitError(
                    f"module not found for provider defined in SCRUBBER_ADDITIONAL_FAKER_PROVIDERS: {provider_name}",
                ) from e

            # add provider to faker instance
            provider = getattr(module, class_name, None)
            if provider is None:
                raise ScrubberInitError(
                    "faker provider not found for provider defined in "
                    f"SCRUBBER_ADDITIONAL_FAKER_PROVIDERS: {provider_name}",
                )
            faker_instance.add_provider(provider)

        provider_args_str = ", ".join(str(arg) for arg in self.provider_args)
        # log names AND values of the keyword arguments
        provider_kwargs_str = ", ".join(f"{name}={value}" for name, value in self.provider_kwargs.items())
        logger.info(
            "Initializing fake scrub data for provider %s(%s, %s)",
            self.provider,
            provider_args_str,
            provider_kwargs_str,
        )
        # TODO: maybe be a bit smarter and only regenerate if needed?
        FakeData.objects.filter(provider=self.provider_key).delete()

        # if we don't reset the seed for each provider, registering a new one might change all
        # data for subsequent providers
        faker.Generator.seed(settings_with_fallback("SCRUBBER_RANDOM_SEED"))
        fakedata = [
            FakeData(
                provider=self.provider_key,
                provider_offset=offset,
                content=faker_instance.format(self.provider, *self.provider_args, **self.provider_kwargs),
            )
            for offset in range(settings_with_fallback("SCRUBBER_ENTRIES_PER_PROVIDER"))
        ]

        try:
            FakeData.objects.bulk_create(fakedata)
        except IntegrityError as e:
            raise ScrubberInitError(
                f"Integrity error initializing faker data ({e}); maybe decrease SCRUBBER_ENTRIES_PER_PROVIDER?",
            ) from e

        self.INITIALIZED_PROVIDERS.add(self.provider_key)

    def __call__(self, field):
        """
        Lazily instantiate the actual subquery used for scrubbing.

        ``field`` is only used as the target type of the cast; the fake data itself does not depend on it.
        """
        if self.provider_key not in self.INITIALIZED_PROVIDERS:
            self._initialize_data()

        # import it here to enable global scrubbers in settings.py
        from .models import FakeData

        return Cast(
            Subquery(
                FakeData.objects.filter(
                    provider=self.provider_key,
                    provider_offset=OuterRef("mod_pk"),
                ).values("content")[:1],
            ),
            field,
        )
@staticmethod
def check_pattern(pattern: Union[str, re.Pattern], value):
    """
    Tell whether ``value`` is matched by ``pattern``.

    A compiled regular expression has to match the whole value (the ``fullmatch`` result is returned as
    is); a plain string has to be equal to it. Any other pattern type raises ``ValueError``.
    """
    if isinstance(pattern, re.Pattern):
        return pattern.fullmatch(value)
    if isinstance(pattern, str):
        return pattern == value
    raise ValueError("Invalid pattern type")


def process(self) -> dict:
    """
    Collect, per model label, the names of fields that require scrubbing but have no scrubber.

    Models matched by ``SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST`` are skipped. A field requires scrubbing
    when its exact type is listed in ``SCRUBBER_REQUIRED_FIELD_TYPES``. Models without such leftovers do
    not appear in the result.
    """
    from django_scrubber.management.commands.scrub_data import _get_model_scrubbers

    required_types = settings_with_fallback("SCRUBBER_REQUIRED_FIELD_TYPES")
    whitelist = settings_with_fallback("SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST")

    report = {}
    for model in apps.get_models():
        label = model._meta.label
        if any(self.check_pattern(pattern, label) for pattern in whitelist):
            continue  # whitelisted model

        # names of all fields whose type demands a scrubbing policy
        candidates = [field.name for field in model._meta.get_fields() if type(field) in required_types]

        # names of the fields the model's scrubber definition takes care of
        covered = {field.name for field in _get_model_scrubbers(model)}

        leftovers = [name for name in candidates if name not in covered]
        if leftovers:
            report[label] = leftovers

    return report
| def test_process_no_scrubbing(self): 24 | service = ScrubberValidatorService() 25 | result = service.process() 26 | 27 | self.assertEqual(len(result), 2) 28 | 29 | model_list = tuple(result.keys()) 30 | self.assertIn("auth.User", model_list) 31 | self.assertIn("example.DataToBeScrubbed", model_list) 32 | 33 | @override_settings(SCRUBBER_MAPPING={"auth.User": "FullUserScrubbers"}) 34 | @mock.patch( 35 | "django_scrubber.management.commands.scrub_data._parse_scrubber_class_from_string", 36 | return_value=FullUserScrubbers, 37 | ) 38 | def test_process_scrubber_mapper_all_fields(self, mocked_function): 39 | service = ScrubberValidatorService() 40 | result = service.process() 41 | 42 | self.assertEqual(len(result), 1) 43 | 44 | model_list = tuple(result.keys()) 45 | self.assertIn("example.DataToBeScrubbed", model_list) 46 | 47 | @override_settings(SCRUBBER_MAPPING={"auth.User": "PartUserScrubbers"}) 48 | @mock.patch( 49 | "django_scrubber.management.commands.scrub_data._parse_scrubber_class_from_string", 50 | return_value=PartUserScrubbers, 51 | ) 52 | def test_process_scrubber_mapper_some_fields(self, mocked_function): 53 | service = ScrubberValidatorService() 54 | result = service.process() 55 | 56 | self.assertEqual(len(result), 2) 57 | 58 | model_list = tuple(result.keys()) 59 | self.assertIn("auth.User", model_list) 60 | 61 | @override_settings(SCRUBBER_REQUIRED_FIELD_TYPES=()) 62 | def test_process_scrubber_required_field_type_variable_used(self): 63 | service = ScrubberValidatorService() 64 | result = service.process() 65 | 66 | self.assertEqual(len(result), 0) 67 | 68 | @override_settings( 69 | SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST=[re.compile("auth.*")], 70 | ) 71 | def test_process_scrubber_required_field_model_whitelist_regex(self): 72 | service = ScrubberValidatorService() 73 | result = service.process() 74 | 75 | model_list = tuple(result.keys()) 76 | self.assertNotIn("auth.User", model_list) 77 | self.assertNotIn("auth.Permission", model_list) 78 | 
class TestDjangoScrubber(TestCase):
    """Tests for the ``FakeData`` model."""

    def test_uniqueness(self):
        """A second row with the same provider and offset is rejected by the database."""
        same_slot = {"provider": "foo", "provider_offset": 0}
        models.FakeData.objects.create(content="bar", **same_slot)
        with self.assertRaises(IntegrityError):
            models.FakeData.objects.create(content="baz", **same_slot)
@override_settings(DEBUG=True, SCRUBBER_STRICT_MODE=True)
def test_scrub_data_strict_mode_enabled_scrubbing_blocked(self):
    """
    With strict mode enabled and unscrubbed text fields present, the command must refuse to scrub.

    DEBUG has to be on, otherwise the command stops at the DEBUG guard and strict mode is never reached.
    """
    err = StringIO()

    call_command("scrub_data", stderr=err)
    output = err.getvalue()
    self.user.refresh_from_db()

    self.assertIn('When "SCRUBBER_STRICT_MODE" is enabled, you have to define a scrubbing policy', output)
    self.assertEqual(self.user.first_name, "test_first_name")
class TestScrubValidator(TestCase):
    """Tests for the ``scrub_validation`` management command."""

    def _run_until_exit(self):
        """Run the command, expecting it to exit; return ``(exit_code, captured_stdout)``."""
        buffer = StringIO()
        with self.assertRaises(SystemExit) as caught:
            call_command(
                "scrub_validation",
                verbosity=3,
                stdout=buffer,
            )
        return caught.exception.code, buffer.getvalue()

    @override_settings(SCRUBBER_STRICT_MODE=False)
    def test_scrub_validator_regular(self):
        """Without strict mode the findings are reported, but the exit code stays 0."""
        exit_code, printed = self._run_until_exit()

        self.assertEqual(exit_code, 0)
        self.assertIn("unscrubbed field(s) detected", printed)
        self.assertIn("However strict mode is deactivated and scrubbing is not enforced.", printed)

    @override_settings(SCRUBBER_STRICT_MODE=True)
    def test_scrub_validator_strict_mode(self):
        """In strict mode the findings make the command exit with code 1."""
        exit_code, printed = self._run_until_exit()

        self.assertEqual(exit_code, 1)
        self.assertIn("unscrubbed field(s) detected", printed)
        self.assertNotIn("However strict mode is deactivated and scrubbing is not enforced.", printed)

    @mock.patch.object(ScrubberValidatorService, "process")
    def test_scrub_validator_service_called(self, mocked_method):
        """The command delegates to the validator service; a mocked result counts as "nothing found"."""
        buffer = StringIO()

        call_command("scrub_validation", verbosity=3, stdout=buffer)

        mocked_method.assert_called_once()
        self.assertIn("No unscrubbed fields detected. Yeah!", buffer.getvalue())
32 | def test_hash_scrubber_max_length(self): 33 | data = DataFactory.create(first_name="Foo") 34 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"first_name": scrubbers.Hash}): 35 | call_command("scrub_data") 36 | data.refresh_from_db() 37 | 38 | self.assertNotEqual(data.first_name, "Foo") 39 | self.assertEqual( 40 | len(data.first_name), 41 | DataToBeScrubbed._meta.get_field("first_name").max_length, 42 | "len({}) != {}".format(data.first_name, DataToBeScrubbed._meta.get_field("first_name").max_length), 43 | ) 44 | 45 | def test_hash_scrubber_textfield(self): 46 | data = DataFactory.create(description="Foo") 47 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"description": scrubbers.Hash}): 48 | call_command("scrub_data") 49 | data.refresh_from_db() 50 | 51 | self.assertNotEqual(data.description, "Foo") 52 | 53 | def test_lorem_scrubber(self): 54 | data = DataFactory.create(description="Foo") 55 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"description": scrubbers.Lorem}): 56 | call_command("scrub_data") 57 | data.refresh_from_db() 58 | 59 | self.assertNotEqual(data.description, "Foo") 60 | self.assertEqual(data.description[:11], "Lorem ipsum") 61 | 62 | def test_faker_scrubber_charfield(self): 63 | data = DataFactory.create(last_name="Foo") 64 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"last_name": scrubbers.Faker("last_name")}): 65 | call_command("scrub_data") 66 | data.refresh_from_db() 67 | 68 | self.assertNotEqual(data.last_name, "Foo") 69 | self.assertNotEqual(data.last_name, "") 70 | 71 | def test_faker_scrubber_with_provider_arguments(self): 72 | """ 73 | Use this as an example for Faker scrubbers with parameters passed along 74 | """ 75 | data = DataFactory.create(ean8="8") 76 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"ean8": scrubbers.Faker("ean", length=8)}): 77 | call_command("scrub_data") 78 | data.refresh_from_db() 79 | 80 | # The EAN Faker will by default emit ean13, so this 
would fail if the parameter was ignored 81 | self.assertEqual(8, len(data.ean8)) 82 | 83 | # Add a new scrubber for ean13 84 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"ean8": scrubbers.Faker("ean", length=13)}): 85 | call_command("scrub_data") 86 | data.refresh_from_db() 87 | 88 | # make sure it doesn't reuse the ean with length=8 scrubber 89 | self.assertEqual(13, len(data.ean8)) 90 | 91 | def test_faker_scrubber_datefield(self): 92 | """ 93 | Use this as an example for Scrubber's capability of optimistically Casting to the current field's type 94 | There is a bug with django < 2.1 and sqlite, that's why we don't run the test there. 95 | """ 96 | if django.VERSION >= (2, 1) or connection.vendor != "sqlite": 97 | today = timezone.now().date() 98 | 99 | data = DataFactory.create(date_past=today) 100 | with self.settings( 101 | DEBUG=True, 102 | SCRUBBER_GLOBAL_SCRUBBERS={ 103 | "date_past": scrubbers.Faker( 104 | "past_date", 105 | start_date="-30d", 106 | tzinfo=timezone.get_current_timezone(), 107 | ), 108 | }, 109 | ): 110 | call_command("scrub_data") 111 | data.refresh_from_db() 112 | 113 | self.assertGreater(today, data.date_past) 114 | self.assertLess(today - timedelta(days=31), data.date_past) 115 | 116 | def test_faker_scrubber_run_twice(self): 117 | """ 118 | Use this as an example of what happens when you want to run the same Faker scrubbers twice 119 | """ 120 | data = DataFactory.create(company="Foo") 121 | with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={"company": scrubbers.Faker("company")}): 122 | call_command("scrub_data") 123 | call_command("scrub_data") 124 | data.refresh_from_db() 125 | 126 | self.assertNotEqual(data.company, "Foo") 127 | self.assertNotEqual(data.company, "") 128 | 129 | @override_settings(DEBUG=True) 130 | def test_faker_scrubber_run_clear_session_by_default(self): 131 | """ 132 | Ensures that the session table will be emptied by default 133 | """ 134 | # Create session object 135 | 
Session.objects.create(session_key="foo", session_data="Lorem ipsum", expire_date=timezone.now()) 136 | 137 | # Sanity check 138 | self.assertTrue(Session.objects.all().exists()) 139 | 140 | # Call command 141 | call_command("scrub_data") 142 | 143 | # Assertion that session table is empty now 144 | self.assertFalse(Session.objects.all().exists()) 145 | 146 | @override_settings(DEBUG=True) 147 | def test_faker_scrubber_run_disable_session_clearing(self): 148 | """ 149 | Ensures that the session table is left untouched when keep_sessions=True is passed 150 | """ 151 | # Create session object 152 | Session.objects.create(session_key="foo", session_data="Lorem ipsum", expire_date=timezone.now()) 153 | 154 | # Sanity check 155 | self.assertTrue(Session.objects.all().exists()) 156 | 157 | # Call command 158 | call_command("scrub_data", keep_sessions=True) 159 | 160 | # Assertion that the session still exists after scrubbing 161 | self.assertTrue(Session.objects.all().exists()) 162 | 163 | @override_settings(DEBUG=True) 164 | def test_faker_scrubber_run_clear_faker_data_not_by_default(self): 165 | """ 166 | Ensures that existing FakeData rows are not removed by default 167 | """ 168 | # Create faker data object 169 | FakeData.objects.create(provider="company", content="Foo", provider_offset=1) 170 | 171 | # Sanity check 172 | self.assertTrue(FakeData.objects.filter(provider="company", content="Foo").exists()) 173 | 174 | # Call command 175 | call_command("scrub_data") 176 | 177 | # Assertion that faker data still exists 178 | self.assertTrue(FakeData.objects.filter(provider="company", content="Foo").exists()) 179 | 180 | @override_settings(DEBUG=True) 181 | def test_faker_scrubber_run_clear_faker_data_works(self): 182 | """ 183 | Ensures that existing FakeData rows are removed when remove_fake_data=True is passed 184 | """ 185 | # Create faker data object 186 | FakeData.objects.create(provider="company", content="Foo", provider_offset=1) 187 | 188 | # Sanity check 189 | self.assertTrue(FakeData.objects.filter(provider="company", 
content="Foo").exists()) 190 | 191 | # Call command 192 | call_command("scrub_data", remove_fake_data=True) 193 | 194 | # Assertion that faker data has been removed 195 | self.assertFalse(FakeData.objects.filter(provider="company", content="Foo").exists()) 196 | -------------------------------------------------------------------------------- /django_scrubber/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase, override_settings 2 | 3 | from django_scrubber import defaults, settings_with_fallback 4 | 5 | 6 | class TestScrubbers(TestCase): 7 | def test_default(self): 8 | self.assertEqual( 9 | defaults["SCRUBBER_RANDOM_SEED"], 10 | settings_with_fallback("SCRUBBER_RANDOM_SEED"), 11 | ) 12 | 13 | @override_settings(SCRUBBER_RANDOM_SEED=9001) 14 | def test_override(self): 15 | self.assertEqual( 16 | 9001, 17 | settings_with_fallback("SCRUBBER_RANDOM_SEED"), 18 | ) 19 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------------- 4 | -------------------------------------------------------------------------------- /example/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class ExampleConfig(AppConfig): 5 | name = "example" 6 | verbose_name = "Example" 7 | -------------------------------------------------------------------------------- /example/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from factory.django import DjangoModelFactory 3 | 4 | 5 | class DataToBeScrubbed(models.Model): 6 | first_name = models.CharField(max_length=8) 7 | last_name = 
models.CharField(max_length=255, blank=True, default="") 8 | description = models.TextField() 9 | ean8 = models.CharField(max_length=13) 10 | date_past = models.DateField(null=True) 11 | company = models.CharField(max_length=255) 12 | 13 | def __str__(self): 14 | return f"{self.first_name} {self.last_name}" 15 | 16 | 17 | class DataFactory(DjangoModelFactory): 18 | class Meta: 19 | model = DataToBeScrubbed 20 | -------------------------------------------------------------------------------- /example/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DEBUG = True 4 | USE_TZ = True 5 | 6 | # SECURITY WARNING: keep the secret key used in production secret! 7 | SECRET_KEY = "uzbLoOIYlJnzGDYlUfynNyocjZH9NLSc3AAREwLDaugQkCzsQn" # noqa: S105 8 | 9 | DATABASES = { 10 | "default": { 11 | "ENGINE": "django.db.backends.sqlite3", 12 | "NAME": ":memory:", 13 | "OPTIONS": {}, 14 | }, 15 | } 16 | 17 | if os.environ.get("GITHUB_WORKFLOW", None): 18 | DATABASE_ENGINE = os.environ.get("DATABASE_ENGINE", "sqlite") 19 | if "mysql" in DATABASE_ENGINE: 20 | DATABASES = { 21 | "default": { 22 | "ENGINE": "django.db.backends.mysql", 23 | "NAME": "test", 24 | "USER": "root", 25 | "PASSWORD": "", 26 | "HOST": "127.0.0.1", 27 | "PORT": "3306", 28 | }, 29 | } 30 | elif "postgres" in DATABASE_ENGINE: 31 | DATABASES = { 32 | "default": { 33 | "ENGINE": "django.db.backends.postgresql", 34 | "NAME": "postgres", 35 | "USER": "postgres", 36 | "PASSWORD": "postgres", 37 | "HOST": "127.0.0.1", 38 | "PORT": "5432", 39 | }, 40 | } 41 | 42 | INSTALLED_APPS = [ 43 | "django.contrib.auth", 44 | "django.contrib.contenttypes", 45 | "django.contrib.sessions", 46 | "django.contrib.sites", 47 | "django_scrubber.apps.DjangoScrubberConfig", 48 | "example.apps.ExampleConfig", 49 | ] 50 | 51 | SITE_ID = 1 52 | 53 | MIDDLEWARE = () 54 | 55 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" 56 | 
-------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ------------------------------------------- 4 | # Managed by modulesync - DO NOT EDIT 5 | # ------------------------------------------- 6 | 7 | import os 8 | import sys 9 | 10 | if __name__ == "__main__": 11 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "example.settings") 12 | 13 | from django.core.management import execute_from_command_line 14 | 15 | execute_from_command_line(sys.argv) 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------- 2 | # Managed by modulesync - DO NOT EDIT 3 | # ------------------------------------------- 4 | 5 | [project] 6 | name = "django-scrubber" 7 | dynamic = ["version"] 8 | license = "MIT" 9 | requires-python = ">=3.9" 10 | description = "Data anonymizer for Django" 11 | readme = "README.md" 12 | keywords = ["django", "data protection", "scrubber", "scrub", "anonymize", "gdpr"] 13 | authors = [ 14 | { name = "RegioHelden GmbH", email = "opensource@regiohelden.de" }, 15 | ] 16 | maintainers = [ 17 | { name = "RegioHelden GmbH", email = "opensource@regiohelden.de" }, 18 | ] 19 | classifiers = [ 20 | "Development Status :: 5 - Production/Stable", 21 | "Environment :: Web Environment", 22 | "Framework :: Django", 23 | "Intended Audience :: Developers", 24 | "License :: OSI Approved :: MIT License", 25 | "Operating System :: OS Independent", 26 | "Programming Language :: Python", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | "Programming Language :: Python :: 
3.13", 33 | "Topic :: Software Development", 34 | "Topic :: Software Development :: Libraries :: Python Modules", 35 | ] 36 | dependencies = ["Faker>=20.0.0", "Django>=4.2,<6.0"] 37 | 38 | [project.urls] 39 | Homepage = "https://github.com/RegioHelden/django-scrubber" 40 | Documentation = "https://github.com/RegioHelden/django-scrubber/blob/main/README.md" 41 | Repository = "https://github.com/RegioHelden/django-scrubber.git" 42 | Issues = "https://github.com/RegioHelden/django-scrubber/issues" 43 | Changelog = "https://github.com/RegioHelden/django-scrubber/blob/main/CHANGELOG.md" 44 | 45 | [build-system] 46 | requires = ["hatchling"] 47 | build-backend = "hatchling.build" 48 | 49 | [tool.hatch.version] 50 | path = "version" 51 | pattern = "(?P<version>.+)" 52 | 53 | [tool.hatch.build.targets.wheel] 54 | include = ["LICENSE", "README.md", "CHANGELOG.md", "django_scrubber/*"] 55 | 56 | [tool.hatch.build.targets.sdist] 57 | include = ["LICENSE", "README.md", "CHANGELOG.md", "django_scrubber/*"] 58 | 59 | [tool.ruff] 60 | exclude = [ 61 | ".cache", 62 | ".git", 63 | "__pycache", 64 | "docs", 65 | "migrations", 66 | "src", 67 | ] 68 | line-length = 119 69 | 70 | [tool.ruff.lint] 71 | dummy-variable-rgx = "_|dummy" 72 | # See https://docs.astral.sh/ruff/rules/ for all supported rules 73 | select = [ 74 | "A", # flake8-builtins 75 | "B", # flake8-bugbear 76 | "BLE", # flake8-blind-except 77 | "C4", # flake8-comprehensions 78 | "C90", # mccabe 79 | "COM", # flake8-commas 80 | "DJ", # flake8-django 81 | "DTZ", # flake8-datetimez 82 | "E", # pycodestyle 83 | "ERA", # eradicate 84 | "F", # pyflakes 85 | "G", # flake8-logging-format 86 | "I", # isort 87 | "ICN", # flake8-import-conventions 88 | "INP", # flake8-no-pep420 89 | "N", # pep8-naming 90 | "PIE", # flake8-pie 91 | "PGH", # pygrep-hooks 92 | "PL", # pylint 93 | "PTH", # flake8-use-pathlib 94 | "RET", # flake8-return 95 | "RSE", # flake8-raise 96 | "RUF", # ruff-specific rules 97 | "S", # flake8-bandit 98 | "SIM", # 
flake8-simplify 99 | "T20", # flake8-print 100 | "TID", # flake8-tidy-imports 101 | "UP", # pyupgrade 102 | "W", # pycodestyle 103 | "YTT", # flake8-2020 104 | ] 105 | 106 | [tool.ruff.lint.pycodestyle] 107 | max-line-length = 119 108 | 109 | [tool.ruff.lint.mccabe] 110 | max-complexity = 16 111 | 112 | [tool.coverage.run] 113 | branch = true 114 | 115 | [tool.coverage.report] 116 | omit = ["*site-packages*", "*tests*", "*.tox*"] 117 | show_missing = true 118 | exclude_lines = ["raise NotImplementedError"] 119 | -------------------------------------------------------------------------------- /requirements-ci.txt: -------------------------------------------------------------------------------- 1 | factory_boy==3.3.3 2 | Faker==36.1.1 3 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | -r requirements-ci.txt 2 | Django==5.2 3 | -------------------------------------------------------------------------------- /system_dependencies.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RegioHelden/django-scrubber/65eb2be50c5a4aee82b7759f18eed772026771b7/system_dependencies.txt -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | 5.0.0 2 | --------------------------------------------------------------------------------