├── .dockerignore ├── .gitattributes ├── .github ├── CODEOWNERS ├── pull_request_template.md └── workflows │ ├── docker.yaml │ ├── pr_checks.yml │ ├── publish_docs.yaml │ └── publish_package.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── data_files ├── data_categories.csv ├── data_categories.json ├── data_categories.yml ├── data_subjects.csv ├── data_subjects.json ├── data_subjects.yml ├── data_uses.csv ├── data_uses.json └── data_uses.yml ├── demo_resources ├── demo_dataset.yml ├── demo_extended_taxonomy.yml ├── demo_organization.yml ├── demo_policy.yml └── demo_system.yml ├── dev-requirements.txt ├── docker-compose.yml ├── implementation.md ├── mkdocs ├── Dockerfile ├── docs │ ├── .DS_Store │ ├── .nojekyll │ ├── css │ │ ├── fides.css │ │ ├── logo.css │ │ └── taxonomy.css │ ├── csv │ │ ├── data_elements.csv │ │ ├── data_subjects.csv │ │ └── data_uses.csv │ ├── draft.md │ ├── explorer.md │ ├── img │ │ ├── .DS_Store │ │ ├── Radial Tree@1x.svg │ │ ├── Resource_Relations.svg │ │ ├── Sunburst@1x.svg │ │ ├── Tree@1x.svg │ │ ├── ethyca.svg │ │ ├── favicon.ico │ │ ├── fideslang.png │ │ ├── fideslang.svg │ │ └── ~fideslang.svg │ ├── index.md │ ├── js │ │ ├── .DS_Store │ │ ├── vis.js │ │ ├── vis2-absolute.js │ │ ├── vis2.js │ │ └── visdraft.js │ ├── license.md │ ├── overview.md │ ├── resources │ │ ├── dataset.md │ │ ├── organization.md │ │ ├── policy.md │ │ └── system.md │ ├── syntax.md │ └── taxonomy │ │ ├── data_categories.md │ │ ├── data_subjects.md │ │ ├── data_uses.md │ │ └── overview.md ├── mkdocs.yml ├── overrides │ └── partials │ │ └── footer.html └── requirements.txt ├── noxfile.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── README.md └── export_default_taxonomy.py ├── src ├── __init__.py └── fideslang │ ├── __init__.py │ ├── default_fixtures.py │ ├── default_taxonomy │ ├── __init__.py │ ├── data_categories.py │ ├── 
data_subjects.py │ ├── data_uses.py │ ├── organizations.py │ └── utils.py │ ├── gvl │ ├── __init__.py │ ├── gvl_data_category_mapping.json │ ├── gvl_data_use_mapping.json │ ├── gvl_feature_mapping.json │ └── models.py │ ├── manifests.py │ ├── models.py │ ├── parse.py │ ├── py.typed │ ├── relationships.py │ ├── utils.py │ └── validation.py └── tests ├── conftest.py ├── data ├── failing_dataset_collection_taxonomy.yml ├── failing_dataset_field_taxonomy.yml ├── failing_dataset_taxonomy.yml ├── failing_declaration_taxonomy.yml ├── failing_nested_dataset.yml ├── passing_declaration_taxonomy.yml ├── sample_hierarchy_figures.json └── sample_manifest.yml └── fideslang ├── gvl └── test_gvl.py ├── test_default_taxonomy.py ├── test_manifests.py ├── test_models.py ├── test_parse.py ├── test_relationships.py └── test_validation.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore existing build artifacts 2 | build/ 3 | dist/ 4 | src/fideslang.egg-info/ 5 | 6 | # Ignore the docs 7 | mkdocs/ 8 | 9 | # Ignore dev files 10 | .github/ 11 | .devcontainer/ 12 | .nox/ 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | src/fideslang/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Set the fidesctl core team as the default codeowners 2 | 3 | * @ethyca/fidesctl-team 4 | 5 | # Set the product/tech writing team as owners for the docs 6 | 7 | mkdocs/ @ethyca/docs-authors 8 | README.md @ethyca/docs-authors 9 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Closes # 2 | 3 | ### 
Description Of Changes 4 | 5 | _Write some things here about the changes and any potential caveats_ 6 | 7 | 8 | ### Code Changes 9 | 10 | * [ ] _list your code changes here_ 11 | 12 | ### Steps to Confirm 13 | 14 | * [ ] _list any manual steps taken to confirm the changes_ 15 | 16 | ### Pre-Merge Checklist 17 | 18 | * [ ] All CI Pipelines Succeeded 19 | * [ ] Documentation Updated 20 | * [ ] Issue Requirements are Met 21 | * [ ] Relevant Follow-Up Issues Created 22 | * [ ] Update `CHANGELOG.md` 23 | -------------------------------------------------------------------------------- /.github/workflows/docker.yaml: -------------------------------------------------------------------------------- 1 | name: Docker Build & Push 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | env: 9 | DOCKER_USER: ethycaci 10 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 11 | 12 | jobs: 13 | push-fidesctl: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | with: 18 | fetch-depth: 0 # This is required to properly tag images 19 | 20 | - name: Login to DockerHub 21 | uses: docker/login-action@v1 22 | with: 23 | username: ${{ env.DOCKER_USER }} 24 | password: ${{ env.DOCKER_TOKEN }} 25 | 26 | - name: Build Fideslang 27 | run: make build 28 | 29 | - name: Push Fideslang 30 | run: make push 31 | -------------------------------------------------------------------------------- /.github/workflows/pr_checks.yml: -------------------------------------------------------------------------------- 1 | name: Static Checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "*" 7 | 8 | env: 9 | CONTAINER: fideslang-local 10 | IMAGE: ethyca/fideslang:local 11 | DEFAULT_PYTHON_VERSION: "3.10.11" 12 | 13 | jobs: 14 | Build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@v2 19 | 20 | - name: Set up Docker Buildx 21 | id: buildx 22 | uses: docker/setup-buildx-action@v1 23 | 24 | - name: Build fideslang container 25 | uses: 
docker/build-push-action@v2 26 | with: 27 | builder: ${{ steps.buildx.outputs.name }} 28 | context: . 29 | target: prod 30 | outputs: type=docker,dest=/tmp/${{ env.CONTAINER }}.tar 31 | push: false 32 | tags: ${{ env.IMAGE }} 33 | 34 | - name: Upload fideslang container 35 | uses: actions/upload-artifact@v2 36 | with: 37 | name: ${{ env.CONTAINER }} 38 | path: /tmp/${{ env.CONTAINER }}.tar 39 | retention-days: 1 40 | 41 | Export: 42 | runs-on: ubuntu-latest 43 | steps: 44 | - name: Checkout 45 | uses: actions/checkout@v3 46 | 47 | - name: Set Up Python 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: "3.10.11" 51 | cache: "pip" 52 | 53 | - name: Pin Cython 54 | run: | 55 | echo 'Cython < 3.0' > /tmp/constraint.txt 56 | PIP_CONSTRAINT=/tmp/constraint.txt pip wheel PyYAML==5.4.1 57 | pip install 'PyYAML==5.4.1' 58 | 59 | - name: Install Package 60 | run: pip install . 61 | 62 | - name: Run Export 63 | run: python scripts/export_default_taxonomy.py 64 | 65 | Static-Checks: 66 | continue-on-error: true 67 | strategy: 68 | matrix: 69 | session_name: ["black", "mypy", "pylint", "xenon"] 70 | runs-on: ubuntu-latest 71 | steps: 72 | - name: Checkout 73 | uses: actions/checkout@v3 74 | 75 | - name: Set Up Python 76 | uses: actions/setup-python@v4 77 | with: 78 | python-version: ${{ env.DEFAULT_PYTHON_VERSION }} 79 | cache: "pip" 80 | 81 | - name: Pin Cython 82 | run: | 83 | echo 'Cython < 3.0' > /tmp/constraint.txt 84 | PIP_CONSTRAINT=/tmp/constraint.txt pip wheel PyYAML==5.4.1 85 | pip install 'PyYAML==5.4.1' 86 | 87 | - name: Install Nox 88 | run: pip install nox 89 | 90 | - name: Run Static Check 91 | run: nox -s ${{ matrix.session_name }} 92 | 93 | Pytest-Matrix: 94 | strategy: 95 | matrix: 96 | python_version: ["3.8", "3.9", "3.10", "3.11"] 97 | pydantic_version: ["1.8.2", "1.9.2", "1.10.9"] 98 | pyyaml_version: ["5.4.1", "6.0"] 99 | runs-on: ubuntu-latest 100 | continue-on-error: true 101 | steps: 102 | - name: Checkout 103 | uses: 
actions/checkout@v3 104 | 105 | - name: Set Up Python 106 | uses: actions/setup-python@v4 107 | with: 108 | python-version: ${{ matrix.python_version }} 109 | cache: "pip" 110 | 111 | - name: Pin Cython 112 | run: | 113 | echo 'Cython < 3.0' > /tmp/constraint.txt 114 | PIP_CONSTRAINT=/tmp/constraint.txt pip wheel PyYAML==5.4.1 115 | pip install 'PyYAML==5.4.1' 116 | 117 | - name: Install Nox 118 | run: pip install nox 119 | 120 | - name: Run Tests 121 | run: nox -s "tests-${{ matrix.python_version }}(pyyaml_version='${{ matrix.pyyaml_version }}', pydantic_version='${{ matrix.pydantic_version }}')" 122 | 123 | CSV-Checks: 124 | runs-on: ubuntu-latest 125 | steps: 126 | - name: Checkout 127 | uses: actions/checkout@v3 128 | 129 | - name: Check CSV Files 130 | run: | 131 | echo "Check all data_files/*.csv for any empty lines..." 132 | ls -l data_files/*.csv 133 | if grep -n -E '^$' data_files/*.csv; then 134 | echo "Error: empty lines found (see grep matches above)" 135 | exit 1 136 | fi 137 | echo "Success!" 
138 | -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yaml: -------------------------------------------------------------------------------- 1 | name: Publish updated docs to gh-pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "mkdocs/**" 9 | 10 | jobs: 11 | publish_docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-python@v2 16 | - run: pip install -r mkdocs/requirements.txt 17 | - name: Publish docs 18 | run: mkdocs gh-deploy -v -f mkdocs/mkdocs.yml --force 19 | -------------------------------------------------------------------------------- /.github/workflows/publish_package.yaml: -------------------------------------------------------------------------------- 1 | name: Publish fideslang 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | upload_to_pypi: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Install Twine 15 | run: pip install twine 16 | 17 | - name: Build Distributions 18 | run: | 19 | python -m pip install build 20 | python -m build 21 | 22 | - name: Twine Upload 23 | run: twine upload dist/* 24 | env: 25 | TWINE_USERNAME: __token__ 26 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## generic files to ignore 2 | *~ 3 | *.lock 4 | *.DS_Store 5 | *.swp 6 | *.out 7 | 8 | # rails specific 9 | *.sqlite3 10 | config/database.yml 11 | log/* 12 | tmp/* 13 | 14 | # java specific 15 | *.class 16 | 17 | # docs 18 | mkdocs/docs/site/ 19 | 20 | # python specific 21 | *.pyc 22 | 23 | # xcode/iphone specific 24 | build/* 25 | *.pbxuser 26 | *.mode2v3 27 | *.mode1v3 28 | *.perspective 29 | *.perspectivev3 30 | *~.nib 31 | 32 | # akka specific 33 | logs/* 34 | 35 | # sbt specific 36 | target/ 37 | 
project/boot 38 | lib_managed/* 39 | project/build/target 40 | project/build/lib_managed 41 | project/build/src_managed 42 | project/plugins/lib_managed 43 | project/plugins/target 44 | project/plugins/src_managed 45 | project/plugins/project 46 | .bsp 47 | 48 | core/lib_managed 49 | core/target 50 | pubsub/lib_managed 51 | pubsub/target 52 | 53 | # eclipse specific 54 | .metadata 55 | jrebel.lic 56 | .settings 57 | .classpath 58 | .project 59 | 60 | .ensime* 61 | *.sublime-* 62 | .cache 63 | 64 | # intellij 65 | *.eml 66 | *.iml 67 | *.ipr 68 | *.iws 69 | .*.sw? 70 | .idea 71 | 72 | # metals 73 | .metals 74 | .bloop 75 | project/metals.sbt 76 | 77 | # paulp script 78 | /.lib/ 79 | 80 | # Editors 81 | .vscode/ 82 | .idea/ 83 | 84 | # Vagrant 85 | .vagrant/ 86 | 87 | # Mac/OSX 88 | .DS_Store 89 | 90 | # Windows 91 | Thumbs.db 92 | 93 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore 94 | # Byte-compiled / optimized / DLL files 95 | __pycache__/ 96 | *.py[cod] 97 | *$py.class 98 | *.pyc 99 | 100 | # C extensions 101 | *.so 102 | 103 | # Distribution / packaging 104 | .Python 105 | build/ 106 | src/fideslang/_version.py 107 | develop-eggs/ 108 | dist/ 109 | downloads/ 110 | eggs/ 111 | .eggs/ 112 | lib/ 113 | lib64/ 114 | parts/ 115 | sdist/ 116 | var/ 117 | wheels/ 118 | *egg-info/ 119 | .installed.cfg 120 | *.egg 121 | MANIFEST 122 | conda-out/ 123 | 124 | # PyInstaller 125 | # Usually these files are written by a python script from a template 126 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
127 | *.manifest 128 | *.spec 129 | 130 | # Installer logs 131 | pip-log.txt 132 | pip-delete-this-directory.txt 133 | 134 | # Unit test / coverage reports 135 | htmlcov/ 136 | .tox/ 137 | .nox/ 138 | .coverage 139 | .coverage.* 140 | .cache 141 | nosetests.xml 142 | coverage.xml 143 | *.cover 144 | .hypothesis/ 145 | .pytest_cache/ 146 | 147 | # Translations 148 | *.mo 149 | *.pot 150 | 151 | # Django stuff: 152 | *.log 153 | local_settings.py 154 | db.sqlite3 155 | 156 | # Flask stuff: 157 | instance/ 158 | .webassets-cache 159 | 160 | # Scrapy stuff: 161 | .scrapy 162 | 163 | # Sphinx documentation 164 | docs/_build/ 165 | 166 | # PyBuilder 167 | target/ 168 | 169 | # Jupyter Notebook 170 | .ipynb_checkpoints 171 | 172 | # IPython 173 | profile_default/ 174 | ipython_config.py 175 | 176 | # pyenv 177 | .python-version 178 | 179 | # celery beat schedule file 180 | celerybeat-schedule 181 | 182 | # SageMath parsed files 183 | *.sage.py 184 | 185 | # Environments 186 | .env 187 | .venv 188 | env/ 189 | venv/ 190 | ENV/ 191 | env.bak/ 192 | venv.bak/ 193 | 194 | # Spyder project settings 195 | .spyderproject 196 | .spyproject 197 | 198 | # Rope project settings 199 | .ropeproject 200 | 201 | # mkdocs documentation 202 | /site 203 | 204 | # mypy 205 | .mypy_cache/ 206 | .dmypy.json 207 | dmypy.json 208 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | minimum_pre_commit_version: "2" 2 | 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: docker 7 | name: docker 8 | entry: make build-local 9 | files: "^src/" 10 | types_or: [file, python] 11 | language: system 12 | 13 | - id: black 14 | name: black 15 | entry: make black 16 | files: "^src/" 17 | types_or: [file, python] 18 | language: system 19 | 20 | - id: mypy 21 | name: mypy 22 | entry: make mypy 23 | files: "^src/" 24 | types_or: [file, python] 25 | language: system 26 | 27 | 
- id: xenon 28 | name: xenon 29 | entry: make xenon 30 | files: "^src/" 31 | types_or: [file, python] 32 | language: system 33 | 34 | - id: pylint 35 | name: pylint 36 | entry: make pylint 37 | files: "^src/" 38 | types_or: [file, python] 39 | language: system 40 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Fides Code of Conduct 2 | 3 | The Fides project, which includes Fideslang, adheres to the following [Code of Conduct](https://ethyca.github.io/fides/community/code_of_conduct/). 4 | 5 | The Fides core team welcomes any contributions and suggestions to help make the community a better place 🤝 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Fides Contribution Guidelines 2 | 3 | The Fides project, which includes Fideslang, adheres to the following [Contribution Guidelines](https://ethyca.github.io/fides/development/overview/). 
4 | 5 | The Fides core team welcomes any contributions and suggestions to help make the community a better place 🤝 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-bullseye as base 2 | 3 | # Update pip in the base image since we'll use it everywhere 4 | RUN pip install -U pip 5 | 6 | ####################### 7 | ## Tool Installation ## 8 | ####################### 9 | 10 | FROM base as builder 11 | 12 | RUN : \ 13 | && apt-get update \ 14 | && apt-get install \ 15 | -y --no-install-recommends \ 16 | curl \ 17 | git \ 18 | make \ 19 | vim \ 20 | g++ \ 21 | gnupg \ 22 | gcc \ 23 | && apt-get clean \ 24 | && rm -rf /var/lib/apt/lists/* 25 | 26 | ######################### 27 | ## Python Dependencies ## 28 | ######################### 29 | 30 | COPY dev-requirements.txt dev-requirements.txt 31 | RUN pip install -r dev-requirements.txt 32 | 33 | COPY requirements.txt requirements.txt 34 | RUN pip install -r requirements.txt 35 | 36 | ############################### 37 | ## General Application Setup ## 38 | ############################### 39 | 40 | COPY . 
/fideslang 41 | WORKDIR /fideslang 42 | 43 | # Immediately flush to stdout, globally 44 | ENV PYTHONUNBUFFERED=TRUE 45 | 46 | # Enable detection of running within Docker 47 | ENV RUNNING_IN_DOCKER=TRUE 48 | 49 | ################################### 50 | ## Application Development Setup ## 51 | ################################### 52 | 53 | FROM builder as dev 54 | 55 | # Install fideslang as a symlink 56 | RUN pip install -e ".[all]" 57 | 58 | ################################## 59 | ## Production Application Setup ## 60 | ################################## 61 | 62 | FROM builder as prod 63 | 64 | # Install without a symlink 65 | RUN python -m pip install build 66 | RUN python -m build 67 | RUN pip install dist/fideslang-*.tar.gz 68 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/fideslang/py.typed 3 | include README.md 4 | include requirements.txt 5 | include dev-requirements.txt 6 | include versioneer.py 7 | include src/fideslang/_version.py 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := help 2 | 3 | #################### 4 | # CONSTANTS 5 | #################### 6 | REGISTRY := ethyca 7 | IMAGE_TAG := $(shell git fetch --force --tags && git describe --tags --dirty --always) 8 | 9 | # Image Names & Tags 10 | IMAGE_NAME := fideslang 11 | IMAGE := $(REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG) 12 | IMAGE_LOCAL := $(REGISTRY)/$(IMAGE_NAME):local 13 | IMAGE_LATEST := $(REGISTRY)/$(IMAGE_NAME):latest 14 | 15 | # Disable TTY to preserve output within Github Actions logs 16 | # CI env variable is always set to true in Github Actions 17 | ifeq "$(CI)" "true" 18 | CI_ARGS:=--no-TTY 19 | endif 20 | 21 | # Run in Compose 22 | RUN = docker compose run --rm $(CI_ARGS) 
$(IMAGE_NAME) 23 | 24 | .PHONY: help 25 | help: 26 | @echo -------------------- 27 | @echo Development Targets: 28 | @echo ---- 29 | @echo build - Builds the fideslang Docker image. 30 | @echo ---- 31 | @echo check-all - Run all CI checks 32 | @echo ---- 33 | @echo clean - Runs Docker commands to clean up the docker local environment. 34 | @echo ---- 35 | @echo shell - Starts a shell within the container to run CLI commands. 36 | @echo ---- 37 | @echo docs-serve - Spins up the docs server on localhost:8000 38 | @echo -------------------- 39 | 40 | #################### 41 | # Dev 42 | #################### 43 | 44 | .PHONY: shell 45 | shell: build-local 46 | @echo "Setting up a local development shell... (press CTRL-D to exit)" 47 | @$(RUN) /bin/bash 48 | @make teardown 49 | 50 | #################### 51 | # Docker 52 | #################### 53 | 54 | build: 55 | docker build --target=prod --tag $(IMAGE) . 56 | 57 | build-local: 58 | docker build --target=dev --tag $(IMAGE_LOCAL) . 59 | 60 | # The production image is used for running tests in CI 61 | build-local-prod: 62 | docker build --target=prod --tag $(IMAGE_LOCAL) . 63 | 64 | push: build 65 | docker tag $(IMAGE) $(IMAGE_LATEST) 66 | docker push $(IMAGE) 67 | docker push $(IMAGE_LATEST) 68 | 69 | #################### 70 | # CI 71 | #################### 72 | 73 | black: 74 | @$(RUN) nox -s black 75 | 76 | # The order of dependent targets here is intentional 77 | check-all: teardown build-local-prod check-install check-static pytest 78 | @echo "Running formatter, linter, typechecker and tests..." 79 | 80 | check-static: 81 | @$(RUN) nox -s check_static 82 | 83 | check-install: 84 | @echo "Checking that fideslang is installed..." 
85 | @$(RUN) python -c "import fideslang" 86 | 87 | mypy: 88 | @$(RUN) nox -s mypy 89 | 90 | pylint: 91 | @$(RUN) nox -s pylint 92 | 93 | pytest: build-local 94 | @$(RUN) nox -s tests 95 | 96 | xenon: 97 | @$(RUN) nox -s xenon 98 | 99 | #################### 100 | # Utils 101 | #################### 102 | 103 | .PHONY: clean 104 | clean: 105 | @echo "Doing docker cleanup for this project..." 106 | @docker compose down --remove-orphans --volumes --rmi all 107 | @docker system prune --force 108 | @echo "Clean complete!" 109 | 110 | .PHONY: teardown 111 | teardown: 112 | @echo "Tearing down the dev environment..." 113 | @docker compose down --remove-orphans 114 | @echo "Teardown complete" 115 | 116 | .PHONY: docs-serve 117 | docs-serve: 118 | @docker compose build docs 119 | @docker compose run --rm --service-ports $(CI_ARGS) docs \ 120 | /bin/bash -c "mkdocs serve --dev-addr=0.0.0.0:8000" 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fideslang 2 | 3 | [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/) [![Twitter](https://img.shields.io/twitter/follow/ethyca?style=social)](https://twitter.com/ethyca) 4 | 5 | ![Fideslang banner](mkdocs/docs/img/fideslang.png "Fideslang banner") 6 | 7 | ## Overview 8 | 9 | Fideslang or Fides Language is a privacy taxonomy and working draft of a proposed structure to describe data and data processing behaviors as part of a typical software development process. Our hope with standardizing this definition publicly with the community is to derive an interoperable standard for describing types of data and how they're being used in applications to simplify global privacy regulations. 
10 | 11 | **To view the detailed taxonomy documentation, please visit [https://ethyca.github.io/fideslang/](https://ethyca.github.io/fideslang)** 12 | 13 | ## Summary of Taxonomy Classification Groups 14 | 15 | The taxonomy is currently comprised of three classification groups that are used together to easily describe the data types and associated processing behaviors of an entire tech stack; both the application processes and any data storage. 16 | 17 | [Click here to view an interactive visualization of the taxonomy](https://ethyca.github.io/fideslang/explorer/) 18 | 19 | ### 1. Data Categories 20 | 21 | Data Categories are labels used to describe the type of data processed by a system. You can assign one or more data categories to a field when classifying a system. 22 | 23 | Data Categories are hierarchical with natural inheritance, meaning you can classify data coarsely with a high-level category (e.g. `user.contact` data), or you can classify it with greater precision using subclasses (e.g. `user.contact.email` data). 24 | 25 | Learn more about [Data Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_categories/). 26 | 27 | ### 2. Data Use Categories 28 | 29 | Data Use Categories are labels that describe how, or for what purpose(s) a component of your system is using data. Similar to data categories, you can assign one or multiple Data Use Categories to a system. 30 | 31 | Data Use Categories are also hierarchical with natural inheritance, meaning you can easily describe what you're using data for either coarsely (e.g. `provide.service.operations`) or with more precision using subclasses (e.g. `provide.service.operations.support.optimization`). 32 | 33 | Learn more about [Data Use Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_uses/). 34 | 35 | ### 3. 
Data Subject Categories 36 | 37 | "Data Subject" is a label commonly used in the regulatory world to describe the users of a system whose data is being processed. In many systems a generic user label may be sufficient; however, the Privacy Taxonomy is intended to provide greater control through specificity where needed. 38 | 39 | Examples of a Data Subject are: 40 | 41 | - `anonymous_user` 42 | - `employee` 43 | - `customer` 44 | - `patient` 45 | - `next_of_kin` 46 | 47 | Learn more about [Data Subject Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_subjects/). 48 | 49 | ### Extensibility & Interoperability 50 | 51 | The taxonomy is designed to support common privacy compliance regulations and standards out of the box; these include GDPR, CCPA, LGPD and ISO 19944. 52 | 53 | You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. 54 | 55 | If you have suggestions for missing classifications or concepts, please submit them for addition. 56 | -------------------------------------------------------------------------------- /data_files/data_subjects.csv: -------------------------------------------------------------------------------- 1 | automated_decisions_or_profiling,fides_key,is_default,name,organization_fides_key,replaced_by,rights,tags,version_added,version_deprecated,parent_key,description 2 | ,data_subject,,Data Subject,,,,,,,, 3 | ,anonymous_user,True,Anonymous User,default_organization,,,,2.0.0,,data_subject,An individual that is unidentifiable to the systems. Note - This should only be applied to truly anonymous users where there is no risk of re-identification 4 | ,citizen_voter,True,Citizen Voter,default_organization,,,,2.0.0,,data_subject,An individual registered to voter with a state or authority. 
5 | ,commuter,True,Commuter,default_organization,,,,2.0.0,,data_subject,An individual that is traveling or transiting in the context of location tracking. 6 | ,consultant,True,Consultant,default_organization,,,,2.0.0,,data_subject,An individual employed in a consultative/temporary capacity by the organization. 7 | ,customer,True,Customer,default_organization,,,,2.0.0,,data_subject,An individual or other organization that purchases goods or services from the organization. 8 | ,employee,True,Employee,default_organization,,,,2.0.0,,data_subject,An individual employed by the organization. 9 | ,job_applicant,True,Job Applicant,default_organization,,,,2.0.0,,data_subject,An individual applying for employment to the organization. 10 | ,next_of_kin,True,Next of Kin,default_organization,,,,2.0.0,,data_subject,A relative of any other individual subject where such a relationship is known. 11 | ,passenger,True,Passenger,default_organization,,,,2.0.0,,data_subject,An individual traveling on some means of provided transport. 12 | ,patient,True,Patient,default_organization,,,,2.0.0,,data_subject,An individual identified for the purposes of any medical care. 13 | ,prospect,True,Prospect,default_organization,,,,2.0.0,,data_subject,An individual or organization to whom an organization is selling goods or services. 14 | ,shareholder,True,Shareholder,default_organization,,,,2.0.0,,data_subject,An individual or organization that holds equity in the organization. 15 | ,supplier_vendor,True,Supplier/Vendor,default_organization,,,,2.0.0,,data_subject,An individual or organization that provides services or goods to the organization. 16 | ,trainee,True,Trainee,default_organization,,,,2.0.0,,data_subject,An individual undergoing training by the organization. 17 | ,visitor,True,Visitor,default_organization,,,,2.0.0,,data_subject,An individual visiting a location. 
18 | -------------------------------------------------------------------------------- /data_files/data_subjects.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_subject": [ 3 | { 4 | "version_added": "2.0.0", 5 | "version_deprecated": null, 6 | "replaced_by": null, 7 | "is_default": true, 8 | "fides_key": "anonymous_user", 9 | "organization_fides_key": "default_organization", 10 | "tags": null, 11 | "name": "Anonymous User", 12 | "description": "An individual that is unidentifiable to the systems. Note - This should only be applied to truly anonymous users where there is no risk of re-identification", 13 | "rights": null, 14 | "automated_decisions_or_profiling": null 15 | }, 16 | { 17 | "version_added": "2.0.0", 18 | "version_deprecated": null, 19 | "replaced_by": null, 20 | "is_default": true, 21 | "fides_key": "citizen_voter", 22 | "organization_fides_key": "default_organization", 23 | "tags": null, 24 | "name": "Citizen Voter", 25 | "description": "An individual registered to voter with a state or authority.", 26 | "rights": null, 27 | "automated_decisions_or_profiling": null 28 | }, 29 | { 30 | "version_added": "2.0.0", 31 | "version_deprecated": null, 32 | "replaced_by": null, 33 | "is_default": true, 34 | "fides_key": "commuter", 35 | "organization_fides_key": "default_organization", 36 | "tags": null, 37 | "name": "Commuter", 38 | "description": "An individual that is traveling or transiting in the context of location tracking.", 39 | "rights": null, 40 | "automated_decisions_or_profiling": null 41 | }, 42 | { 43 | "version_added": "2.0.0", 44 | "version_deprecated": null, 45 | "replaced_by": null, 46 | "is_default": true, 47 | "fides_key": "consultant", 48 | "organization_fides_key": "default_organization", 49 | "tags": null, 50 | "name": "Consultant", 51 | "description": "An individual employed in a consultative/temporary capacity by the organization.", 52 | "rights": null, 53 | 
"automated_decisions_or_profiling": null 54 | }, 55 | { 56 | "version_added": "2.0.0", 57 | "version_deprecated": null, 58 | "replaced_by": null, 59 | "is_default": true, 60 | "fides_key": "customer", 61 | "organization_fides_key": "default_organization", 62 | "tags": null, 63 | "name": "Customer", 64 | "description": "An individual or other organization that purchases goods or services from the organization.", 65 | "rights": null, 66 | "automated_decisions_or_profiling": null 67 | }, 68 | { 69 | "version_added": "2.0.0", 70 | "version_deprecated": null, 71 | "replaced_by": null, 72 | "is_default": true, 73 | "fides_key": "employee", 74 | "organization_fides_key": "default_organization", 75 | "tags": null, 76 | "name": "Employee", 77 | "description": "An individual employed by the organization.", 78 | "rights": null, 79 | "automated_decisions_or_profiling": null 80 | }, 81 | { 82 | "version_added": "2.0.0", 83 | "version_deprecated": null, 84 | "replaced_by": null, 85 | "is_default": true, 86 | "fides_key": "job_applicant", 87 | "organization_fides_key": "default_organization", 88 | "tags": null, 89 | "name": "Job Applicant", 90 | "description": "An individual applying for employment to the organization.", 91 | "rights": null, 92 | "automated_decisions_or_profiling": null 93 | }, 94 | { 95 | "version_added": "2.0.0", 96 | "version_deprecated": null, 97 | "replaced_by": null, 98 | "is_default": true, 99 | "fides_key": "next_of_kin", 100 | "organization_fides_key": "default_organization", 101 | "tags": null, 102 | "name": "Next of Kin", 103 | "description": "A relative of any other individual subject where such a relationship is known.", 104 | "rights": null, 105 | "automated_decisions_or_profiling": null 106 | }, 107 | { 108 | "version_added": "2.0.0", 109 | "version_deprecated": null, 110 | "replaced_by": null, 111 | "is_default": true, 112 | "fides_key": "passenger", 113 | "organization_fides_key": "default_organization", 114 | "tags": null, 115 | "name": 
"Passenger", 116 | "description": "An individual traveling on some means of provided transport.", 117 | "rights": null, 118 | "automated_decisions_or_profiling": null 119 | }, 120 | { 121 | "version_added": "2.0.0", 122 | "version_deprecated": null, 123 | "replaced_by": null, 124 | "is_default": true, 125 | "fides_key": "patient", 126 | "organization_fides_key": "default_organization", 127 | "tags": null, 128 | "name": "Patient", 129 | "description": "An individual identified for the purposes of any medical care.", 130 | "rights": null, 131 | "automated_decisions_or_profiling": null 132 | }, 133 | { 134 | "version_added": "2.0.0", 135 | "version_deprecated": null, 136 | "replaced_by": null, 137 | "is_default": true, 138 | "fides_key": "prospect", 139 | "organization_fides_key": "default_organization", 140 | "tags": null, 141 | "name": "Prospect", 142 | "description": "An individual or organization to whom an organization is selling goods or services.", 143 | "rights": null, 144 | "automated_decisions_or_profiling": null 145 | }, 146 | { 147 | "version_added": "2.0.0", 148 | "version_deprecated": null, 149 | "replaced_by": null, 150 | "is_default": true, 151 | "fides_key": "shareholder", 152 | "organization_fides_key": "default_organization", 153 | "tags": null, 154 | "name": "Shareholder", 155 | "description": "An individual or organization that holds equity in the organization.", 156 | "rights": null, 157 | "automated_decisions_or_profiling": null 158 | }, 159 | { 160 | "version_added": "2.0.0", 161 | "version_deprecated": null, 162 | "replaced_by": null, 163 | "is_default": true, 164 | "fides_key": "supplier_vendor", 165 | "organization_fides_key": "default_organization", 166 | "tags": null, 167 | "name": "Supplier/Vendor", 168 | "description": "An individual or organization that provides services or goods to the organization.", 169 | "rights": null, 170 | "automated_decisions_or_profiling": null 171 | }, 172 | { 173 | "version_added": "2.0.0", 174 | 
"version_deprecated": null, 175 | "replaced_by": null, 176 | "is_default": true, 177 | "fides_key": "trainee", 178 | "organization_fides_key": "default_organization", 179 | "tags": null, 180 | "name": "Trainee", 181 | "description": "An individual undergoing training by the organization.", 182 | "rights": null, 183 | "automated_decisions_or_profiling": null 184 | }, 185 | { 186 | "version_added": "2.0.0", 187 | "version_deprecated": null, 188 | "replaced_by": null, 189 | "is_default": true, 190 | "fides_key": "visitor", 191 | "organization_fides_key": "default_organization", 192 | "tags": null, 193 | "name": "Visitor", 194 | "description": "An individual visiting a location.", 195 | "rights": null, 196 | "automated_decisions_or_profiling": null 197 | } 198 | ] 199 | } 200 | -------------------------------------------------------------------------------- /data_files/data_subjects.yml: -------------------------------------------------------------------------------- 1 | data_subject: 2 | - version_added: 2.0.0 3 | version_deprecated: null 4 | replaced_by: null 5 | is_default: true 6 | fides_key: anonymous_user 7 | organization_fides_key: default_organization 8 | tags: null 9 | name: Anonymous User 10 | description: An individual that is unidentifiable to the systems. Note - This should 11 | only be applied to truly anonymous users where there is no risk of re-identification 12 | rights: null 13 | automated_decisions_or_profiling: null 14 | - version_added: 2.0.0 15 | version_deprecated: null 16 | replaced_by: null 17 | is_default: true 18 | fides_key: citizen_voter 19 | organization_fides_key: default_organization 20 | tags: null 21 | name: Citizen Voter 22 | description: An individual registered to voter with a state or authority. 
23 | rights: null 24 | automated_decisions_or_profiling: null 25 | - version_added: 2.0.0 26 | version_deprecated: null 27 | replaced_by: null 28 | is_default: true 29 | fides_key: commuter 30 | organization_fides_key: default_organization 31 | tags: null 32 | name: Commuter 33 | description: An individual that is traveling or transiting in the context of location 34 | tracking. 35 | rights: null 36 | automated_decisions_or_profiling: null 37 | - version_added: 2.0.0 38 | version_deprecated: null 39 | replaced_by: null 40 | is_default: true 41 | fides_key: consultant 42 | organization_fides_key: default_organization 43 | tags: null 44 | name: Consultant 45 | description: An individual employed in a consultative/temporary capacity by the 46 | organization. 47 | rights: null 48 | automated_decisions_or_profiling: null 49 | - version_added: 2.0.0 50 | version_deprecated: null 51 | replaced_by: null 52 | is_default: true 53 | fides_key: customer 54 | organization_fides_key: default_organization 55 | tags: null 56 | name: Customer 57 | description: An individual or other organization that purchases goods or services 58 | from the organization. 59 | rights: null 60 | automated_decisions_or_profiling: null 61 | - version_added: 2.0.0 62 | version_deprecated: null 63 | replaced_by: null 64 | is_default: true 65 | fides_key: employee 66 | organization_fides_key: default_organization 67 | tags: null 68 | name: Employee 69 | description: An individual employed by the organization. 70 | rights: null 71 | automated_decisions_or_profiling: null 72 | - version_added: 2.0.0 73 | version_deprecated: null 74 | replaced_by: null 75 | is_default: true 76 | fides_key: job_applicant 77 | organization_fides_key: default_organization 78 | tags: null 79 | name: Job Applicant 80 | description: An individual applying for employment to the organization. 
81 | rights: null 82 | automated_decisions_or_profiling: null 83 | - version_added: 2.0.0 84 | version_deprecated: null 85 | replaced_by: null 86 | is_default: true 87 | fides_key: next_of_kin 88 | organization_fides_key: default_organization 89 | tags: null 90 | name: Next of Kin 91 | description: A relative of any other individual subject where such a relationship 92 | is known. 93 | rights: null 94 | automated_decisions_or_profiling: null 95 | - version_added: 2.0.0 96 | version_deprecated: null 97 | replaced_by: null 98 | is_default: true 99 | fides_key: passenger 100 | organization_fides_key: default_organization 101 | tags: null 102 | name: Passenger 103 | description: An individual traveling on some means of provided transport. 104 | rights: null 105 | automated_decisions_or_profiling: null 106 | - version_added: 2.0.0 107 | version_deprecated: null 108 | replaced_by: null 109 | is_default: true 110 | fides_key: patient 111 | organization_fides_key: default_organization 112 | tags: null 113 | name: Patient 114 | description: An individual identified for the purposes of any medical care. 115 | rights: null 116 | automated_decisions_or_profiling: null 117 | - version_added: 2.0.0 118 | version_deprecated: null 119 | replaced_by: null 120 | is_default: true 121 | fides_key: prospect 122 | organization_fides_key: default_organization 123 | tags: null 124 | name: Prospect 125 | description: An individual or organization to whom an organization is selling goods 126 | or services. 127 | rights: null 128 | automated_decisions_or_profiling: null 129 | - version_added: 2.0.0 130 | version_deprecated: null 131 | replaced_by: null 132 | is_default: true 133 | fides_key: shareholder 134 | organization_fides_key: default_organization 135 | tags: null 136 | name: Shareholder 137 | description: An individual or organization that holds equity in the organization. 
138 | rights: null 139 | automated_decisions_or_profiling: null 140 | - version_added: 2.0.0 141 | version_deprecated: null 142 | replaced_by: null 143 | is_default: true 144 | fides_key: supplier_vendor 145 | organization_fides_key: default_organization 146 | tags: null 147 | name: Supplier/Vendor 148 | description: An individual or organization that provides services or goods to the 149 | organization. 150 | rights: null 151 | automated_decisions_or_profiling: null 152 | - version_added: 2.0.0 153 | version_deprecated: null 154 | replaced_by: null 155 | is_default: true 156 | fides_key: trainee 157 | organization_fides_key: default_organization 158 | tags: null 159 | name: Trainee 160 | description: An individual undergoing training by the organization. 161 | rights: null 162 | automated_decisions_or_profiling: null 163 | - version_added: 2.0.0 164 | version_deprecated: null 165 | replaced_by: null 166 | is_default: true 167 | fides_key: visitor 168 | organization_fides_key: default_organization 169 | tags: null 170 | name: Visitor 171 | description: An individual visiting a location. 172 | rights: null 173 | automated_decisions_or_profiling: null 174 | -------------------------------------------------------------------------------- /data_files/data_uses.csv: -------------------------------------------------------------------------------- 1 | fides_key,is_default,name,organization_fides_key,parent_key,replaced_by,tags,version_added,version_deprecated,description 2 | data_use,,Data Use,,,,,,, 3 | analytics,TRUE,Analytics,default_organization,data_use,,,2.0.0,,"Provides analytics for activities such as system and advertising performance reporting, insights and fraud detection." 4 | analytics.reporting,TRUE,Analytics for Reporting,default_organization,analytics,,,2.0.0,,Provides analytics for general reporting such as system and advertising performance. 
5 | analytics.reporting.ad_performance,TRUE,Analytics for Advertising Performance,default_organization,analytics.reporting,,,2.0.0,,Provides analytics for reporting of advertising performance. 6 | analytics.reporting.content_performance,TRUE,Analytics for Content Performance,default_organization,analytics.reporting,,,2.0.0,,Analytics for reporting on content performance. 7 | analytics.reporting.campaign_insights,TRUE,Analytics for Insights,default_organization,analytics.reporting,,,2.0.0,,Provides analytics for reporting of campaign insights related to advertising and promotion activities. 8 | analytics.reporting.system,TRUE,Analytics for System Activity,default_organization,analytics.reporting,,,2.0.0,,Provides analytics for reporting on system activity. 9 | analytics.reporting.system.performance,TRUE,Analytics for System Performance,default_organization,analytics.reporting.system,,,2.0.0,,Provides analytics for reporting on system performance. 10 | collect,TRUE,Collect,default_organization,data_use,,,2.0.0,,Collects or stores data in order to use it for another purpose which has not yet been expressly defined. 11 | employment,TRUE,Employment,default_organization,data_use,,,2.0.0,,Processes data for the purpose of recruitment or employment and human resources (HR) related activities. 12 | employment.recruitment,TRUE,Employment Recruitment,default_organization,employment,,,2.0.0,,Processes data of prospective employees for the purpose of recruitment. 13 | essential,TRUE,Essential,default_organization,data_use,,,2.0.0,,"Operates the service or product, including legal obligations, support and basic system operations." 14 | essential.fraud_detection,TRUE,Essential Fraud Detection,default_organization,essential,,,2.0.0,,"Detects possible fraud or misuse of the product, service, application or system." 
15 | essential.legal_obligation,TRUE,Essential Legal Obligation,default_organization,essential,,,2.0.0,,Provides service to meet a legal or compliance obligation such as consent management. 16 | essential.service,TRUE,Essential for Service,default_organization,essential,,,2.0.0,,"Provides the essential product, service, application or system, without which the product/service would not be possible." 17 | essential.service.authentication,TRUE,Essential Service Authentication,default_organization,essential.service,,,2.0.0,,"Authenticates users to the product, service, application or system." 18 | essential.service.notifications,TRUE,Essential Service Notifications,default_organization,essential.service,,,2.0.0,,"Sends notifications about the product, service, application or system." 19 | essential.service.operations,TRUE,Essential for Operations,default_organization,essential.service,,,2.0.0,,"Essential to ensure the operation of the product, service, application or system." 20 | essential.service.payment_processing,TRUE,Essential for Payment Processing,default_organization,essential.service,,,2.0.0,,"Essential to process payments for the product, service, application or system." 21 | essential.service.security,TRUE,Essential for Security,default_organization,essential.service,,,2.0.0,,"Essential to provide security for the product, service, application or system." 22 | essential.service.upgrades,TRUE,Essential for Service Upgrades,default_organization,essential.service,,,2.0.0,,Provides timely system upgrade information options. 23 | essential.service.notifications.email,TRUE,Essential Email Service Notifications,default_organization,essential.service.notifications,,,2.0.0,,"Sends email notifications about the product, service, application or system." 
24 | essential.service.notifications.sms,TRUE,Essential SMS Service Notifications,default_organization,essential.service.notifications,,,2.0.0,,"Sends SMS notifications about the product, service, application or system." 25 | essential.service.operations.support,TRUE,Essential for Operations Support,default_organization,essential.service.operations,,,2.0.0,,"Provides support for the product, service, application or system." 26 | essential.service.operations.improve,TRUE,Essential for Support Improvement,default_organization,essential.service.operations,,,2.0.0,,"Essential to optimize and improve support for the product, service, application or system." 27 | finance,TRUE,Finance,default_organization,data_use,,,2.0.0,,Enables finance and accounting activities such as audits and tax reporting. 28 | functional,TRUE,Functional,default_organization,data_use,,,2.0.0,,"Used for specific, necessary, and legitimate purposes" 29 | functional.storage,TRUE,Local Data Storage,default_organization,functional,,,2.0.0,,"Stores or accesses information from the device as needed when using a product, service, application, or system" 30 | functional.service,TRUE,Service,default_organization,functional,,,2.0.0,,"Functions relating to provided services, products, applications or systems." 31 | functional.service.improve,TRUE,Improve Service,default_organization,functional.service,,,2.0.0,,"Improves the specific product, service, application or system." 32 | marketing,TRUE,Marketing,default_organization,data_use,,,2.0.0,,"Enables marketing, promotion, advertising and sales activities for the product, service, application or system." 33 | marketing.advertising,TRUE,"Advertising, Marketing or Promotion",default_organization,marketing,,,2.0.0,,"Advertises or promotes the product, service, application or system and associated services." 
34 | marketing.communications,TRUE,Marketing Communications,default_organization,marketing,,,2.0.0,,"Uses combined channels to message and market to a customer, user or prospect." 35 | marketing.advertising.first_party,TRUE,First Party Advertising,default_organization,marketing.advertising,,,2.0.0,,Serves advertisements based on first party data collected or derived about the user. 36 | marketing.advertising.frequency_capping,TRUE,Frequency Capping,default_organization,marketing.advertising,,,2.0.0,,Restricts the number of times a specific advertisement is shown to an individual. 37 | marketing.advertising.negative_targeting,TRUE,Negative Targeting,default_organization,marketing.advertising,,,2.0.0,,Enforces rules used to ensure a certain audience or group is not targeted by advertising. 38 | marketing.advertising.profiling,TRUE,Profiling for Advertising,default_organization,marketing.advertising,,,2.0.0,,Creates audience profiles for the purpose of targeted advertising 39 | marketing.advertising.serving,TRUE,Essential for Serving Ads,default_organization,marketing.advertising,,,2.0.0,,Essential to the delivery of advertising and content. 40 | marketing.advertising.third_party,TRUE,Third Party Advertising,default_organization,marketing.advertising,,,2.0.0,,Serves advertisements based on data within the system or joined with data provided by 3rd parties. 41 | marketing.advertising.first_party.contextual,TRUE,First Party Contextual Advertising,default_organization,marketing.advertising.first_party,,,2.0.0,,Serves advertisements based on current content being viewed by the user of the system or service. 42 | marketing.advertising.first_party.targeted,TRUE,First Party Personalized Advertising,default_organization,marketing.advertising.first_party,,,2.0.0,,Targets advertisements based on data collected or derived about the user from use of the system. 
43 | marketing.advertising.third_party.targeted,TRUE,Third Party Targeted Advertising,default_organization,marketing.advertising.third_party,,,2.0.0,,Targets advertisements based on data within the system or joined with data provided by 3rd parties. 44 | marketing.communications.email,TRUE,Marketing Email Communications,default_organization,marketing.communications,,,2.0.0,,Sends email marketing communications. 45 | marketing.communications.sms,TRUE,Marketing SMS Communications,default_organization,marketing.communications,,,2.0.0,,Sends SMS marketing communications. 46 | operations,TRUE,Operations,default_organization,data_use,,,2.0.0,,Supports business processes necessary to the organization's operation. 47 | personalize,TRUE,Personalize,default_organization,data_use,,,2.0.0,,"Personalizes the product, service, application or system." 48 | personalize.content,TRUE,Content Personalization,default_organization,personalize,,,2.0.0,,"Personalizes the content of the product, service, application or system." 49 | personalize.system,TRUE,System Personalization,default_organization,personalize,,,2.0.0,,Personalizes the system. 50 | personalize.content.limited,TRUE,Limited Content Personalization,default_organization,personalize.content,,,2.1.1,,Uses limited data for the purpose of serving content. 51 | personalize.content.profiling,TRUE,Profiling for Personalization,default_organization,personalize.content,,,2.1.1,,Creates profiles for the purpose of serving content. 52 | personalize.content.profiled,TRUE,Targeted Content Personalization,default_organization,personalize.content,,,2.1.1,,Uses profiles for the purpose of serving content. 53 | sales,TRUE,Sales,default_organization,data_use,,,2.0.0,,Supports sales activities such as communications and outreach. 54 | third_party_sharing,TRUE,Third Party Sharing,default_organization,data_use,,,2.0.0,,Transfers data to third parties outside of the system or service's scope. 
55 | third_party_sharing.legal_obligation,TRUE,Sharing for Legal Obligation,default_organization,third_party_sharing,,,2.0.0,,"Shares data for legal obligations, including contracts, applicable laws or regulations." 56 | train_ai_system,TRUE,Train AI System,default_organization,data_use,,,2.0.0,,Trains an AI system or data model for machine learning. -------------------------------------------------------------------------------- /demo_resources/demo_dataset.yml: -------------------------------------------------------------------------------- 1 | dataset: 2 | - fides_key: demo_users_dataset 3 | organization_fides_key: default_organization 4 | name: Demo Users Dataset 5 | description: Data collected about users for our analytics system. 6 | meta: null 7 | data_categories: [] 8 | collections: 9 | - name: users 10 | description: User information 11 | data_categories: [] 12 | fields: 13 | - name: created_at 14 | description: User's creation timestamp 15 | data_categories: 16 | - system.operations 17 | - name: email 18 | description: User's Email 19 | data_categories: 20 | - user.contact.email 21 | - name: first_name 22 | description: User's first name 23 | data_categories: 24 | - user.name 25 | - name: food_preference 26 | description: User's favorite food 27 | data_categories: [] 28 | - name: state 29 | description: User's State 30 | data_categories: 31 | - user.contact.state 32 | - name: uuid 33 | description: User's unique ID 34 | data_categories: 35 | - user.unique_id 36 | -------------------------------------------------------------------------------- /demo_resources/demo_extended_taxonomy.yml: -------------------------------------------------------------------------------- 1 | data_use: 2 | - fides_key: third_party_sharing.personalized_advertising.direct_marketing 3 | name: Direct Marketing 4 | description: User information for direct marketing purposes 5 | parent_key: third_party_sharing.personalized_advertising 6 | 7 | data_subject: 8 | - fides_key: 
potential_customer 9 | name: Potential Customer 10 | description: A prospective individual or other organization that purchases goods or services from the organization. 11 | rights: 12 | strategy: INCLUDE 13 | values: 14 | - Informed 15 | - Access 16 | - Rectification 17 | - Erasure 18 | - Object 19 | automated_decisions_or_profiling: true 20 | -------------------------------------------------------------------------------- /demo_resources/demo_organization.yml: -------------------------------------------------------------------------------- 1 | organization: 2 | - fides_key: default_organization 3 | name: Demo Organization 4 | description: An e-commerce organization 5 | security_policy: https://ethyca.com/privacy-policy/ 6 | controller: 7 | name: Con Troller 8 | address: 123 demo street, New York, NY, USA 9 | email: controller@demo_company.com 10 | phone: +1 555 555 5555 11 | data_protection_officer: 12 | name: DataPro Tection 13 | address: 123 demo street, New York, NY, USA 14 | email: dpo@demo_company.com 15 | phone: +1 555 555 5555 16 | representative: 17 | name: Rep Resentative 18 | address: 123 demo street, New York, NY, USA 19 | email: representative@demo_company.com 20 | phone: +1 555 555 5555 21 | 22 | 23 | -------------------------------------------------------------------------------- /demo_resources/demo_policy.yml: -------------------------------------------------------------------------------- 1 | policy: 2 | - fides_key: demo_privacy_policy 3 | name: Demo Privacy Policy 4 | description: The main privacy policy for the organization. 5 | rules: 6 | - name: Reject Direct Marketing 7 | description: Disallow collecting any user contact info to use for marketing. 
8 | data_categories: 9 | matches: ANY 10 | values: 11 | - user.contact 12 | data_uses: 13 | matches: ANY 14 | values: 15 | - advertising 16 | data_subjects: 17 | matches: ANY 18 | values: 19 | - customer 20 | -------------------------------------------------------------------------------- /demo_resources/demo_system.yml: -------------------------------------------------------------------------------- 1 | system: 2 | - fides_key: demo_analytics_system 3 | name: Demo Analytics System 4 | description: A system used for analyzing customer behaviour. 5 | system_type: Service 6 | administrating_department: Engineering 7 | ingress: 8 | - fides_key: demo_users_dataset 9 | type: dataset 10 | privacy_declarations: 11 | - name: Analyze customer behaviour for improvements. 12 | data_categories: 13 | - user.contact 14 | - user.device.cookie_id 15 | data_use: improve.system 16 | data_subjects: 17 | - customer 18 | ingress: 19 | - demo_users_dataset 20 | 21 | - fides_key: demo_marketing_system 22 | name: Demo Marketing System 23 | description: Collect data about our users for marketing. 
24 | system_type: Service 25 | privacy_declarations: 26 | - name: Collect data for marketing 27 | data_categories: 28 | #- user.contact # uncomment to add this category to the system 29 | - user.cookie_id 30 | data_use: advertising 31 | data_subjects: 32 | - customer 33 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | black==23.3.0 2 | mypy==1.4.0 3 | nox>=2023 4 | packaging>=22.0 5 | pre-commit==2.9.3 6 | pylint==2.10.0 7 | pytest==7.3.1 8 | pytest-cov==2.11.1 9 | requests-mock==1.8.0 10 | setuptools>=64.0.2 11 | types-PyYAML 12 | xenon==0.7.3 13 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | fideslang: 3 | image: ethyca/fideslang:local 4 | command: /bin/bash 5 | volumes: 6 | - type: bind 7 | source: . 8 | target: /fideslang 9 | read_only: False 10 | 11 | docs: 12 | build: 13 | context: mkdocs/ 14 | volumes: 15 | - ./mkdocs:/docs 16 | expose: 17 | - 8000 18 | ports: 19 | - "8000:8000" 20 | -------------------------------------------------------------------------------- /mkdocs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-bullseye 2 | 3 | # Install auxiliary software 4 | RUN apt-get update 5 | RUN apt-get install -y \ 6 | git \ 7 | vim 8 | 9 | # Update pip and install requirements 10 | RUN pip install -U pip 11 | COPY requirements.txt requirements.txt 12 | RUN pip install -r requirements.txt 13 | 14 | # Copy in the required files 15 | COPY . 
/docs 16 | WORKDIR /docs 17 | 18 | EXPOSE 8000 19 | CMD ["mkdocs", "serve", "--dev-addr=0.0.0.0:8000"] 20 | -------------------------------------------------------------------------------- /mkdocs/docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/.DS_Store -------------------------------------------------------------------------------- /mkdocs/docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/.nojekyll -------------------------------------------------------------------------------- /mkdocs/docs/css/logo.css: -------------------------------------------------------------------------------- 1 | /* -- START: Logo CSS overrides specific to taxonomy repo -- */ 2 | 3 | body header.md-header .md-logo img { 4 | width: auto; 5 | height: 65px; 6 | } -------------------------------------------------------------------------------- /mkdocs/docs/css/taxonomy.css: -------------------------------------------------------------------------------- 1 | /* -- START: Taxonomy Visualization Explorer CSS -- */ 2 | 3 | 4 | /* Typography */ 5 | 6 | .vis { 7 | font-family: "Source Sans Pro", sans-serif; 8 | font-weight: 400; 9 | font-size: 12px; 10 | line-height: 1.5; 11 | letter-spacing: 0.15px; 12 | color: var(--md-typeset-color); 13 | margin: 0 auto; 14 | padding: 20px; 15 | background-color: var(--md-vis-bg-color); 16 | border-radius: 5px; 17 | } 18 | 19 | .vis h1 { 20 | font-weight: 700; 21 | font-size: 26px; 22 | letter-spacing: -0.48px; 23 | color: #111439; 24 | margin: 0; 25 | } 26 | 27 | .vis .legend { 28 | font-weight: 600; 29 | font-size: 10px; 30 | letter-spacing: -0.19px; 31 | color: #0a2540; 32 | } 33 | 34 | .vis .card-title { 35 | font-weight: 700; 36 | } 37 | 38 | .vis 
.card-subtitle { 39 | font-weight: 600; 40 | } 41 | 42 | 43 | /* General */ 44 | 45 | .vis-container { 46 | max-width: 1400px; 47 | text-align: center; 48 | } 49 | 50 | .vis-container>*+* { 51 | margin-top: 1.5rem; 52 | } 53 | 54 | .chart-container>svg { 55 | display: block; 56 | } 57 | 58 | @media screen and (min-width: 1200px) { 59 | .controls-container { 60 | display: flex; 61 | flex-wrap: wrap; 62 | justify-content: space-between; 63 | } 64 | } 65 | 66 | .btn-group { 67 | display: inline-flex; 68 | vertical-align: middle; 69 | margin: 0.25em; 70 | } 71 | 72 | .btn { 73 | line-height: 1; 74 | font-size: 14px; 75 | background-color: #eef2f7; 76 | border: none; 77 | padding: 0.65em 0.75em; 78 | margin: 0; 79 | color: #000; 80 | } 81 | 82 | .btn--icon { 83 | padding: 0; 84 | width: 48px; 85 | height: 2.3em; 86 | display: inline-flexbox; 87 | justify-content: center; 88 | align-items: center; 89 | } 90 | 91 | .btn:first-child { 92 | border-top-left-radius: 2px; 93 | border-bottom-left-radius: 2px; 94 | } 95 | 96 | .btn:last-child { 97 | border-top-right-radius: 2px; 98 | border-bottom-right-radius: 2px; 99 | } 100 | 101 | .btn:hover { 102 | background-color: #e9ebee; 103 | } 104 | 105 | .btn:active, 106 | .btn.is-selected { 107 | background-color: #cbd7e7; 108 | } 109 | 110 | 111 | /* Legend */ 112 | 113 | .vis-color-legend { 114 | display: inline-flex; 115 | justify-content: center; 116 | flex-wrap: wrap; 117 | } 118 | 119 | .vis-color-legend .legend-item { 120 | display: inline-flex; 121 | align-items: center; 122 | margin-left: 0.5em; 123 | margin-right: 0.5em; 124 | } 125 | 126 | .vis-color-legend .legend-swatch { 127 | width: 1em; 128 | height: 1em; 129 | margin-right: 0.5em; 130 | } 131 | 132 | 133 | /* Tooltip */ 134 | 135 | .vis-tooltip { 136 | position: absolute; 137 | top: 0; 138 | left: 0; 139 | background-color: #ffffff; 140 | padding: 1em; 141 | border-radius: 4px; 142 | box-shadow: 0 2px 24px 0 rgba(0, 0, 0, 0.2); 143 | pointer-events: none; 144 | 
opacity: 0; 145 | transition: opacity 0.15s; 146 | color: rgb(79, 86, 107); 147 | } 148 | 149 | .vis-tooltip.is-visible { 150 | opacity: 1; 151 | } 152 | 153 | .vis-tooltip .card { 154 | max-width: 240px; 155 | } 156 | 157 | .vis-tooltip .card>*+* { 158 | margin-top: 1em; 159 | } 160 | 161 | .vis-tooltip code { 162 | font-size: 10px; 163 | color: rgb(79, 86, 107); 164 | } 165 | 166 | 167 | /* Sunburst */ 168 | 169 | .vis .sunburst-svg .partition-path, 170 | .vis .sunburst-svg .label-text { 171 | transition: fill 0.15s; 172 | } 173 | 174 | .vis .sunburst-svg .partition-path.is-highlighted { 175 | fill: #57f2ea; 176 | } 177 | 178 | .vis .sunburst-svg .label-text.is-highlighted { 179 | fill: currentColor; 180 | } 181 | 182 | 183 | /* Radial Tree */ 184 | 185 | 186 | /* Tree */ 187 | 188 | .vis .radial-tree-svg .link, 189 | .vis .radial-tree-svg .node, 190 | .vis .tree-svg .link, 191 | .vis .tree-svg .node { 192 | transition: fill 0.15s; 193 | } 194 | 195 | .vis .radial-tree-svg .link, 196 | .vis .tree-svg .link { 197 | stroke-width: 1.5px; 198 | } 199 | 200 | .vis .radial-tree-svg .link.is-highlighted, 201 | .vis .tree-svg .link.is-highlighted { 202 | stroke: #57f2ea; 203 | stroke-width: 2.5px; 204 | } 205 | 206 | .vis .radial-tree-svg .node.is-highlighted, 207 | .vis .tree-svg .node.is-highlighted { 208 | fill: #57f2ea; 209 | } 210 | 211 | .vis .radial-tree-svg .label-text .label-text__bg, 212 | .vis .tree-svg .label-text .label-text__bg { 213 | stroke: var(--md-default-bg-color); 214 | } 215 | 216 | .vis .radial-tree-svg .label-text.is-highlighted, 217 | .vis .tree-svg .label-text.is-highlighted { 218 | font-weight: 700; 219 | } 220 | 221 | 222 | /* -- END: Taxonomy Visualization Explorer CSS -- */ -------------------------------------------------------------------------------- /mkdocs/docs/csv/data_subjects.csv: -------------------------------------------------------------------------------- 1 | fides_key,parent_key,name,description 2 | subject,,, 3 | 
consumer,subject,,A natural person who is a resident of a State within the United States of America. 4 | household,subject,,A group of Consumers who cohabitate with one another at the same residential address and share use of common devices or services. 5 | employee,subject,,"An individual who is an employee of, director of, officer of, medical staff member of, or independent contractor of a business" 6 | B2B,subject,,Business-to-business; individuals who interact with a business purely in a commercial capacity 7 | child,subject,,A Consumer under the age of 15 8 | child.child_under_thirteen,child,,A Consumer under the age of 13 9 | child.child_thirteen_to_sixteen ,child,,A Consumer between the ages of 13-15  -------------------------------------------------------------------------------- /mkdocs/docs/draft.md: -------------------------------------------------------------------------------- 1 | # Draft IAB Fides Taxonomy 2 | Draft for comment of the IAB PIAT/Fides data governance taxonomy. 3 | 4 | 5 |
6 |
7 |
8 |
9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 | 19 | 22 | 25 |
26 |
27 |
28 |
29 | 30 | 31 | 32 |
33 |
34 |
 35 | 36 | 37 | 38 | ### 1. Data Categories 39 | Data Categories are labels to describe the type of data processed by your software. These are most heavily used by the System and Dataset resources, where you can assign one or more data categories to each field. 40 | 41 | Data Categories are hierarchical with natural inheritance, meaning you can classify data coarsely with a high-level category (e.g. `user.contact` data), or you can classify it with greater precision using subcategories (e.g. `user.contact.email` data). 42 | 43 | 44 | ### 2. Data Uses 45 | Data Uses are labels that describe how, or for what purpose(s) a component of your system is using data. 46 | 47 | Data Uses are also hierarchical with natural inheritance, meaning you can easily describe what you're using data for either coarsely (e.g. `provide.service.operations`) or with more precision using subcategories (e.g. `provide.service.operations.support.optimization`). 48 | 49 | Learn more about [Data Uses in the taxonomy reference now](taxonomy/data_uses.md). 50 | 51 | ### 3. Data Subjects 52 | 53 | Data Subjects is a label commonly used in the regulatory world to describe the users of a system whose data is being processed. In many systems a generic user label may be sufficient; however, the taxonomy is intended to provide greater control through specificity where needed. 54 | 55 | Examples of this are: 56 | 57 | - `anonymous_user` 58 | - `employee` 59 | - `customer` 60 | - `patient` 61 | - `next_of_kin` 62 | 63 | Learn more about [Data Subjects in the taxonomy reference now](taxonomy/data_subjects.md). 64 | 65 | ### Extensibility and Interoperability 66 | 67 | The taxonomy is designed to support common privacy compliance regulations and standards out of the box, including GDPR, CCPA, LGPD and ISO 19944. 68 | 69 | You can extend the taxonomy to support your system needs. 
If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. 70 | 71 | If you have suggestions for missing classifications or concepts, please submit them for addition. 72 | -------------------------------------------------------------------------------- /mkdocs/docs/explorer.md: -------------------------------------------------------------------------------- 1 | # Privacy Taxonomy Explorer 2 | 3 | The taxonomy explorer is a useful way to visualize and review the taxonomy for those looking to explore in greater depth. 4 | 5 |
6 |
7 |
8 |
9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 | 19 | 22 | 25 |
26 |
27 |
28 |
29 | 30 | 31 | 32 |
33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /mkdocs/docs/img/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/img/.DS_Store -------------------------------------------------------------------------------- /mkdocs/docs/img/Radial Tree@1x.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon/Radial Tree@1x 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /mkdocs/docs/img/Sunburst@1x.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon/Sunburst@1x 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /mkdocs/docs/img/Tree@1x.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon/Tree@1x 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /mkdocs/docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/img/favicon.ico -------------------------------------------------------------------------------- /mkdocs/docs/img/fideslang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/img/fideslang.png -------------------------------------------------------------------------------- /mkdocs/docs/img/fideslang.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /mkdocs/docs/img/~fideslang.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Fides Lang 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /mkdocs/docs/index.md: -------------------------------------------------------------------------------- 1 | # IAB Tech Lab & Fideslang 2 | 3 | Fideslang ( fee-dez-læŋg, derived from the Latin term "Fidēs" and "language") is a taxonomy developed to standardize the way privacy and governance-related data elements, purposes of data use, and subjects are labeled and described. This taxonomy provides an interoperable standard designed to assist businesses in navigating the complex landscape of global privacy regulations. 4 | 5 | In collaboration with [Ethyca](https://ethyca.com), [IAB Tech Lab](https://iabtechlab.com/) received a donation of Fideslang to accelerate the development of privacy standards within the ad tech industry. Fideslang represents five years of dedicated work aimed at enhancing data privacy practices by creating a universal language that bridges the gap between legal and development teams. This innovation aligns seamlessly with the IAB Tech Lab's Privacy Taxonomy Project, a key initiative of the Privacy Implementation & Accountability Task Force. The project aims to create a standardized privacy taxonomy that enables businesses to effectively manage their data privacy compliance and communicate privacy information more clearly across the industry. 6 | 7 | The Privacy Taxonomy is uniquely tailored to the evolving landscape of data protection. Building on the foundation of Fideslang, the taxonomy aims to set a new standard for how privacy information is conveyed across the digital advertising ecosystem. 
 8 | The Privacy Taxonomy is open for public comment until October 5th, 2024. Industry stakeholders are encouraged to review and provide feedback at [support@iabtechlab.com](mailto:support@iabtechlab.com). 9 | 10 | [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/) 11 | 12 | 13 | ## Taxonomy Explorer 14 | 15 | The IAB Tech Lab Privacy Taxonomy is composed of three main classification groups: Data Elements, Data Uses, and Data Subjects. These groups work together to describe the data types, purposes of use, and data owners (subjects) of data being processed for privacy and governance purposes. Below, you can explore the primary components of the taxonomy. 16 | 17 | To learn more about the taxonomy's structure, read the [explanation below](#fideslang-privacy-taxonomy-explained). 18 | 19 |
20 |
21 |
22 |
23 | 24 | 25 | 26 |
27 |
28 |
29 |
30 | 33 | 36 | 39 |
40 |
41 |
42 |
43 | 44 | 45 | 46 |
47 |
48 |
 49 | 50 | 51 | 52 | ## Fideslang Privacy Taxonomy Explained 53 | 54 | ### 1. Data Elements 55 | Data Elements are labels to describe the type of data processed by your business and technology systems. Data Elements are hierarchical with natural inheritance, meaning you can label data coarsely with a high-level category (e.g. user.contact data), or you can tag it with greater precision using subcategories (e.g. user.contact.email data). This provides a standard way to tag data in databases, which can assist with data privacy operations (e.g. data mapping, DSRs, contracts, disclosures, consent/opt-out, etc.). The data element, when clear under the applicable law, aligns with categories specified in US data privacy laws (e.g., CCPA, CPA). 56 | 57 | 58 | ### 2. Data Uses 59 | Data Uses are labels that describe how, or for what purpose(s) you are using data. You may think of these as analogous to Purpose of Processing in such documents as a RoPA (Record of Processing Activities). 60 | 61 | Data Uses are also hierarchical with natural inheritance, meaning you can easily describe what you're using data for either coarsely (e.g. provide.service.operations) or with more precision using subcategories (e.g. provide.service.operations.support.optimization). 62 | 63 | The top-level labels create standard buckets to categorize data uses into: (1) necessary, (2) operational, (3) analytics, (4) advertising and marketing, and (5) disclosure. 64 | 65 | ### 3. Data Subjects 66 | 67 | Data Subjects describe the owner or individual that the data being processed relates to; examples include a customer or an employee. In many systems a generic user label may be sufficient; however, the taxonomy is intended to provide greater control through specificity where needed for governance. 
 68 | 69 | Examples of this are: 70 | 71 | - `consumer` 72 | - `household` 73 | - `employee` 74 | 75 | ### Laws Triggered 76 | For data categories and data uses, these are mapped to the major laws they trigger and the sensitivity that a given data category may obtain based on processing under a given framework. 77 | 78 | ### IAB Frameworks 79 | The Fideslang taxonomy automatically cross-references all data uses to the IAB TCF and IAB MSPA frameworks, meaning that if you tag a data use such as `advertising_marketing.first_party.targeted`, it will automatically inherit the classification of "First Party Advertising" as defined by 1.33ii of the MSPA. 80 | 81 | ### Sensitivity Matrix 82 | When using the Privacy Taxonomy, you may assign sensitivity on a scale of 1-3 to given data categories, with 1 being not sensitive and 3 being sensitive as determined by applicable law. You should complete this sensitivity matrix based on your business's internal policies and risk management. 83 | 84 | Sensitivity Matrix scoring: 85 | 86 | - 1 = no; 87 | - 2 = no; unless combined with another non-sensitive data point that makes the combined data elements sensitive 88 | - e.g., account log-in + password/credentials to access the account 89 | - 3 = yes, per se sensitive as determined by applicable law 90 | - e.g., citizenship status, racial or ethnic origin, religious beliefs, data relating to children, etc. 91 | 92 | 93 | ### Extensibility and Interoperability 94 | 95 | The Privacy Taxonomy is designed to support common privacy compliance regulations and standards out of the box, including CCPA, MSPA, etc. 96 | 97 | You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. 98 | 99 | If you have suggestions for missing classifications or concepts, please submit them for addition. 
 100 | 101 | ### Public Comment 102 | The Privacy Taxonomy is open for public comment until October 5th, 2024. Industry stakeholders are encouraged to review and provide feedback to [support@iabtechlab.com](mailto:support@iabtechlab.com). 103 | 104 | -------------------------------------------------------------------------------- /mkdocs/docs/js/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IABTechLab/fideslang/76ba0add91935da45af7cce8ba179c7da210c820/mkdocs/docs/js/.DS_Store -------------------------------------------------------------------------------- /mkdocs/docs/overview.md: -------------------------------------------------------------------------------- 1 | # Fides Language Documentation 2 | 3 | This is the documentation for Fides' configuration language. It is relevant to users of [**Fides**](https://github.com/ethyca/fides/) and other privacy tools that are on the roadmap. 4 | 5 | The Fides language is Fides' primary user interface. In every use of Fides, configuration files written in the Fides language are always at the heart of the workflow. 6 | 7 | ## About the Fides Language 8 | 9 | The Fides language is based on **YAML** configuration files. YAML provides a well-understood structure, upon which the Fides language adds helpful primitives which represent types of data, processes or policies. By declaring these primitives with Fides you can describe: 10 | 11 | - what types of data your application processes (using Fides `data_category` annotations) 12 | - how your system uses that data (using Fides `data_use` annotations) 13 | - what policies you want your system to adhere to (using Fides `Policy` resources) 14 | - etc. 15 | 16 | All other language features exist only to make the definition of privacy primitives more flexible and convenient. 
 17 | 18 | When fully utilized, these configuration files written using the Fides language tell other Fides tools what your software is doing with data and how to manage the privacy risks of that data process. Software systems are complicated though, so a full Fides configuration will consist of multiple files describing different resources, including: 19 | 20 | ### Dataset YAML 21 | 22 | A Dataset declaration in Fides language represents any location where data is stored: databases, data warehouses, caches and other data storage systems. Within a Fides Dataset, you declare the individual fields (e.g. database columns) where data is located and annotate them to describe the categories of data that are stored. 23 | 24 | ### System YAML 25 | 26 | A System declaration in Fides language represents the privacy properties of a single software project, service, codebase, or application. The Fides System declaration describes both the categories of data being processed and the purposes for which that data is processed. 27 | 28 | ### Policy YAML 29 | 30 | A Policy declaration in Fides language represents a set of rules for privacy or compliance that the system must adhere to. Fides CLI tools evaluate these policies against the system and dataset declarations to ensure automated compliance. 31 | -------------------------------------------------------------------------------- /mkdocs/docs/resources/dataset.md: -------------------------------------------------------------------------------- 1 | # Dataset 2 | 3 | A Dataset takes a database schema (tables and columns) and adds Fides privacy categorizations. This is a database-agnostic way to annotate privacy declarations. 4 | 5 | ``` 6 | organization 7 | |-> system 8 | |-> ** dataset ** 9 | |-> collections 10 | |-> fields 11 | ``` 12 | 13 | * The schema is represented as a set of "collections" (tables) that contain "fields" (columns). 
These can also be arbitrarily nested to handle document-type databases (e.g., NoSQL or S3). 14 | 15 | * At each level -- Dataset, collection, and field, you can assign one or more Data Categories. The Categories declared at each child level are additive. 16 | 17 | You use your Datasets by adding them to Systems. A System can contain any number of Datasets, and a Dataset can be added to any number of Systems. 18 | When a dataset is referenced by a system, all applicable data categories set on the dataset are treated as part of the system. 19 | If a Dataset is not referenced by a System, a warning is surfaced denoting an orphan dataset exists. 20 | 21 | Datasets cannot contain other Datasets. 22 | 23 | ## Object Structure 24 | 25 | **fides_key**     _constrained string_ 26 | 27 | A string token of your own invention that uniquely identifies this Dataset. It's your responsibility to ensure that the value is unique across all of your Dataset objects. The value may only contain alphanumeric characters, underscores, and hyphens. (`[A-Za-z0-9_.-]`). 28 | 29 | **name**     _string_ 30 | 31 | A UI-friendly label for the Dataset. 32 | 33 | **description**     _string_ 34 | 35 | A human-readable description of the Dataset. 36 | 37 | **organization_fides_key**     _string_     default: `default_organization` 38 | 39 | The fides key of the [Organization](../../resources/organization/) to which this Dataset belongs. 40 | 41 | **meta**     _object_ 42 | 43 | An optional object that provides additional information about the Dataset. You can structure the object however you like. It can be a simple set of `key: value` properties or a deeply nested hierarchy of objects. How you use the object is up to you: Fides ignores it. 44 | 45 | **data_categories**     [_string_]
46 | 47 | Arrays of Data Category resources, identified by `fides_key`, that apply to all collections in the Dataset. 48 | 49 | **collections**     [_object_]
50 | 51 | An array of objects that describe the Dataset's collections. 52 | 53 | **collections.name**     string
54 | 55 | A UI-friendly label for the collection. 56 | 57 | **collections.description**     _string_ 58 | 59 | A human-readable description of the collection. 60 | 61 | **collections.data_categories**     [_string_]
62 | 63 | Arrays of Data Category resources, identified by `fides_key`, that apply to all fields in the collection. 64 | 65 | **collections.fields**     [_object_]
66 | 67 | An array of objects that describe the collection's fields. 68 | 69 | **collections.fields.name**     string
70 | 71 | A UI-friendly label for the field. 72 | 73 | **collections.fields.description**     _string_ 74 | 75 | A human-readable description of the field. 76 | 77 | **collections.fields.data_categories**     [_string_]
78 | 79 | Arrays of Data Categories, identified by `fides_key`, that applies to this field. 80 | 81 | **collections.fields.fields**     [_object_]
 82 | 83 | An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases). 84 | 85 | ## Examples 86 | 87 | ### **Manifest File** 88 | 89 | ```yaml 90 | dataset: 91 | - fides_key: demo_users_dataset 92 | name: Demo Users Dataset 93 | description: Data collected about users for our analytics system. 94 | collections: 95 | - name: users 96 | description: User information 97 | data_categories: 98 | - user 99 | fields: 100 | - name: first_name 101 | description: User's first name 102 | data_categories: 103 | - user.name 104 | - name: email 105 | description: User's Email 106 | data_categories: 107 | - user.contact.email 108 | - name: phone 109 | description: User's phone numbers 110 | data_categories: 111 | - user.contact.phone_number 112 | fields: 113 | - name: mobile 114 | description: User's mobile phone number 115 | data_categories: 116 | - user.contact.phone_number 117 | - name: home 118 | description: User's home phone number 119 | data_categories: 120 | - user.contact.phone_number 121 | ``` 122 | 123 | ### **API Payload** 124 | 125 | ```json 126 | { 127 | "fides_key": "demo_users_dataset", 128 | "name": "Demo Users Dataset", 129 | "description": "Data collected about users for our analytics system.", 130 | "collections": [ 131 | { 132 | "name": "users", 133 | "description": "User information", 134 | "fields": [ 135 | { 136 | "name": "first_name", 137 | "description": "User's first name", 138 | "data_categories": [ 139 | "user.name" 140 | ] 141 | }, 142 | { 143 | "name": "email", 144 | "description": "User's Email", 145 | "data_categories": [ 146 | "user.contact.email" 147 | ] 148 | }, 149 | { 150 | "name": "phone", 151 | "description": "User's phone numbers", 152 | "data_categories": [ 153 | "user.contact.phone_number" 154 | ], 155 | "fields": [ 156 | { 157 | "name": "mobile", 158 | "description": "User's mobile phone number", 159 | "data_categories": [ 160 | "user.contact.phone_number" 161 | ] 162 | }, 163 | { 164 | 
"name": "home", 165 | "description": "User's home phone number", 166 | "data_categories": [ 167 | "user.contact.phone_number" 168 | ] 169 | } 170 | ] 171 | } 172 | ] 173 | } 174 | ] 175 | } 176 | ``` 177 | -------------------------------------------------------------------------------- /mkdocs/docs/resources/organization.md: -------------------------------------------------------------------------------- 1 | # Organization 2 | 3 | An Organization represents all or part of an enterprise or company, and establishes the root of your resource hierarchy. This means that while you can have more than one Organization resource, they can't refer to each other's sub-resources. For example, your "American Stores" Organization can't refer to the Policy objects that are defined by your "European Stores" Organization. 4 | 5 | The Organization resource will also contain vital information with regards to compliance reporting in the case of a data map or RoPA (Record of Processing Activities). 6 | 7 | All other resource types must refer to an Organization (through their `organization_fides_key` properties). Fides creates a default Organization that it uses for all resources that don't otherwise specify an Organization. Unless you're creating multiple Organizations (which should be rare), it is suggested to use the default Organization resource. 8 | 9 | The fides key for the default Organization is `default_organization`. 10 | 11 | ## Object Structure 12 | 13 | **fides_key**  _string_ 14 | 15 | A string token of your own invention that uniquely identifies this Organization. It's your responsibility to ensure that the value is unique across all of your Organization objects. The value can only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). 16 | 17 | **name**  _string_ 18 | 19 | A UI-friendly label for the Organization. 20 | 21 | **description**  _string_ 22 | 23 | A human-readable description of the Organization. 
24 | 25 | **controller**  [array] 26 | 27 | An array of contact information for the controller over personal data usage within the organization (`name`, `address`, `email`, `phone`). 28 | 29 | **data_protection_officer**  [array] 30 | 31 | An array of contact information for the Data Protection Officer (DPO) within the organization (`name`, `address`, `email`, `phone`). 32 | 33 | **representative**  [array] 34 | 35 | An array of contact information for an optional representative for the organization on behalf of the controller and/or DPO (`name`, `address`, `email`, `phone`). 36 | 37 | **security_policy**  _string_ 38 | 39 | A url to the organization security policy, (i.e. https://ethyca.com/privacy-policy/) 40 | 41 | ## Examples 42 | 43 | ### **Manifest File** 44 | 45 | ```yaml 46 | organization: 47 | fides_key: default_organization 48 | name: Acme Incorporated 49 | description: An Organization that represents all of Acme Inc. 50 | security_policy: https://example.org/privacy 51 | controller: 52 | name: Dave L. Epper 53 | address: 1 Acme Pl. New York, NY 54 | email: controller@acmeinc.com 55 | phone: +1 555 555 5555 56 | data_protection_officer: 57 | name: Preet Ector 58 | address: 1 Acme Pl. New York, NY 59 | email: dpo@acmeinc.com 60 | phone: +1 555 555 5555 61 | representative: 62 | name: Ann Othername 63 | address: 1 Acme Pl. New York, NY 64 | email: representative@acmeinc.com 65 | phone: +1 555 555 5555 66 | ``` 67 | 68 | ### **API Payload** 69 | 70 | ```json 71 | { 72 | "fides_key": "default_organization", 73 | "name": "Acme Incorporated", 74 | "description": "An Organization that represents all of Acme Inc.", 75 | "security_policy": "https://example.org/privacy", 76 | "controller": { 77 | "name": "Dave L. Epper", 78 | "address": "1 Acme Pl. New York, NY", 79 | "email": "controller@acmeinc.com", 80 | "phone": "+1 555 555 5555" 81 | }, 82 | "data_protection_officer": { 83 | "name": "Preet Ector", 84 | "address": "1 Acme Pl. 
New York, NY", 85 | "email": "dpo@acmeinc.com", 86 | "phone": "+1 555 555 5555" 87 | }, 88 | "representative": { 89 | "name": "Ann Othername", 90 | "address": "1 Acme Pl. New York, NY", 91 | "email": "representative@acmeinc.com", 92 | "phone": "+1 555 555 5555" 93 | } 94 | } 95 | ``` 96 | -------------------------------------------------------------------------------- /mkdocs/docs/resources/policy.md: -------------------------------------------------------------------------------- 1 | # Policy 2 | 3 | A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 3 privacy attributes (`data_category`, `data_use`, `data_subject`). The purpose of the policy is to state what types of data are allowed for certain usages. 4 | 5 | ``` 6 | organization 7 | |-> ** policy ** 8 | |-> rules 9 | ``` 10 | 11 | ## Object Structure 12 | 13 | **fides_key**     _constrained string_ 14 | 15 | A string token of your own invention that uniquely identifies this Policy. It's your responsibility to ensure that the value is unique across all of your Policy objects. The value may only contain alphanumeric characters, underscores, and hyphens. (`[A-Za-z0-9_.-]`). 16 | 17 | **name**     _string_ 18 | 19 | A UI-friendly label for the Policy. 20 | 21 | **description**     _string_ 22 | 23 | A human-readable description of the Policy. 24 | 25 | **data_categories**     _string_      26 | 27 | The [Data Categories](../../taxonomy/data_categories/) privacy attribute describes types of sensitive data as defined in the taxonomy. 28 | 29 | **data_uses**     _string_      30 | 31 | The [Data Use](../../taxonomy/data_uses/) privacy attribute describes the various categories of data processing and operations at your organization. 32 | 33 | **data_subject**     _string_      34 | 35 | The [Data Subjects](../../taxonomy/data_subjects/) privacy attribute describes the individual persons whose data your rule pertains to. 
36 | 37 | **matches**     _enum_      38 | 39 | * `ANY` 40 | * `ALL` 41 | * `NONE` 42 | * `OTHER` 43 | 44 | The matches criteria describes how you would like this rule to be evaluated. These basic logic gates determine whether the array of privacy attributes will be fully included (`ALL`), not included at all (`NONE`), only included if at least 1 item in the array matches (`ANY`), or excluded with any additional attributes included (`OTHER`). 45 | 46 | **organization_fides_key**     _string_     default: `default_organization` 47 | 48 | The fides key of the [Organization](../..//resources/organization/) to which this Policy belongs. 49 | 50 | ## Examples 51 | 52 | ### **Manifest File** 53 | 54 | ```yaml 55 | policy: 56 | - fides_key: demo_privacy_policy 57 | name: Demo Privacy Policy 58 | description: The main privacy policy for the organization. 59 | rules: 60 | - fides_key: reject_direct_marketing 61 | name: Reject Direct Marketing 62 | description: Disallow collecting any user contact info to use for marketing. 
63 | data_categories: 64 | matches: ANY 65 | values: 66 | - user.contact 67 | data_uses: 68 | matches: ANY 69 | values: 70 | - advertising 71 | data_subjects: 72 | matches: ANY 73 | values: 74 | - customer 75 | ``` 76 | 77 | **Demo manifest file:** `/fides/demo_resources/demo_policy.yml` 78 | 79 | ### **API Payload** 80 | 81 | ```json title="POST /api/v1/policy" 82 | { 83 | "fides_key": "demo_privacy_policy", 84 | "organization_fides_key": "default_organization", 85 | "name": "string", 86 | "description": "The main privacy policy for the organization.", 87 | "rules": [ 88 | { 89 | "fides_key": "reject_direct_marketing", 90 | "organization_fides_key": "default_organization", 91 | "name": "Reject Direct Marketing", 92 | "description": "Disallow collecting any user contact info to use for marketing.", 93 | "data_categories": { 94 | "matches": "ANY", 95 | "values": [ 96 | "user.contact" 97 | ] 98 | }, 99 | "data_uses": { 100 | "matches": "ANY", 101 | "values": [ 102 | "advertising" 103 | ] 104 | }, 105 | "data_subjects": { 106 | "matches": "ANY", 107 | "values": [ 108 | "customer" 109 | ] 110 | }, 111 | } 112 | ] 113 | } 114 | ``` 115 | -------------------------------------------------------------------------------- /mkdocs/docs/resources/system.md: -------------------------------------------------------------------------------- 1 | # System 2 | 3 | A System is a model for describing anything that processes data for your organization (applications, services, 3rd party APIs, etc.) and describes how these datasets are used for business functions of instances of your data resources. It contains all 3 privacy attributes (`data_category`, `data_use`, and `data_subject`). 4 | 5 | ``` 6 | organization 7 | |-> ** system ** 8 | |-> privacy declarations 9 | ``` 10 | 11 | ## Object Structure 12 | 13 | **fides_key**     _constrained string_ 14 | 15 | A string token of your own invention that uniquely identifies this System. 
It's your responsibility to ensure that the value is unique across all of your System objects. The value may only contain alphanumeric characters, underscores, and hyphens. (`[A-Za-z0-9_.-]`). 16 | 17 | **name**     _string_ 18 | 19 | A UI-friendly label for the System. 20 | 21 | **description**     _string_ 22 | 23 | A human-readable description of the System. 24 | 25 | **system_type**     _string_ 26 | 27 | A required value to describe the type of system being modeled, examples include: Service, Application, Third Party, etc. 28 | 29 | **administrating_department**     _string_ 30 | 31 | An optional value to identify the owning department or group of the system within your organization 32 | 33 | **egress**     [array]      34 | 35 | The resources to which the System sends data. 36 | 37 | **ingress**     [array]      38 | 39 | The resources from which the System receives data. 40 | 41 | **privacy_declarations**     [array]      42 | 43 | The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, and `data_subject`) for each of your systems. 44 | 45 | If a dataset is referenced as part of the system, all applicable data categories set on the dataset are treated as part of the system. 46 | 47 | **organization_fides_key**     _string_     default: `default_organization` 48 | 49 | The fides key of the [Organization](../../resources/organization/) to which this System belongs. 50 | 51 | ## Examples 52 | 53 | ### **Manifest File** 54 | 55 | ```yaml 56 | system: 57 | - fides_key: demo_analytics_system 58 | name: Demo Analytics System 59 | description: A system used for analyzing customer behaviour. 
60 | system_type: Service 61 | administrating_department: Engineering 62 | egress: 63 | - fides_key: another_demo_system 64 | type: system 65 | data_categories: 66 | - user.contact 67 | ingress: 68 | - fides_key: yet_another_demo_system 69 | type: system 70 | data_categories: 71 | - user.device.cookie_id 72 | privacy_declarations: 73 | - name: Analyze customer behaviour for improvements. 74 | data_categories: 75 | - user.contact 76 | - user.device.cookie_id 77 | data_use: improve.system 78 | data_subjects: 79 | - customer 80 | egress: 81 | - another_demo_system 82 | ingress: 83 | - yet_another_demo_system 84 | ``` 85 | 86 | **Demo manifest file:** `/fides/demo_resources/demo_system.yml` 87 | 88 | ### **API** 89 | 90 | ```json title="POST /api/v1/system" 91 | 92 | { 93 | "fides_key": "demo_analytics_system", 94 | "name": "Demo Analytics System", 95 | "description": "A system used for analyzing customer behaviour.", 96 | "system_type": "Service", 97 | "administrating_department": "Engineering", 98 | "egress": [ 99 | { 100 | "fides_key": "another_demo_system", 101 | "type": "system", 102 | "data_categories": ["user.contact"] 103 | } 104 | ], 105 | "ingress": [ 106 | { 107 | "fides_key": "yet_another_demo_system", 108 | "type": "system", 109 | "data_categories": ["user.device.cookie_id"] 110 | } 111 | ], 112 | "privacy_declarations": [ 113 | { 114 | "name": "Analyze customer behaviour for improvements.", 115 | "data_categories": [ 116 | "user.contact", 117 | "user.device.cookie_id" 118 | ], 119 | "data_use": "improve.system", 120 | "data_subjects": [ 121 | "customer" 122 | ], 123 | "egress": ["another_demo_system"], 124 | "ingress": ["yet_another_demo_system"] 125 | } 126 | ] 127 | } 128 | ``` 129 | -------------------------------------------------------------------------------- /mkdocs/docs/syntax.md: -------------------------------------------------------------------------------- 1 | 2 | # Fides Configuration Syntax 3 | 4 | Other pages in this language section 
describe various concepts and resources that appear in the Fides language. This page describes the syntax of the language in more detail to help better interpret Fides whether you're authoring or reading. 5 | 6 | The Fides language is designed to be relatively easy for anyone to read and write. The primary objective is to translate complex privacy compliance concepts into an approachable syntax; it is for this reason that Fides is written entirely as YAML configurations. 7 | 8 | ## YAML - Building Block of Fides 9 | 10 | ### Fides Taxonomy 11 | 12 | The Fides language is intentionally simple. To ensure this, Fides declarations use predefined primitives (e.g. data categories) that are used when describing your datasets, systems, policies, etc. These predefined primitives exist as part of the Fides taxonomy which is maintained in your Fides server so they can be consistently used across your organization's development team. 13 | 14 | You can learn more about the taxonomy structure and how to extend it in the [taxonomy guide](./taxonomy/overview.md). 15 | 16 | ### Dot Notation and Snake_Case 17 | 18 | To make writing and reading Fides language as easy for humans as possible, declarations from the privacy taxonomy use `dot notation` for the keys and use `snake_case` compound labels. 19 | 20 | For example, to describe a field in a database as contact information relating to a user, you can write its data category as: 21 | 22 | ``` yaml 23 | # This declares that the contact data is about a given user: 24 | user.contact 25 | ``` 26 | 27 | If we require greater specificity, we could declare the contact type as a phone number by using a more specific sub-category: 28 | 29 | ``` yaml 30 | # This declares that the data is about a given user, 31 | # and is from the contact category and of type phone number. 32 | user.contact.phone_number 33 | ``` 34 | 35 | ### Key-Value 36 | 37 | The key-value pair is the basic building block of both YAML and Fides.
Every item in a Fides YAML document is a member of at least one dictionary. The key is always a `string`. The value can be any datatype: a `string`, a `number`, or another `dictionary`. Most commonly in Fides, this will be a `string` that may provide a description or a pointer to a reference object in the taxonomy. 38 | 39 | If we use the example of a user's contact email, to correctly declare this in valid Fides YAML as part of a Dataset, it would be: 40 | 41 | ``` yaml 42 | fields: # Group of fields in the dataset. 43 | - name: email 44 | description: User's Email 45 | data_categories: # Data category label(s) to assign field. 46 | - user.contact.email 47 | - user.account.contact.email 48 | ``` 49 | The key for each key-value pair determines what value types are valid (for example, a resource type such as `data_categories` must use values from the Data Categories taxonomy), but many keys accept arbitrary strings as descriptive labels. 50 | 51 | Finally, as you see in the example above, keys such as `data_categories` accept a list of values for multi-labeling. In this case, the field email has been assigned the value **user contact email** as well as **account-related contact email**, indicating that it may be either of those categories when used. 52 | 53 | 54 | ## Character Encoding 55 | 56 | Fides configuration files must always be UTF-8 encoded. While the delimiters of the language are all ASCII characters, Fides accepts non-ASCII characters in key-values, comments, and string values. -------------------------------------------------------------------------------- /mkdocs/docs/taxonomy/data_subjects.md: -------------------------------------------------------------------------------- 1 | # Data Subjects Reference 2 | 3 | Data Subjects are the group of labels commonly assigned to describe the type of system users to whom data may belong or whose data is being processed.
Examples might be customers, patients or simply abstract users. 4 | 5 | A Data Subject is a label that describes a segment of individuals whose data you store. Data Subject labels are typically fairly broad -- "Citizen", "Visitor", "Passenger", and so on -- although you can be as specific as your system needs: "Fans in Section K", for example. 6 | 7 | ## Object Structure 8 | 9 | **fides_key**     _constrained string_ 10 | 11 | A string token of your own invention that uniquely identifies this Data Subject. It's your responsibility to ensure that the value is unique across all of your Data Subject objects. The value can only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). 12 | 13 | **name**     _string_ 14 | 15 | A UI-friendly label for the Data Subject. 16 | 17 | **description**     _string_ 18 | 19 | A human-readable description of the Data Subject. 20 | 21 | **rights**     _enum_ 22 | 23 | An array of rights available to the data subject, made of available values coupled with Chapter 3 of the GDPR. The output of a data map is based upon the strategy for applying rights (`rights.strategy`) and the selections made from the following valid options: 24 | 25 | * `Informed` 26 | * `Access` 27 | * `Rectification` 28 | * `Erasure` 29 | * `Portability` 30 | * `Restrict Processing` 31 | * `Withdraw Consent` 32 | * `Object` 33 | * `Object to Automated Processing` 34 | 35 | **strategy**     _enum_ 36 | 37 | A strategy for selecting the rights available to the data subject. 38 | 39 | * `ALL` 40 | * `EXCLUDE` 41 | * `INCLUDE` 42 | * `NONE` 43 | 44 | **automated_decisions_or_profiling**     _boolean_ 45 | 46 | A boolean value of whether or not automated decision-making or profiling exists. Tied to Article 22 of the GDPR. 47 | 48 | **organization_fides_key**     _string_     default: `default_organization` 49 | 50 | The fides key of the organization to which this Data Subject belongs. 51 | 52 | !!!
Note "Extensibility and interoperability" 53 | Data Subjects in the taxonomy are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. 54 | 55 | You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. 56 | 57 | If you have suggestions for core classes that should ship with the taxonomy, [please submit your requests here](https://github.com/ethyca/privacy-taxonomy/issues) 58 | 59 | 60 | ## Data Subjects 61 | 62 | At present, Data Subjects are a flat structure with no subcategories, although this is likely to change over time. 63 | 64 | Currently, your collection of Data Subjects is given as a flat list: A Data Subject can't contain other Data Subjects. 65 | 66 | | Label | Parent Key | Description | 67 | | --- | --- | --- | 68 | |`anonymous_user` |`-` |An individual that is unidentifiable to the systems. Note - This should only be applied to truly anonymous users where there is no risk of re-identification| 69 | |`citizen_voter` |`-` |An individual registered to voter with a state or authority. | 70 | |`commuter` |`-` |An individual that is traveling or transiting in the context of location tracking. | 71 | |`consultant` |`-` |An individual employed in a consultative/temporary capacity by the organization. | 72 | |`customer` |`-` |An individual or other organization that purchases goods or services from the organization. | 73 | |`employee` |`-` |An individual employed by the organization. | 74 | |`job_applicant` |`-` |An individual applying for employment to the organization. | 75 | |`next_of_kin` |`-` |A relative of any other individual subject where such a relationship is known. | 76 | |`passenger` |`-` |An individual traveling on some means of provided transport. | 77 | |`patient` |`-` |An individual identified for the purposes of any medical care. 
| 78 | |`prospect` |`-` |An individual or organization to whom an organization is selling goods or services. | 79 | |`shareholder` |`-` |An individual or organization that holds equity in the organization. | 80 | |`supplier_vendor`|`-` |An individual or organization that provides services or goods to the organization. | 81 | |`trainee` |`-` |An individual undergoing training by the organization. | 82 | |`visitor` |`-` |An individual visiting a location. | 83 | -------------------------------------------------------------------------------- /mkdocs/docs/taxonomy/overview.md: -------------------------------------------------------------------------------- 1 | # Fides Taxonomy 2 | 3 | The Fides taxonomy contains three classification groups that are used together to easily describe all of the data types and associated processing behaviors of an entire tech stack: both the application and its data storage. 4 | 5 | ## Summary of Taxonomy Classification Groups 6 | 7 | ### 1. Data Categories 8 | Data Categories are labels to describe the type of data processed by your software. These are most heavily used by the System and Dataset resources, where you can assign one or more data categories to each field. 9 | 10 | Data Categories are hierarchical with natural inheritance, meaning you can classify data coarsely with a high-level category (e.g. `user.contact` data), or you can classify it with greater precision using subcategories (e.g. `user.contact.email` data). 11 | 12 | Learn more about [Data Categories in the taxonomy reference now](data_categories.md). 13 | 14 | ### 2. Data Uses 15 | Data Uses are labels that describe how, or for what purpose(s) a component of your system is using data. 16 | 17 | Data Uses are also hierarchical with natural inheritance, meaning you can easily describe what you're using data for either coarsely (e.g. `provide.service.operations`) or with more precision using subcategories (e.g. `provide.service.operations.support.optimization`).
18 | 19 | Learn more about [Data Uses in the taxonomy reference now](data_uses.md). 20 | 21 | ### 3. Data Subjects 22 | Data Subject is a label commonly used in the regulatory world to describe the users of a system whose data is being processed. In many systems a generic user label may be sufficient, however, Fides language is intended to provide greater control through specificity where needed. 23 | 24 | Examples of this are: 25 | 26 | - `anonymous_user` 27 | - `employee` 28 | - `customer` 29 | - `patient` 30 | - `next_of_kin` 31 | 32 | Learn more about [Data Subjects in the taxonomy reference now](data_subjects.md). 33 | 34 | ### Extensibility & interoperability 35 | The Fides language is designed to support common privacy compliance regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. 36 | 37 | You can extend the taxonomy to support your organization's needs. If you do this, we recommend extending from the existing categories to ensure interoperability inside and outside your organization. 
38 | 39 | -------------------------------------------------------------------------------- /mkdocs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Fides Language Privacy Taxonomy 2 | site_url: https://iabtechlab.github.io/fideslang/privacytaxonomy/ 3 | 4 | nav: 5 | - Overview: index.md 6 | 7 | theme: 8 | palette: 9 | - media: "(prefers-color-scheme: light)" 10 | scheme: default 11 | toggle: 12 | icon: material/toggle-switch-off-outline 13 | name: Switch to dark mode 14 | - media: "(prefers-color-scheme: dark)" 15 | scheme: slate 16 | toggle: 17 | icon: material/toggle-switch 18 | name: Switch to light mode 19 | name: material 20 | favicon: img/favicon.ico 21 | logo: img/fideslang.svg 22 | font: 23 | text: Source Sans Pro 24 | features: 25 | - navigation.top 26 | custom_dir: overrides 27 | 28 | markdown_extensions: 29 | - attr_list 30 | - pymdownx.superfences 31 | - pymdownx.snippets 32 | - pymdownx.inlinehilite 33 | - pymdownx.tabbed 34 | - admonition 35 | - pymdownx.highlight: 36 | linenums: true 37 | linenums_style: table 38 | 39 | extra_javascript: 40 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.7.2/highlight.min.js 41 | 42 | extra_css: 43 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.7.2/styles/default.min.css 44 | - css/fides.css 45 | - css/taxonomy.css 46 | - css/logo.css 47 | -------------------------------------------------------------------------------- /mkdocs/overrides/partials/footer.html: -------------------------------------------------------------------------------- 1 | 22 | 23 | {% import "partials/language.html" as lang with context %} 24 | 25 | 26 | 100 | -------------------------------------------------------------------------------- /mkdocs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs-material==7.2.5 2 | mkdocs-minify-plugin==0.4.0 3 | jinja2==3.1.2 
"""Nox sessions for testing and statically checking fideslang."""
import nox

# No session runs by default; callers must name the session(s) they want.
nox.options.sessions = []
nox.options.reuse_existing_virtualenvs = True

# Version matrix exercised by the parametrized ``tests`` session.
TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"]
TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9"]
TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"]


def install_requirements(session: nox.Session) -> None:
    """Install the runtime and development requirements into the session."""
    session.install("-r", "requirements.txt")
    session.install("-r", "dev-requirements.txt")


@nox.session(python=TESTED_PYTHON_VERSIONS)
@nox.parametrize("pydantic_version", TESTED_PYDANTIC_VERSIONS)
@nox.parametrize("pyyaml_version", TESTED_PYYAML_VERSIONS)
def tests(session: nox.Session, pydantic_version: str, pyyaml_version: str) -> None:
    """
    Run pytest against every Python/pydantic/pyyaml combination.

    Positional arguments given to nox after ``--`` are forwarded to pytest.
    """
    install_requirements(session)
    session.install(".")
    # Pin the matrix versions last so they override whatever the
    # requirements files and the package install resolved to.
    session.install(f"pydantic=={pydantic_version}")
    session.install(f"pyyaml=={pyyaml_version}")
    # Forward only real arguments. With none given, pytest falls back to the
    # `testpaths` configured in pyproject.toml instead of receiving an
    # empty-string path argument.
    session.run("pytest", *session.posargs)


@nox.session()
def pytest(session: nox.Session) -> None:
    """Runs the pytest suite with default versions."""
    install_requirements(session)
    session.install(".")
    session.run("pytest")


@nox.session()
def black(session: nox.Session) -> None:
    """Check (without modifying) that ``src/`` is black-formatted."""
    install_requirements(session)
    session.run("black", "--check", "src/")


@nox.session()
def mypy(session: nox.Session) -> None:
    """Run mypy using the configuration in pyproject.toml."""
    install_requirements(session)
    session.run("mypy")


@nox.session()
def pylint(session: nox.Session) -> None:
    """Lint ``src/`` with pylint; ``--jobs 0`` is passed through to pylint."""
    install_requirements(session)
    session.run("pylint", "--jobs", "0", "src/")


@nox.session()
def xenon(session: nox.Session) -> None:
    """Enforce the code-complexity thresholds on ``src`` with xenon."""
    install_requirements(session)
    session.run(
        "xenon",
        "src",
        "--max-absolute",
        "B",
        "--max-modules",
        "B",
        "--max-average",
        "A",
        "--ignore",
        "data,tests,docs",
        "--exclude",
        "src/fideslang/_version.py",
    )


@nox.session()
def static_checks(session: nox.Session) -> None:
    """Run the static checks."""
    session.notify("black")
    session.notify("xenon")
    session.notify("pylint")
    session.notify("mypy")


@nox.session()
def check_all(session: nox.Session) -> None:
    """Run static checks as well as tests."""
    session.notify("static_checks")
    session.notify("tests")
[tool.setuptools.packages.find] 36 | where = ["src"] 37 | 38 | ############ 39 | ## Typing ## 40 | ############ 41 | [tool.mypy] 42 | check_untyped_defs = true 43 | disallow_untyped_defs = true 44 | disallow_any_explicit = true 45 | files = ["src"] 46 | no_implicit_reexport = true 47 | plugins = ["pydantic.mypy"] 48 | pretty = true 49 | show_error_codes = true 50 | warn_redundant_casts = true 51 | warn_unused_configs = true 52 | warn_unused_ignores = true 53 | ignore_missing_imports = true 54 | 55 | [[tool.mypy.overrides]] 56 | module = "fideslang._version" 57 | ignore_errors = true 58 | 59 | [[tool.mypy.overrides]] 60 | module = ["tests.*"] 61 | disallow_untyped_defs = false 62 | 63 | [tool.pydantic-mypy] 64 | init_forbid_extra = true 65 | init_typed = true 66 | warn_required_dynamic_aliases = true 67 | warn_untyped_fields = true 68 | 69 | ########### 70 | ## Black ## 71 | ########### 72 | [tool.black] 73 | py39 = true 74 | line-length = 88 75 | include = '\.pyi?$' 76 | exclude = ''' 77 | /( 78 | \.git 79 | | \.hg 80 | | \.mypy_cache 81 | | \.tox 82 | | \.venv 83 | | _build 84 | | buck-out 85 | | build 86 | | dist 87 | 88 | # The following are specific to Black, you probably don't want those. 
89 | | blib2to3 90 | | tests/data 91 | )/ 92 | ''' 93 | 94 | ############ 95 | ## Pylint ## 96 | ############ 97 | [tool.pylint.messages_control] 98 | ignore = ["migrations", "_version.py"] 99 | disable = [ 100 | "line-too-long", 101 | "too-few-public-methods", 102 | "duplicate-code", 103 | "import-error", 104 | "unused-argument", 105 | "no-self-use", 106 | "import-outside-toplevel", 107 | "unsubscriptable-object", # Otherwise throws errors on certain Type annotations 108 | "too-many-arguments", 109 | "missing-module-docstring", 110 | "raise-missing-from", 111 | "fixme", 112 | ] 113 | extension-pkg-whitelist = "pydantic" 114 | 115 | [tool.pylint.reports] 116 | reports = "no" 117 | output-format = "colorized" 118 | 119 | [tool.pylint.format] 120 | max-line-length = "88" 121 | 122 | [tool.pylint.basic] 123 | good-names = "_,i,setUp,tearDown,maxDiff,default_app_config" 124 | 125 | ############ 126 | ## Pytest ## 127 | ############ 128 | [tool.pytest.ini_options] 129 | testpaths = "tests" 130 | log_level = "DEBUG" 131 | addopts = [ 132 | "--cov=fideslang", 133 | "--cov-report=term-missing", 134 | "-vv", 135 | "--no-cov-on-fail", 136 | ] 137 | markers = [ 138 | "unit: only runs tests that don't require non-python dependencies (i.e. a database)", 139 | ] 140 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic>=1.8.1,<1.11.0 2 | pyyaml>=5,<7 3 | packaging>=20.0 4 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | ## How To Use 2 | 3 | From the root directory of this repo, setup a virtual environment and install `fideslang` locally: 4 | ``` 5 | python -m venv venv 6 | source venv/bin/activate 7 | pip install . 
"""
Export the Default Fideslang Taxonomy as YAML, JSON and CSV files.
"""
import csv
import json
import shutil
from typing import Tuple

import yaml
from packaging.version import Version

from fideslang.default_taxonomy import DEFAULT_TAXONOMY
from fideslang.manifests import write_manifest

# (output file stem, attribute name on DEFAULT_TAXONOMY / manifest resource type)
FILE_RESOURCE_PAIRS: Tuple[Tuple[str, str], ...] = (
    ("data_categories", "data_category"),
    ("data_subjects", "data_subject"),
    ("data_uses", "data_use"),
)
DATA_DIR = "data_files"
DOCS_CSV_DIR = "mkdocs/docs/csv"


def export_yaml() -> None:
    """
    Export the default Taxonomy as YAML files.
    """

    for filename, resource_type in FILE_RESOURCE_PAIRS:
        output_filename = f"{DATA_DIR}/{filename}.yml"
        print(f"> Writing YAML to {output_filename}")
        resources = [x.dict() for x in getattr(DEFAULT_TAXONOMY, resource_type)]

        write_manifest(
            output_filename,
            manifest=resources,
            resource_type=resource_type,
        )


def export_json() -> None:
    """
    Load the default Taxonomy from their YAML files and re-export as JSON.
    """
    for filename, _ in FILE_RESOURCE_PAIRS:
        input_filename = f"{DATA_DIR}/{filename}.yml"
        # Build the output path from its parts; a blanket str.replace("yml", ...)
        # would also rewrite any "yml" occurring elsewhere in the path.
        json_filename = f"{DATA_DIR}/{filename}.json"

        with open(input_filename, "r", encoding="utf-8") as input_file:
            print(f"> Loading YAML from {input_filename}...")
            yaml_dict = yaml.safe_load(input_file)

        with open(json_filename, "w", encoding="utf-8") as json_file:
            print(f"> Writing JSON to {json_filename}...")
            json_str = json.dumps(yaml_dict, indent=4)
            print(json_str, file=json_file)


def export_csv() -> None:
    """
    Load the default Taxonomy from their YAML files and re-export as CSV.

    Each CSV gets a synthetic root row named after the YAML top-level key, and
    every item without a ``parent_key`` is attached to that root so the data
    forms a single tree. The CSV is then copied into the docs site directory.
    """
    for filename, _ in FILE_RESOURCE_PAIRS:
        input_filename = f"{DATA_DIR}/{filename}.yml"
        csv_filename = f"{DATA_DIR}/{filename}.csv"
        docs_filename = f"{DOCS_CSV_DIR}/{filename}.csv"

        # Load the Taxonomy from the YAML file
        with open(input_filename, "r", encoding="utf-8") as input_file:
            print(f"> Loading YAML from {input_filename}...")
            yaml_dict = yaml.safe_load(input_file)

        # newline="" lets the csv module control line endings itself, as its
        # documentation requires for files passed to a writer.
        with open(csv_filename, "w", newline="", encoding="utf-8") as csv_file:
            print(f"> Writing csv to {csv_filename}...")
            assert len(yaml_dict) == 1  # should only have a single top-level key
            toplevel_key = next(iter(yaml_dict))
            items = yaml_dict[toplevel_key]

            # Compute a unique set of keys used across all the sub-items
            unique_keys = sorted({key for item in items for key in item})

            # Insert the parent_key if not defined
            if "parent_key" not in unique_keys:
                unique_keys.append("parent_key")

            # Write out the CSV file headers. Put "description" last, for readability
            if "description" in unique_keys:
                unique_keys.remove("description")
                unique_keys.append("description")

            print(f"Headers: {unique_keys}")
            csv_writer = csv.DictWriter(csv_file, fieldnames=unique_keys)
            csv_writer.writeheader()

            # For visualizing as a hierarchy, generate a virtual "root" node to be a single parent
            assert {"fides_key", "name", "parent_key"}.issubset(
                unique_keys
            ), "Missing required keys for CSV!"
            root_key = toplevel_key.replace("-", "_")
            root_name = " ".join(word.capitalize() for word in root_key.split("_"))
            root_node = {"fides_key": root_key, "name": root_name}
            print(f"Generating root node: {root_node}...")
            csv_writer.writerow(root_node)

            for item in items:
                if item.get("parent_key") is None:
                    # Attach top-level items to the virtual "root" node
                    item["parent_key"] = root_key
                    print(f"Edited parent for {item['fides_key']}")
                csv_writer.writerow(item)

        print(f"> Copying csv to docs site at {docs_filename}...")
        shutil.copy(csv_filename, docs_filename)


if __name__ == "__main__":
    print("Exporting YAML files...")
    export_yaml()
    print("*" * 40)

    print("Exporting JSON files...")
    export_json()
    print("*" * 40)

    print("Exporting CSV files...")
    export_csv()
    print("*" * 40)

    print(f"Export complete! Check '{DATA_DIR}/' for output files.")
# Sort key for taxonomy resources: a resource sorts under its parent's key
# when it has a truthy `parent_key`, and under its own `fides_key` otherwise
# (including resources that have no `parent_key` attribute at all).
sort_data_types = lambda x: getattr(x, "parent_key", None) or x.fides_key
Note - This should only be applied to truly anonymous users where there is no risk of re-identification", 16 | ), 17 | default_subject_factory( 18 | fides_key="citizen_voter", 19 | organization_fides_key="default_organization", 20 | name="Citizen Voter", 21 | description="An individual registered to voter with a state or authority.", 22 | ), 23 | default_subject_factory( 24 | fides_key="commuter", 25 | organization_fides_key="default_organization", 26 | name="Commuter", 27 | description="An individual that is traveling or transiting in the context of location tracking.", 28 | ), 29 | default_subject_factory( 30 | fides_key="consultant", 31 | organization_fides_key="default_organization", 32 | name="Consultant", 33 | description="An individual employed in a consultative/temporary capacity by the organization.", 34 | ), 35 | default_subject_factory( 36 | fides_key="customer", 37 | organization_fides_key="default_organization", 38 | name="Customer", 39 | description="An individual or other organization that purchases goods or services from the organization.", 40 | ), 41 | default_subject_factory( 42 | fides_key="employee", 43 | organization_fides_key="default_organization", 44 | name="Employee", 45 | description="An individual employed by the organization.", 46 | ), 47 | default_subject_factory( 48 | fides_key="job_applicant", 49 | organization_fides_key="default_organization", 50 | name="Job Applicant", 51 | description="An individual applying for employment to the organization.", 52 | ), 53 | default_subject_factory( 54 | fides_key="next_of_kin", 55 | organization_fides_key="default_organization", 56 | name="Next of Kin", 57 | description="A relative of any other individual subject where such a relationship is known.", 58 | ), 59 | default_subject_factory( 60 | fides_key="passenger", 61 | organization_fides_key="default_organization", 62 | name="Passenger", 63 | description="An individual traveling on some means of provided transport.", 64 | ), 65 | 
default_subject_factory( 66 | fides_key="patient", 67 | organization_fides_key="default_organization", 68 | name="Patient", 69 | description="An individual identified for the purposes of any medical care.", 70 | ), 71 | default_subject_factory( 72 | fides_key="prospect", 73 | organization_fides_key="default_organization", 74 | name="Prospect", 75 | description="An individual or organization to whom an organization is selling goods or services.", 76 | ), 77 | default_subject_factory( 78 | fides_key="shareholder", 79 | organization_fides_key="default_organization", 80 | name="Shareholder", 81 | description="An individual or organization that holds equity in the organization.", 82 | ), 83 | default_subject_factory( 84 | fides_key="supplier_vendor", 85 | organization_fides_key="default_organization", 86 | name="Supplier/Vendor", 87 | description="An individual or organization that provides services or goods to the organization.", 88 | ), 89 | default_subject_factory( 90 | fides_key="trainee", 91 | organization_fides_key="default_organization", 92 | name="Trainee", 93 | description="An individual undergoing training by the organization.", 94 | ), 95 | default_subject_factory( 96 | fides_key="visitor", 97 | organization_fides_key="default_organization", 98 | name="Visitor", 99 | description="An individual visiting a location.", 100 | ), 101 | ] 102 | -------------------------------------------------------------------------------- /src/fideslang/default_taxonomy/organizations.py: -------------------------------------------------------------------------------- 1 | from fideslang.models import Organization 2 | 3 | DEFAULT_ORGANIZATIONS = [Organization(fides_key="default_organization")] 4 | -------------------------------------------------------------------------------- /src/fideslang/default_taxonomy/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Union 2 | 3 | from fideslang.models import DataCategory, 
def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType:
    """
    Build a taxonomy object that is flagged as part of the default taxonomy.

    The keyword arguments are handed to `taxonomy_class.parse_obj` after
    `is_default` has been forced to True and, when no truthy `version_added`
    was supplied, `version_added` has been set to "2.0.0".
    """
    # Whatever the caller passed, the result is always marked as a default item.
    fields = {**kwargs, "is_default": True}

    # "2.0.0" is the version where tracking started, so it serves as the
    # fallback starting point for items that do not state their own.
    if not fields.get("version_added"):
        fields["version_added"] = "2.0.0"

    return taxonomy_class.parse_obj(fields)
def _load_data() -> None:
    """
    Fill the module-level GVL lookup tables from the bundled JSON mapping files.

    The purpose, feature and data category mapping files are read in turn and
    every record is registered in the dictionaries declared at module level.
    """
    # --- purposes and special purposes ---
    with open(
        os.path.join(os.curdir, PURPOSE_MAPPING_FILE), encoding="utf-8"
    ) as purpose_file:
        purpose_data = load(purpose_file)

    # Regular purposes are handled first, so a data use listed under both a
    # purpose and a special purpose ends up mapped to the special purpose.
    for section, gvl_table, mapped_table in (
        ("purposes", GVL_PURPOSES, MAPPED_PURPOSES),
        ("specialPurposes", GVL_SPECIAL_PURPOSES, MAPPED_SPECIAL_PURPOSES),
    ):
        for raw_record in purpose_data[section].values():
            gvl_record = Purpose.parse_obj(raw_record)
            mapped_record = MappedPurpose.parse_obj(raw_record)
            gvl_table[gvl_record.id] = gvl_record
            mapped_table[mapped_record.id] = mapped_record
            for data_use in mapped_record.data_uses:
                MAPPED_PURPOSES_BY_DATA_USE[data_use] = mapped_record

    # --- features and special features ---
    with open(
        os.path.join(os.curdir, FEATURE_MAPPING_FILE), encoding="utf-8"
    ) as feature_file:
        feature_data = load(feature_file)

    # Both kinds share FEATURES_BY_NAME; special features are registered last.
    for section, feature_table in (
        ("features", GVL_FEATURES),
        ("specialFeatures", GVL_SPECIAL_FEATURES),
    ):
        for raw_record in feature_data[section].values():
            feature = Feature.parse_obj(raw_record)
            feature_table[feature.id] = feature
            FEATURES_BY_NAME[feature.name] = feature

    # --- data categories ---
    with open(
        os.path.join(os.curdir, DATA_CATEGORY_MAPPING_FILE), encoding="utf-8"
    ) as category_file:
        category_data = load(category_file)

    for raw_record in category_data.values():
        gvl_category = GVLDataCategory.parse_obj(raw_record)
        mapped_category = MappedDataCategory.parse_obj(raw_record)
        GVL_DATA_CATEGORIES[gvl_category.id] = gvl_category
        MAPPED_GVL_DATA_CATEGORIES[mapped_category.id] = mapped_category
def feature_id_to_feature_name(
    feature_id: int, special_feature: bool = False
) -> Optional[str]:
    """
    Look up the name of a GVL feature (or special feature) by its ID.

    The ID is resolved against the special feature table when
    `special_feature` is True and against the regular feature table otherwise.

    Returns None if no feature is registered under the given ID.
    """
    if special_feature:
        lookup = GVL_SPECIAL_FEATURES
    else:
        lookup = GVL_FEATURES

    match = lookup.get(feature_id)
    return match.name if match else None
It is used to route information on the Internet and display online content (including ads) on your connected device.", 6 | "fides_data_categories": [ 7 | "user.device.ip_address" 8 | ] 9 | }, 10 | "2": { 11 | "id": 2, 12 | "name": "Device characteristics", 13 | "description": "Technical characteristics about the device you are using that are not unique to you, such as the language, the time zone or the operating system.", 14 | "fides_data_categories": [ 15 | "user.device", 16 | "user.sensor", 17 | "user.user_sensor", 18 | "user.telemetry" 19 | ] 20 | }, 21 | "3": { 22 | "id": 3, 23 | "name": "Device identifiers", 24 | "description": "A device identifier is a unique string of characters assigned to your device or browser by means of a cookie or other storage technologies. It may be created or accessed to recognise your device e.g. across web pages from the same site or across multiple sites or apps.", 25 | "fides_data_categories": [ 26 | "user.device" 27 | ] 28 | }, 29 | "4": { 30 | "id": 4, 31 | "name": "Probabilistic identifiers", 32 | "description": "A probabilistic identifier can be created by combining characteristics associated with your device (the type of browser or operating system used) and the IP address of the Internet connection. If you give your agreement, additional characteristics (e.g. the installed font or screen resolution) can also be combined to improve precision of the probabilistic identifier. Such an identifier is considered \"probabilistic\" because several devices can share the same characteristics and Internet connection. It may be used to recognise your device across e.g. 
web pages from the same site or across multiple sites or apps.", 33 | "fides_data_categories": ["user.unique_id.pseudonymous"] 34 | }, 35 | "5": { 36 | "id": 5, 37 | "name": "Authentication-derived identifiers", 38 | "description": "Where an identifier is created on the basis of authentication data, such as contact details associated with online accounts you have created on websites or apps (e.g. e-mail address, phone number) or customer identifiers (e.g. identifier provided by your telecom operator), that identifier may be used to recognise you across websites, apps and devices when you are logged-in with the same contact details.", 39 | "fides_data_categories": [ 40 | "user.account", 41 | "user.unique_id", 42 | "user.device" 43 | ] 44 | }, 45 | "6": { 46 | "id": 6, 47 | "name": "Browsing and interaction data", 48 | "description": "Your online activity such as the websites you visit, apps you are using, the content you search for on this service, or your interactions with content or ads, such as the number of times you have seen a specific content or ad or whether you clicked on it.", 49 | "fides_data_categories": [ 50 | "user.behavior" 51 | ] 52 | }, 53 | "7": { 54 | "id": 7, 55 | "name": "User-provided data", 56 | "description": "The information you may have provided by way of declaration via a form (e.g. feedback, a comment) or when creating an account (e.g. your age, your occupation).", 57 | "fides_data_categories": [ 58 | "user.account", 59 | "user.contact", 60 | "user.content.public", 61 | "user.demographic", 62 | "user.name", 63 | "user.workplace" 64 | ] 65 | }, 66 | "8": { 67 | "id": 8, 68 | "name": "Non-precise location data", 69 | "description": "An approximation of your location, expressed as an area with a radius of at least 500 meters. Your approximate location can be deduced from e.g. 
the IP address of your connection.", 70 | "fides_data_categories": [ 71 | "user.location.imprecise" 72 | ] 73 | }, 74 | "9": { 75 | "id": 9, 76 | "name": "Precise location data", 77 | "description": "Your precise location within a radius of less than 500 meters based on your GPS coordinates. It may be used only with your acceptance.", 78 | "fides_data_categories": [ 79 | "user.location.precise" 80 | ] 81 | }, 82 | "10": { 83 | "id": 10, 84 | "name": "Users' profiles", 85 | "description": "Certain characteristics (e.g. your possible interests, your purchase intentions, your consumer profile) may be inferred or modeled from your previous online activity (e.g. the content you viewed or the service you used, your time spent on various online content and services) or the information you have provided (e.g. your age, your occupation).", 86 | "fides_data_categories": [ 87 | "user.demographic" 88 | ] 89 | }, 90 | "11": { 91 | "id": 11, 92 | "name": "Privacy choices", 93 | "description": "Your preferences regarding the processing of your data, based on the information you have received.", 94 | "fides_data_categories": [ 95 | "user.privacy_preferences" 96 | ] 97 | } 98 | } 99 | 100 | -------------------------------------------------------------------------------- /src/fideslang/gvl/gvl_feature_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "features": { 3 | "1": { 4 | "id": 1, 5 | "name": "Match and combine data from other data sources", 6 | "description": "Information about your activity on this service may be matched and combined with other information relating to you and originating from various sources (for instance your activity on a separate online service, your use of a loyalty card in-store, or your answers to a survey), in support of the purposes explained in this notice." 
7 | }, 8 | "2": { 9 | "id": 2, 10 | "name": "Link different devices", 11 | "description": "In support of the purposes explained in this notice, your device might be considered as likely linked to other devices that belong to you or your household (for instance because you are logged in to the same service on both your phone and your computer, or because you may use the same Internet connection on both devices)." 12 | }, 13 | "3": { 14 | "id": 3, 15 | "name": "Identify devices based on information transmitted automatically", 16 | "description": "Your device might be distinguished from other devices based on information it automatically sends when accessing the Internet (for instance, the IP address of your Internet connection or the type of browser you are using) in support of the purposes exposed in this notice." 17 | } 18 | }, 19 | "specialFeatures": { 20 | "1": { 21 | "id": 1, 22 | "name": "Use precise geolocation data", 23 | "description": "With your acceptance, your precise location (within a radius of less than 500 metres) may be used in support of the purposes explained in this notice." 24 | }, 25 | "2": { 26 | "id": 2, 27 | "name": "Actively scan device characteristics for identification", 28 | "description": "With your acceptance, certain characteristics specific to your device might be requested and used to distinguish it from other devices (such as the installed fonts or plugins, the resolution of your screen) in support of the purposes explained in this notice." 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/fideslang/gvl/models.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class Purpose(BaseModel): 7 | """ 8 | Pydantic model for GVL purpose records 9 | """ 10 | 11 | id: int = Field( 12 | description="Official GVL purpose ID. 
class GVLDataCategory(BaseModel):
    """
    Pydantic model for GVL data category records.

    Holds only the fields that come from the GVL source data: the ID,
    the name and the description of the data category.
    """

    id: int = Field(
        description="Official GVL data category ID. Used for linking with vendor records"
    )
    name: str = Field(description="Name of the GVL data category.")
    # The field metadata previously read "GVL purpose" (copied from the Purpose
    # model); it describes the data category, not a purpose.
    description: str = Field(description="Description of the GVL data category.")
def union_manifests(manifests: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Combine all of the manifests into a single dictionary,
    appending resource values with the same keys.

    Keys appear in the result in the order they are first encountered, and
    the resources under each key keep the order of the input manifests.
    The input manifests are never modified, and an empty list of manifests
    yields an empty dictionary.
    """
    unioned_dict: Dict[str, List] = {}
    for manifest in manifests:
        for key, resources in manifest.items():
            if key in unioned_dict:
                unioned_dict[key] += resources
            else:
                # Store a copy of the first list seen for a key: the in-place
                # `+=` above would otherwise extend the caller's own list.
                unioned_dict[key] = (
                    list(resources) if isinstance(resources, list) else resources
                )
    return unioned_dict
def load_manifests_into_taxonomy(raw_manifests: Dict[str, List[Dict]]) -> Taxonomy:
    """
    Turn raw resource manifests into a Taxonomy of parsed resource models.

    Each raw resource is parsed with `parse_dict` under its resource type,
    and the parsed resources are then assembled into a Taxonomy.
    """
    parsed_resources: Dict[str, List[FidesModel]] = {}
    for resource_type, resource_list in raw_manifests.items():
        parsed_resources[resource_type] = [
            parse_dict(resource_type, raw_resource) for raw_resource in resource_list
        ]

    return Taxonomy.parse_obj(parsed_resources)
def find_nested_keys_in_list(parameter_value: List[BaseModel]) -> List[str]:
    """
    Collect the fides keys referenced by every element of a list of objects.

    Each element is passed to `find_referenced_fides_keys` and the keys found
    are gathered into one flat list, in element order.
    """
    collected_keys: List[str] = []
    for param_element in parameter_value:
        collected_keys.extend(find_referenced_fides_keys(param_element))
    return collected_keys
def get_referenced_missing_keys(taxonomy: Taxonomy) -> Set[FidesKey]:
    """
    Return the FidesKeys that are referenced within the Taxonomy but do not
    belong to any resource contained in it.

    Every resource in every populated field of the taxonomy is scanned with
    `find_referenced_fides_keys`; a key counts as missing when
    `get_resource_by_fides_key` finds no resource for it. A taxonomy without
    any resources yields an empty set.
    """
    referenced_keys: List[Set[FidesKey]] = [
        find_referenced_fides_keys(resource)
        for resource_type in taxonomy.__fields_set__
        for resource in getattr(taxonomy, resource_type)
    ]

    # The explicit initializer matters: without it `reduce` raises TypeError
    # when there are no resources to merge.
    no_keys: Set[FidesKey] = set()
    key_set: Set[FidesKey] = reduce(
        lambda merged, keys: merged | keys, referenced_keys, no_keys
    )

    keys_not_in_taxonomy = {
        fides_key
        for fides_key in key_set
        if get_resource_by_fides_key(taxonomy, fides_key) is None
    }
    return keys_not_in_taxonomy
class FidesKey(ConstrainedStr):
    """
    Constrained string type used for fides keys.

    A valid key consists only of alphanumeric characters, '.', '_', '<',
    '>' and '-', and must not be empty.
    """

    regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$")

    @classmethod  # Overridden so that failures raise the custom FidesValidationError
    def validate(cls, value: str) -> str:
        """Return the value unchanged, or raise FidesValidationError if it is not a valid FidesKey."""
        if cls.regex.match(value):
            return value

        raise FidesValidationError(
            f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}"
        )
def unique_items_in_list(values: List) -> List:
    """
    Ensure that no two items in the list share the same `name` attribute.

    Intended for fields that carry no FidesKey but still need a
    uniqueness check. The list is returned unchanged when every name
    is unique; otherwise a FidesValidationError naming the repeated
    entries is raised.
    """
    name_counts = Counter(item.name for item in values)
    repeated_names = [name for name, count in name_counts.items() if count > 1]

    if not repeated_names:
        return values

    raise FidesValidationError(
        f"Duplicate entries found: [{','.join(repeated_names)}]"
    )
def has_versioning_if_default(is_default: bool, values: Dict) -> bool:
    """
    Check that version fields are set for default items and absent otherwise.

    A default item must carry a truthy `version_added`. A non-default item
    must not carry a truthy `version_added`, `version_deprecated` or
    `replaced_by`.

    Returns `is_default` unchanged when the check passes and raises
    FidesValidationError when it does not.
    """
    # Explicit conditions are used instead of `assert`, because assert
    # statements are removed when Python runs with -O and the validation
    # would then silently stop happening.
    if is_default:
        # If it's a default item, it at least needs a starting version
        if not values.get("version_added"):
            raise FidesValidationError("Default items must have version information!")
        return is_default

    # If it's not default, it shouldn't have version info
    version_fields = ("version_added", "version_deprecated", "replaced_by")
    if any(values.get(field) for field in version_fields):
        raise FidesValidationError(
            "Non-default items can't have version information!"
        )

    return is_default
def parse_data_type_string(type_string: Optional[str]) -> Tuple[Optional[str], bool]:
    """Split a data type string into its base type and an array flag.

    Arrays are expressed in the form 'type[]'. The base type is everything
    before the first '[]'.

    e.g.
    - 'string' -> ('string', False)
    - 'string[]' -> ('string', True)
    - None or '' -> (None, False)

    These data_types are for use in DatasetField.fides_meta.
    """
    if not type_string:
        return None, False

    # `partition` leaves the marker empty when '[]' does not occur at all.
    base_type, array_marker, _ = type_string.partition("[]")
    return base_type, bool(array_marker)
import os
from typing import Any, Dict

import pytest
import yaml

from fideslang import models


@pytest.fixture(scope="session")
def resources_dict():
    """
    Yields a dictionary containing sample representations of different
    Fides resources, keyed by resource type.
    """
    sample_resources: Dict[str, Any] = {
        "data_category": models.DataCategory(
            organization_fides_key=1,
            fides_key="user.custom",
            parent_key="user",
            name="Custom Data Category",
            description="Custom Data Category",
        ),
        "dataset": models.Dataset(
            organization_fides_key=1,
            fides_key="test_sample_db_dataset",
            name="Sample DB Dataset",
            description="This is a Sample Database Dataset",
            collections=[
                models.DatasetCollection(
                    name="user",
                    fields=[
                        models.DatasetField(
                            name="Food_Preference",
                            description="User's favorite food",
                            path="some.path",
                        ),
                        models.DatasetField(
                            name="First_Name",
                            description="A First Name Field",
                            path="another.path",
                            data_categories=["user.name"],
                        ),
                        models.DatasetField(
                            name="Email",
                            description="User's Email",
                            path="another.another.path",
                            data_categories=["user.contact.email"],
                        ),
                    ],
                )
            ],
        ),
        "data_subject": models.DataSubject(
            organization_fides_key=1,
            fides_key="custom_subject",
            name="Custom Data Subject",
            description="Custom Data Subject",
        ),
        "data_use": models.DataUse(
            organization_fides_key=1,
            fides_key="custom_data_use",
            name="Custom Data Use",
            description="Custom Data Use",
        ),
        "evaluation": models.Evaluation(
            fides_key="test_evaluation", status="PASS", details=["foo"], message="bar"
        ),
        "organization": models.Organization(
            fides_key="test_organization",
            name="Test Organization",
            description="Test Organization",
        ),
        "policy": models.Policy(
            organization_fides_key=1,
            fides_key="test_policy",
            name="Test Policy",
            version="1.3",
            description="Test Policy",
            rules=[],
        ),
        "policy_rule": models.PolicyRule(
            name="Test Policy",
            data_categories=models.PrivacyRule(matches="NONE", values=[]),
            data_uses=models.PrivacyRule(matches="NONE", values=["provide.system"]),
            data_subjects=models.PrivacyRule(matches="ANY", values=[]),
        ),
        "system": models.System(
            organization_fides_key=1,
            fides_key="test_system",
            system_type="SYSTEM",
            name="Test System",
            description="Test Policy",
            privacy_declarations=[
                models.PrivacyDeclaration(
                    name="declaration-name",
                    data_categories=[],
                    data_use="provide",
                    data_subjects=[],
                )
            ],
        ),
    }
    yield sample_resources


@pytest.fixture()
def test_manifests():
    """Yields two sample manifests, each holding one dataset and one system."""
    sample_manifests = {
        "manifest_1": {
            "dataset": [
                {
                    "name": "Test Dataset 1",
                    "organization_fides_key": 1,
                    "datasetType": {},
                    "datasetLocation": "somedb:3306",
                    "description": "Test Dataset 1",
                    "fides_key": "some_dataset",
                    "datasetTables": [],
                }
            ],
            "system": [
                {
                    "name": "Test System 1",
                    "organization_fides_key": 1,
                    "systemType": "mysql",
                    "description": "Test System 1",
                    "fides_key": "some_system",
                }
            ],
        },
        "manifest_2": {
            "dataset": [
                {
                    "name": "Test Dataset 2",
                    "description": "Test Dataset 2",
                    "organization_fides_key": 1,
                    "datasetType": {},
                    "datasetLocation": "somedb:3306",
                    "fides_key": "another_dataset",
                    "datasetTables": [],
                }
            ],
            "system": [
                {
                    "name": "Test System 2",
                    "organization_fides_key": 1,
                    "systemType": "mysql",
                    "description": "Test System 2",
                    "fides_key": "another_system",
                }
            ],
        },
    }
    yield sample_manifests


@pytest.fixture()
def populated_manifest_dir(test_manifests, tmp_path):
    """
    Writes every sample manifest to `<tmp_path>/populated_manifest/<name>.yml`
    and returns the directory path as a string.
    """
    manifest_dir = f"{tmp_path}/populated_manifest"
    os.mkdir(manifest_dir)
    for manifest_name, manifest in test_manifests.items():
        # Explicit encoding keeps the written files independent of the
        # platform's default locale.
        with open(
            f"{manifest_dir}/{manifest_name}.yml", "w", encoding="utf-8"
        ) as manifest_file:
            yaml.dump(manifest, manifest_file)
    return manifest_dir


@pytest.fixture()
def populated_nested_manifest_dir(test_manifests, tmp_path):
    """
    Writes every sample manifest to
    `<tmp_path>/populated_nested_manifest/<name>/<name>.yml` (one subdirectory
    per manifest) and returns the top-level directory path as a string.
    """
    manifest_dir = f"{tmp_path}/populated_nested_manifest"
    os.mkdir(manifest_dir)
    for manifest_name, manifest in test_manifests.items():
        nested_manifest_dir = f"{manifest_dir}/{manifest_name}"
        os.mkdir(nested_manifest_dir)
        with open(
            f"{nested_manifest_dir}/{manifest_name}.yml", "w", encoding="utf-8"
        ) as manifest_file:
            yaml.dump(manifest, manifest_file)
    return manifest_dir
38 | rules: 39 | - fides_key: reject_political_opinion 40 | description: Disallow advertising of customer political opinion data 41 | data_categories: 42 | matches: ANY 43 | values: 44 | - user.political_opinion 45 | data_uses: 46 | matches: ANY 47 | values: 48 | - advertising 49 | data_subjects: 50 | matches: ANY 51 | values: 52 | - customer 53 | -------------------------------------------------------------------------------- /tests/data/failing_dataset_field_taxonomy.yml: -------------------------------------------------------------------------------- 1 | dataset: 2 | - fides_key: test_db_dataset_failing_dataset 3 | name: Sample DB Dataset 4 | description: This is a Sample Database Dataset 5 | collections: 6 | - name: users 7 | description: User's information 8 | fields: 9 | - name: First_Name 10 | description: A First Name Field 11 | data_categories: 12 | - user.name 13 | - name: political_opinion 14 | description: User's political opinion 15 | data_categories: 16 | - user.political_opinion 17 | system: 18 | - fides_key: customer_data_sharing_system 19 | name: Customer Data Sharing System 20 | description: Share data about our users with third-parties for advertising 21 | system_type: Service 22 | ingress: 23 | - fides_key: test_db_dataset_failing_dataset 24 | type: dataset 25 | privacy_declarations: 26 | - name: Share Political Opinions 27 | data_categories: 28 | - user 29 | data_use: advertising 30 | data_subjects: 31 | - customer 32 | ingress: 33 | - test_db_dataset_failing_dataset 34 | 35 | policy: 36 | - fides_key: primary_privacy_policy 37 | name: Primary Privacy Policy 38 | description: The main privacy policy for the organization. 
39 | rules: 40 | - fides_key: reject_political_opinion 41 | description: Disallow advertising of customer political opinion data 42 | data_categories: 43 | matches: ANY 44 | values: 45 | - user.political_opinion 46 | data_uses: 47 | matches: ANY 48 | values: 49 | - advertising 50 | data_subjects: 51 | matches: ANY 52 | values: 53 | - customer 54 | -------------------------------------------------------------------------------- /tests/data/failing_dataset_taxonomy.yml: -------------------------------------------------------------------------------- 1 | dataset: 2 | - fides_key: test_db_dataset_failing_dataset 3 | name: Sample DB Dataset 4 | description: This is a Sample Database Dataset 5 | data_categories: 6 | - user.political_opinion 7 | collections: 8 | - name: users 9 | description: User's information 10 | fields: 11 | - name: First_Name 12 | description: A First Name Field 13 | data_categories: 14 | - user.name 15 | 16 | system: 17 | - fides_key: customer_data_sharing_system 18 | name: Customer Data Sharing System 19 | description: Share data about our users with third-parties for advertising 20 | system_type: Service 21 | ingress: 22 | - fides_key: test_db_dataset_failing_dataset 23 | type: dataset 24 | privacy_declarations: 25 | - name: Share Political Opinions 26 | data_categories: 27 | - user 28 | data_use: advertising 29 | data_subjects: 30 | - customer 31 | ingress: 32 | - test_db_dataset_failing_dataset 33 | 34 | policy: 35 | - fides_key: primary_privacy_policy 36 | name: Primary Privacy Policy 37 | description: The main privacy policy for the organization. 
38 | rules: 39 | - fides_key: reject_political_opinion 40 | description: Disallow advertising of customer political opinion data 41 | data_categories: 42 | matches: ANY 43 | values: 44 | - user.political_opinion 45 | data_uses: 46 | matches: ANY 47 | values: 48 | - advertising 49 | data_subjects: 50 | matches: ANY 51 | values: 52 | - customer 53 | -------------------------------------------------------------------------------- /tests/data/failing_declaration_taxonomy.yml: -------------------------------------------------------------------------------- 1 | system: 2 | - fides_key: customer_data_sharing_system 3 | name: Customer Data Sharing System 4 | description: Share data about our users with third-parties for payment processing 5 | system_type: Service 6 | privacy_declarations: 7 | - name: Share Political Opinions 8 | data_categories: 9 | - user.political_opinion 10 | data_use: third_party_sharing.payment_processing 11 | data_subjects: 12 | - customer 13 | 14 | policy: 15 | - fides_key: primary_privacy_policy 16 | name: Primary Privacy Policy 17 | description: The main privacy policy for the organization. 
18 | rules: 19 | - name: reject_targeted_marketing 20 | description: Disallow third party sharing of customer data 21 | data_categories: 22 | matches: ANY 23 | values: 24 | - user 25 | data_uses: 26 | matches: ANY 27 | values: 28 | - third_party_sharing 29 | data_subjects: 30 | matches: ANY 31 | values: 32 | - customer 33 | -------------------------------------------------------------------------------- /tests/data/failing_nested_dataset.yml: -------------------------------------------------------------------------------- 1 | dataset: 2 | - fides_key: test_failing_nested_dataset_field 3 | name: Sample Nested Dataset 4 | description: Nested fields dataset with failure to be captured in evaluation 5 | collections: 6 | - name: organization 7 | description: Organization information 8 | fields: 9 | - name: organization_name 10 | - name: organization_address 11 | fields: 12 | - name: street 13 | data_categories: 14 | - user.account.contact.street 15 | - name: city 16 | data_categories: 17 | - user.account.contact.city 18 | - name: state 19 | data_categories: 20 | - user.account.contact.state 21 | 22 | system: 23 | - fides_key: client_analytics 24 | name: Client Usage Analytics 25 | description: Use aggregated and anonymous data to measure usage 26 | system_type: Service 27 | ingress: 28 | - fides_key: test_failing_nested_dataset_field 29 | type: dataset 30 | privacy_declarations: 31 | - name: Measure usage of users 32 | data_categories: 33 | - user 34 | data_use: improve.system 35 | data_subjects: 36 | - customer 37 | ingress: 38 | - test_failing_nested_dataset_field 39 | 40 | policy: 41 | - fides_key: primary_privacy_policy 42 | name: Primary Privacy Policy 43 | description: The main privacy policy for the organization. 
44 | rules: 45 | - name: exclude_location_information 46 | description: Do not allow any contact information 47 | data_categories: 48 | matches: OTHER 49 | values: 50 | - user 51 | data_uses: 52 | matches: OTHER 53 | values: 54 | - provide 55 | data_subjects: 56 | matches: OTHER 57 | values: 58 | - anonymous_user 59 | -------------------------------------------------------------------------------- /tests/data/passing_declaration_taxonomy.yml: -------------------------------------------------------------------------------- 1 | system: 2 | - fides_key: customer_data_sharing_system 3 | name: Customer Data Sharing System 4 | description: Share data about our users with third-parties for payment processing 5 | system_type: Service 6 | privacy_declarations: 7 | - name: Share Political Opinions 8 | data_categories: 9 | - user.political_opinion 10 | data_use: third_party_sharing.payment_processing 11 | data_subjects: 12 | - customer 13 | 14 | policy: 15 | - fides_key: primary_privacy_policy 16 | name: Primary Privacy Policy 17 | description: The main privacy policy for the organization. 
import pytest

from fideslang.gvl import (
    GVL_FEATURES,
    GVL_SPECIAL_FEATURES,
    Feature,
    data_category_id_to_data_categories,
    feature_id_to_feature_name,
    feature_name_to_feature,
    purpose_to_data_use,
)


def test_purpose_to_data_use():
    """Purposes and special purposes resolve to their mapped data uses."""
    storage_uses = ["functional.storage"]
    assert purpose_to_data_use(1) == storage_uses
    # Passing False explicitly must give the same result as the default call
    assert purpose_to_data_use(1, False) == storage_uses

    # Special purpose lookup
    special_purpose_uses = purpose_to_data_use(1, True)
    assert special_purpose_uses == [
        "essential.fraud_detection",
        "essential.service.security",
    ]

    # One other purpose, just to be comprehensive
    targeted_advertising_uses = purpose_to_data_use(4)
    assert targeted_advertising_uses == [
        "marketing.advertising.first_party.targeted",
        "marketing.advertising.third_party.targeted",
    ]

    assert purpose_to_data_use(11) == ["personalize.content.limited"]

    # Invalid purposes raise KeyErrors, for both regular and special lookups
    for invalid_args in ((12,), (3, True)):
        with pytest.raises(KeyError):
            purpose_to_data_use(*invalid_args)


def test_features():
    """Sanity check that features and special features were parsed."""
    first_feature = GVL_FEATURES[1]
    assert isinstance(first_feature, Feature)
    assert first_feature.name == "Match and combine data from other data sources"

    first_special_feature = GVL_SPECIAL_FEATURES[1]
    assert isinstance(first_special_feature, Feature)
    assert first_special_feature.name == "Use precise geolocation data"


def test_feature_name_to_feature():
    """Names resolve to features; unknown names resolve to None."""
    link_devices = feature_name_to_feature("Link different devices")
    assert link_devices.id == 2

    geolocation = feature_name_to_feature("Use precise geolocation data")
    assert geolocation.id == 1

    assert feature_name_to_feature("Name doesn't exist") is None


def test_feature_id_to_feature_name():
    """Ids resolve to feature names; unknown ids resolve to None."""
    feature_name = feature_id_to_feature_name(feature_id=1)
    assert feature_name == "Match and combine data from other data sources"

    special_feature_name = feature_id_to_feature_name(
        feature_id=1, special_feature=True
    )
    assert special_feature_name == "Use precise geolocation data"

    assert feature_id_to_feature_name(feature_id=1001) is None


def test_data_category_id_to_data_categories():
    """GVL data category ids resolve to their mapped data categories."""
    assert data_category_id_to_data_categories(1) == ["user.device.ip_address"]

    # One other data category, just to be comprehensive
    assert data_category_id_to_data_categories(5) == [
        "user.account",
        "user.unique_id",
        "user.device",
    ]

    # Invalid categories raise KeyErrors
    with pytest.raises(KeyError):
        data_category_id_to_data_categories(12)
from collections import Counter
from typing import Any, Dict, Iterable, Tuple

import pytest

from fideslang.default_taxonomy import DEFAULT_TAXONOMY

# Expected number of default resources per taxonomy type
taxonomy_counts = {
    "data_category": 85,
    "data_use": 55,
    "data_subject": 15,
}


def _duplicates(values: Iterable[Any]) -> Dict[Any, int]:
    """Return each value that occurs more than once, mapped to its count."""
    return {value: count for value, count in Counter(values).items() if count > 1}


class TestDefaultTaxonomy:
    """Consistency checks over the resources in DEFAULT_TAXONOMY."""

    @pytest.mark.parametrize(
        "type_and_count", taxonomy_counts.items(), ids=lambda items: items[0]
    )
    def test_taxonomy_count(self, type_and_count: Tuple[str, int]) -> None:
        data_type, expected_count = type_and_count
        assert len(getattr(DEFAULT_TAXONOMY, data_type)) == expected_count

    @pytest.mark.parametrize("data_type", taxonomy_counts.keys())
    def test_are_set_as_default(self, data_type: str) -> None:
        assert all(x.is_default for x in getattr(DEFAULT_TAXONOMY, data_type))

    @pytest.mark.parametrize("data_type", taxonomy_counts.keys())
    def test_key_uniqueness(self, data_type: str) -> None:
        duplicate_keys = _duplicates(
            x.fides_key for x in getattr(DEFAULT_TAXONOMY, data_type)
        )
        # The message replaces the previous debug print and only shows on failure
        assert not duplicate_keys, f"Duplicate fides_keys: {duplicate_keys}"

    @pytest.mark.parametrize("data_type", taxonomy_counts.keys())
    def test_name_uniqueness(self, data_type: str) -> None:
        duplicate_names = _duplicates(
            x.name for x in getattr(DEFAULT_TAXONOMY, data_type)
        )
        assert not duplicate_names, f"Duplicate names: {duplicate_names}"

    @pytest.mark.parametrize("data_type", taxonomy_counts.keys())
    def test_description_uniqueness(self, data_type: str) -> None:
        # Resources with a version_deprecated value are excluded from the check
        duplicate_descriptions = _duplicates(
            x.description
            for x in getattr(DEFAULT_TAXONOMY, data_type)
            if not x.version_deprecated
        )
        assert (
            not duplicate_descriptions
        ), f"Duplicate descriptions: {duplicate_descriptions}"

    @pytest.mark.parametrize("data_type", ["data_category", "data_use"])
    def test_parent_keys_exist(self, data_type: str) -> None:
        """This test catches any keys that are used as parents but don't exist as fides keys."""
        resources = getattr(DEFAULT_TAXONOMY, data_type)
        fides_keys = {x.fides_key for x in resources}
        parent_keys = {x.parent_key for x in resources if x.parent_key}
        missing_parents = parent_keys - fides_keys
        assert not missing_parents, f"Unknown parent keys: {missing_parents}"
from pathlib import Path

import pytest
import yaml

from fideslang import manifests

# Resolve the sample manifest relative to this file (tests/fideslang/) so the
# tests do not depend on the directory pytest is invoked from.
SAMPLE_MANIFEST_PATH = (
    Path(__file__).resolve().parent.parent / "data" / "sample_manifest.yml"
)


# Helpers
@pytest.fixture()
def sample_manifest():
    """Yields the result of loading tests/data/sample_manifest.yml."""
    yield manifests.load_yaml_into_dict(str(SAMPLE_MANIFEST_PATH))


@pytest.fixture()
def ingestion_manifest_directory(
    populated_manifest_dir, populated_nested_manifest_dir, request
):
    """
    Allows for parameterization of manifests to ingest by returning
    the corresponding fixture
    """
    return {
        "populated_manifest_dir": populated_manifest_dir,
        "populated_nested_manifest_dir": populated_nested_manifest_dir,
    }[request.param]


# Unit
@pytest.mark.unit
def test_load_yaml_into_dict(sample_manifest):
    """
    Make sure that the yaml loaded from the sample manifest matches
    what is expected.
    """
    expected_result = {
        "id": 0,
        "name": "sample2",
        "version": "0.0.1",
        "description": "some description",
        "fields": [
            {"name": "myemail", "pii": "work_email"},
            {"name": "myotheremail", "pii": "personal_email"},
            {"name": "prefs", "pii": "preferences"},
        ],
        "raw": "none",
        "purpose": "security",
    }
    assert expected_result == sample_manifest


@pytest.mark.unit
def test_write_manifest(tmp_path):
    """A written manifest nests the resource in a list under its resource type."""
    test_resource = {"foo": "bar", "bar": "baz"}
    expected_result = {"test": [{"foo": "bar", "bar": "baz"}]}
    test_path = str(tmp_path / "test.yml")
    manifests.write_manifest(test_path, test_resource, "test")

    with open(test_path, "r", encoding="utf-8") as manifest:
        actual_result = yaml.safe_load(manifest)

    assert actual_result == expected_result


@pytest.mark.unit
def test_union_manifests(test_manifests):
    """Unioning the sample manifests concatenates the lists under each key."""
    expected_result = {
        "dataset": [
            {
                "name": "Test Dataset 1",
                "description": "Test Dataset 1",
                "fides_key": "some_dataset",
                "organization_fides_key": 1,
                "datasetType": {},
                "datasetLocation": "somedb:3306",
                "datasetTables": [],
            },
            {
                "name": "Test Dataset 2",
                "description": "Test Dataset 2",
                "fides_key": "another_dataset",
                "organization_fides_key": 1,
                "datasetType": {},
                "datasetLocation": "somedb:3306",
                "datasetTables": [],
            },
        ],
        "system": [
            {
                "name": "Test System 1",
                "organization_fides_key": 1,
                "systemType": "mysql",
                "description": "Test System 1",
                "fides_key": "some_system",
            },
            {
                "name": "Test System 2",
                "organization_fides_key": 1,
                "systemType": "mysql",
                "description": "Test System 2",
                "fides_key": "another_system",
            },
        ],
    }
    actual_result = manifests.union_manifests(test_manifests.values())
    # pytest prints a full diff of both dicts on failure, so no debug prints
    assert expected_result == actual_result


@pytest.mark.unit
@pytest.mark.parametrize(
    "ingestion_manifest_directory",
    ["populated_manifest_dir", "populated_nested_manifest_dir"],
    indirect=["ingestion_manifest_directory"],
)
def test_ingest_manifests(ingestion_manifest_directory):
    """Flat and nested manifest directories ingest to the same resources."""
    actual_result = manifests.ingest_manifests(str(ingestion_manifest_directory))

    # Battery of assertions for consistency
    assert sorted(actual_result) == ["dataset", "system"]
    assert len(actual_result["dataset"]) == 2
    assert len(actual_result["system"]) == 2
    assert sorted(actual_result["dataset"], key=lambda x: x["name"]) == [
        {
            "name": "Test Dataset 1",
            "organization_fides_key": 1,
            "datasetType": {},
            "datasetLocation": "somedb:3306",
            "description": "Test Dataset 1",
            "fides_key": "some_dataset",
            "datasetTables": [],
        },
        {
            "name": "Test Dataset 2",
            "description": "Test Dataset 2",
            "organization_fides_key": 1,
            "datasetType": {},
            "datasetLocation": "somedb:3306",
            "fides_key": "another_dataset",
            "datasetTables": [],
        },
    ]
    assert sorted(actual_result["system"], key=lambda x: x["name"]) == [
        {
            "name": "Test System 1",
            "organization_fides_key": 1,
            "systemType": "mysql",
            "description": "Test System 1",
            "fides_key": "some_system",
        },
        {
            "name": "Test System 2",
            "organization_fides_key": 1,
            "systemType": "mysql",
            "description": "Test System 2",
            "fides_key": "another_system",
        },
    ]
import pytest

from fideslang import models
from fideslang import parse


@pytest.mark.unit
def test_parse_manifest():
    """A valid dict parses into the model for the given resource type."""
    expected_result = models.DataCategory(
        organization_fides_key=1,
        fides_key="some_resource",
        name="Test resource 1",
        description="Test Description",
    )
    test_dict = {
        "organization_fides_key": 1,
        "fides_key": "some_resource",
        "name": "Test resource 1",
        "description": "Test Description",
    }
    actual_result = parse.parse_dict("data_category", test_dict)
    assert actual_result == expected_result


@pytest.mark.unit
def test_parse_manifest_no_fides_key_validation_error():
    """Parsing a resource dict without a fides_key exits."""
    test_dict = {
        "organization_fides_key": 1,
        "name": "Test resource 1",
        "description": "Test Description",
    }
    # Only the call under test sits inside the raises block, so a SystemExit
    # from anywhere else cannot satisfy the expectation.
    with pytest.raises(SystemExit):
        parse.parse_dict("data_category", test_dict)


@pytest.mark.unit
def test_parse_manifest_resource_type_error():
    """Parsing with an unknown resource type ("data-category") exits."""
    test_dict = {
        "organization_fides_key": 1,
        "fides_key": "some_resource",
        "name": "Test resource 1",
        "description": "Test Description",
    }
    with pytest.raises(SystemExit):
        parse.parse_dict("data-category", test_dict)


@pytest.mark.unit
def test_load_manifests_into_taxonomy():
    """A manifest dict of data categories loads into an equivalent Taxonomy."""
    manifest_dict = {
        "data_category": [
            {
                "name": "User Data",
                "fides_key": "user",
                "description": "Test top-level category",
            },
            {
                "name": "User Account Data",
                "fides_key": "user.account",
                "parent_key": "user",
                "description": "Test sub-category",
            },
        ]
    }

    expected_taxonomy = models.Taxonomy(
        data_category=[
            models.DataCategory(
                name="User Data",
                fides_key="user",
                description="Test top-level category",
            ),
            models.DataCategory(
                name="User Account Data",
                fides_key="user.account",
                parent_key="user",
                description="Test sub-category",
            ),
        ]
    )
    assert parse.load_manifests_into_taxonomy(manifest_dict) == expected_taxonomy