├── .editorconfig
├── .flake8
├── .gitattributes
├── .github
└── workflows
│ └── python-package.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── cnil
├── CNIL_opinion_anonymeter.pdf
└── CNIL_opinion_anonymeter_courtesy_translation.pdf
├── notebooks
├── anonymeter_example.ipynb
└── datasets
│ ├── adults_control.csv
│ ├── adults_syn_ctgan.csv
│ └── adults_train.csv
├── pyproject.toml
├── src
└── anonymeter
│ ├── __init__.py
│ ├── evaluators
│ ├── __init__.py
│ ├── inference_evaluator.py
│ ├── linkability_evaluator.py
│ └── singling_out_evaluator.py
│ ├── neighbors
│ ├── __init__.py
│ └── mixed_types_kneighbors.py
│ ├── preprocessing
│ ├── __init__.py
│ ├── transformations.py
│ └── type_detection.py
│ └── stats
│ ├── __init__.py
│ └── confidence.py
└── tests
├── __init__.py
├── datasets
├── adults_ori.csv
└── adults_syn.csv
├── fixtures.py
├── test_confidence.py
├── test_inference_evaluator.py
├── test_linkability_evaluator.py
├── test_mixed_types_kneigbors.py
├── test_singling_out_evaluator.py
├── test_transformations.py
└── test_type_detection.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | charset = utf-8
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 |
9 | [*.cfg]
10 | indent_style = space
11 | indent_size = 8
12 |
13 | [*.{yml,yaml,md,markdown}]
14 | indent_style = space
15 | indent_size = 2
16 |
17 | [*.{sh,py,pyi,js,ts,json,xml,css,handlebars}]
18 | indent_style = space
19 | indent_size = 4
20 |
21 | [*.py]
22 | profile = black
23 |
24 | [*.html]
25 | indent_style = space
26 | indent_size = 2
27 | max_line_length=120
28 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 |
3 | # References:
4 | # https://flake8.readthedocs.io/en/latest/user/configuration.html
5 | # https://flake8.readthedocs.io/en/latest/user/error-codes.html
6 |
7 | ignore =
8 | D100 # Missing docstring in public module
9 | D107 # Missing docstring in __init__
10 | D401 # First line should be in imperative mood
11 |
12 | # flake8-docstring adds D??? errors to flake8 output.
13 | # Those errors are disabled for the code listed below.
14 | # Format: one file (or files if placeholders used) per line,
15 | # then "colon" and finally the warnings (e.g. D404) or group of
16 | # warnings (e.g. D or D4) to ignore per file.
17 | per-file-ignores =
18 | docs/*:D
19 | setup.py:D
20 | tests/*:D
21 |
22 | exclude =
23 | .git,.vscode,env,.env,.venv,.pytest_cache,.mypy_cache, **/__init__.py,
24 | build,dist,htmlcov,setup.py
25 |
26 | max-line-length = 120
27 | # black enforces an equal amount of whitespace around slice operators, we need to ignore flake8's rule
28 | # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#configuration
29 | extend-ignore = E203
30 |
31 | max-complexity = 18
32 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py diff=python
2 | *.png binary
3 | *.jpg binary
4 | *.csv text -whitespace
5 | *.rst text
6 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | name: Python package
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | build:
11 |
12 | runs-on: ubuntu-latest
13 | strategy:
14 | fail-fast: false
15 | matrix:
16 | # We test only the minimum and the maximum supported versions of python
17 | python-version: ["3.8", "3.11"]
18 | pandas-version: ["1.4", "2.1"]
19 | exclude:
20 | - python-version: "3.8"
21 | pandas-version: "2.1"
22 |
23 | steps:
24 | - uses: actions/checkout@v2
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v2
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 | - name: Install dependencies
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install pandas~=${{ matrix.pandas-version }}
33 | pip install -e '.[dev]'
34 | - name: Ruff Ruff
35 | run: |
36 | ruff ./src ./tests
37 | - name: mypy check
38 | run: |
39 | mypy ./src ./tests
40 | - name: Test with pytest
41 | run: |
42 | pytest
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.3.0
4 | hooks:
5 | - id: trailing-whitespace
6 | - id: end-of-file-fixer
7 | - id: detect-private-key
8 | - id: check-added-large-files
9 | - id: mixed-line-ending
10 | args: ['--fix', 'lf']
11 |
12 | - repo: https://github.com/jumanjihouse/pre-commit-hooks
13 | rev: 3.0.0
14 | hooks:
15 | - id: git-check
16 |
17 | - repo: https://github.com/astral-sh/ruff-pre-commit
18 | rev: v0.1.7
19 | hooks:
20 | - id: ruff
21 | - id: ruff-format
22 |
23 | - repo: local
24 | hooks:
25 | - id: mypy
26 | name: mypy
27 | language: system
28 | entry: bash -c 'mypy "$@"' --
29 | types: [python]
30 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [Unreleased]
9 |
10 | ## [1.0.0] - 2024-02-02
11 |
12 | ### Changed
13 |
14 | - numba is updated to 0.58 to allow for the newer numpy version
15 | - numpy version range is adapted accordingly to numba's requirements
16 | - python 3.11 is allowed
17 | - pandas version is relaxed to allow for pandas >= 2
18 | * added additional CI pipeline for pandas 2
19 |
20 | ### Fixed
21 |
22 | - singling out evaluators getting stuck on multivariate queries
23 |
24 | ## [0.0.2] - 2023-07-10
25 |
26 | ### Added
27 |
28 | - CNIL mention (#18)
29 | - Customized logging on module level (#19)
30 |
31 | ### Fixed
32 |
33 | - Pre-commit errors (#19)
34 |
35 |
36 | ## [0.0.1] - 2023-04-24
37 |
38 | ### Added
39 |
40 | - Initial release
41 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guide
2 |
3 | ## Releasing a new version
4 |
5 | ### Building the new version
6 |
7 | 1. Increment the version in `pyproject.toml`.
8 |
9 | 1. Update the `CHANGELOG.md`.
10 |
11 | - Follow [the guidelines](https://keepachangelog.com/)
12 | - Rename `Unreleased` section to the new version
13 | - Create a new empty `Unreleased` section
14 |
15 | 1. Create a PR with the title `Release [version]`, ask for a review.
16 |
17 | 1. Publish the package to Test PyPi and verify that it's working correctly
18 | (the instructions are below).
19 |
20 | 1. Merge the PR.
21 |
22 | 1. [Create new release](https://github.com/statice/anonymeter/releases/new)
23 | in github
24 |
25 | - specify the new tag which equals to the new `[version]`
26 | - copy paste the new release notes from the `CHANGELOG.md`
27 |
28 |
29 | ### Publishing to Test PyPi
30 |
31 | Don't forget to pull the latest `main`.
32 |
33 | Install development dependencies:
34 | ```bash
35 | pip install ".[dev]"
36 | ```
37 |
38 | Build source and wheel distributions:
39 | ```bash
40 | rm -rf ./dist # clean the build directory if necessary
41 | python -m build
42 | ```
43 |
44 | Log in to Test PyPi. Create a new account if you don't have one yet
45 | and ask to be added as a collaborator for Anonymeter.
46 |
47 | Get the token from [Test PyPi](https://test.pypi.org/manage/account/#api-tokens)
48 | and save it as suggested to `$HOME/.pypirc`:
49 | ```toml
50 | [testpypi]
51 | username = __token__
52 | password = YOUR_TOKEN_HERE
53 | ```
54 |
55 | Upload the artifacts to Test PyPi:
56 | ```bash
57 | twine upload --repository testpypi dist/*
58 | ```
59 |
60 | Test that the package installs and works properly. For example,
61 | you can create a new virtualenv and try to install the package there.
62 | ```bash
63 | mkdir ~/test-anonymeter # create some test directory
64 | cd ~/test-anonymeter
65 | python -m venv .venv # create new virtual env
66 | source .venv/bin/activate
67 | asdf reshim python # in case you use asdf
68 | pip install --upgrade pip
69 | pip install --index-url https://test.pypi.org/simple anonymeter==NEW_VERSION
70 | ```
71 |
72 | You can check that anonymeter is working by running it against the original tests.
73 | For example, if you had the Anonymeter repository checked out in `~/code/anonymeter`:
74 | ```
75 | ln -s ~/code/anonymeter/tests ~/test-anonymeter/tests
76 | pip install pytest
77 | python -m pytest
78 | ```
79 |
80 | ### Publishing to PyPi
81 |
82 | Once you tested the package with Test PyPi, you're ready to publish to
83 | the original PyPi.
84 |
85 | Pull the latest `main` and build the package as described above.
86 |
87 | Log in to PyPi. Create a new account if you don't have one yet
88 | and ask to be added as a collaborator for Anonymeter.
89 |
90 | Get the token from PyPi: https://pypi.org/manage/account/token
91 | and add it as suggested to `$HOME/.pypirc`:
92 | ```toml
93 | [pypi]
94 | username = __token__
95 | password = YOUR_TOKEN_HERE
96 | ```
97 |
98 | Upload the artifacts to PyPi:
99 | ```bash
100 | twine upload dist/*
101 | ```
102 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The Clear BSD License
2 |
3 | Copyright (c) 2022 Anonos IP LLC (IP Owner)
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification,
7 | are permitted (subject to the limitations in the disclaimer below) provided that
8 | the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in the
15 | documentation and/or other materials provided with the distribution.
16 |
17 | * Neither the name of the IP owner nor the names of its
18 | contributors may be used to endorse or promote products derived from this
19 | software without specific prior written permission.
20 |
21 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
22 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE IP OWNER AND
23 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE IP OWNER OR
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
30 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 | POSSIBILITY OF SUCH DAMAGE.
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Anonymeter: Unified Framework for Quantifying Privacy Risk in Synthetic Data
2 |
3 | `Anonymeter` is a unified statistical framework to jointly quantify different
4 | types of privacy risks in synthetic tabular datasets. `Anonymeter` is equipped
5 | with attack-based evaluations for the **Singling Out**, **Linkability**, and
6 | **Inference** risks, which are the three key indicators of factual anonymization
7 | according to the [Article 29 Working Party](https://ec.europa.eu/justice/article-29/documentation/opinion-recommendation/files/2014/wp216_en.pdf).
8 |
9 |
10 | > Anonymeter has been positively reviewed by the technical experts from the [Commission Nationale de l’Informatique et des Libertés (CNIL)](https://www.cnil.fr/en/home) which, in their words, _“have not identified any reason suggesting that the proposed set of methods could not allow to effectively evaluate the extent to which the aforementioned three criteria are fulfilled or not in the context of production and use of synthetic datasets”_. The CNIL also expressed the opinion that the results of Anonymeter (i.e. the three risk scores) **should be used by the data controller to decide whether the residual risks of re-identification are acceptable or not, and whether the dataset could be considered anonymous**. [Here](/cnil) you can find the full letter with the CNIL opinion on Anonymeter.
11 |
12 |
13 | ## `Anonymeter` in a nutshell
14 |
15 | In `Anonymeter` each privacy risk is derived from a privacy attacker whose task is to use the synthetic dataset
16 | to come up with a set of *guesses* of the form:
17 | - "there is only one person with attributes X, Y, and Z" (singling out)
18 | - "records A and B belong to the same person" (linkability)
19 | - "a person with attributes X and Y also has Z" (inference)
20 |
21 | Each evaluation consists of running three different attacks:
22 | - the "main" privacy attack, in which the attacker uses the synthetic data to guess information on records in the original data.
23 | - the "control" privacy attack, in which the attacker uses the synthetic data to guess information on records in the control dataset.
24 | - the "baseline" attack, which models a naive attacker who ignores the synthetic data and guesses randomly.
25 |
26 | Checking how many of these guesses are correct, the success rates of the different attacks are measured and used to
27 | derive an estimate of the privacy risk. In particular, the "control attack" is used to separate what the attacker
28 | learns from the *utility* of the synthetic data, and what is instead indication of privacy leaks.
29 | The "baseline attack" instead functions as a sanity check. The "main attack" should outperform random
30 | guessing in order for the results to be trusted.
31 |
32 | For more details, a thorough
33 | description of the framework and the attack algorithms can be found in the paper
34 | [A Unified Framework for Quantifying Privacy Risk in Synthetic Data](https://petsymposium.org/popets/2023/popets-2023-0055.php), accepted at the 23rd Privacy Enhancing Technologies Symposium ([PETS 2023](https://petsymposium.org/cfp23.php)).
35 |
36 |
37 |
38 | ## Setup and installation
39 |
40 | `Anonymeter` requires Python 3.8.x, 3.9.x, 3.10.x or 3.11.x installed. The simplest way to install `Anonymeter` is from `PyPi`. Simply run
41 |
42 | ```
43 | pip install anonymeter
44 | ```
45 |
46 | and you are good to go.
47 |
48 | ### Local installation
49 |
50 | To install `Anonymeter` locally, clone the repository:
51 |
52 | ```shell
53 | git clone git@github.com:statice/anonymeter.git
54 | ```
55 |
56 | and install the dependencies:
57 |
58 | ```shell
59 | cd anonymeter # if you are not there already
60 | pip install . # Basic dependencies
61 | pip install ".[notebooks]" # Dependencies to run example notebooks
62 | pip install -e ".[notebooks,dev]" # Development setup
63 | ```
64 |
65 | If you experience issues with the installation, we recommend installing
66 | `anonymeter` in a new clean virtual environment.
67 |
68 | ## Getting started
69 |
70 | Check out the example notebook in the `notebooks` folder to start playing around
71 | with `anonymeter`. To run this notebook you would need `jupyter` and some plotting libraries.
72 | These should be installed as part of the `notebooks` dependencies. If you haven't done so, please
73 | install them by executing:
74 |
75 | ```shell
76 | pip install anonymeter[notebooks]
77 | ```
78 | if you are installing anonymeter from `PyPi`, or:
79 |
80 | ```shell
81 | pip install ".[notebooks]"
82 | ```
83 |
84 | if you have opted for a local installation.
85 |
86 | ## Basic usage pattern
87 |
88 | For each of the three privacy risks anonymeter provides an `Evaluator` class. The high-level classes `SinglingOutEvaluator`, `LinkabilityEvaluator`, and `InferenceEvaluator` are the only things that you need to import from `Anonymeter`.
89 |
90 | Despite the different nature of the privacy risks they evaluate, these classes have the same interface and are used in the same way. To instantiate the evaluator you have to provide three dataframes: the original dataset `ori` which has been used to generate the synthetic data, the synthetic data `syn`, and a `control` dataset containing original records which have not been used to generate the synthetic data.
91 |
92 | Another parameter common to all evaluators is the number of target records to attack (`n_attacks`). A higher number will reduce the statistical uncertainties on the results, at the expense of a longer computation time.
93 |
94 | ```python
95 | evaluator = *Evaluator(ori: pd.DataFrame,
96 | syn: pd.DataFrame,
97 | control: pd.DataFrame,
98 | n_attacks: int)
99 | ```
100 |
101 | Once instantiated, the evaluation pipeline is executed when calling the `evaluate()` method, and the resulting estimate of the risk can be accessed using the `risk()` method.
102 |
103 | ```python
104 | evaluator.evaluate()
105 | risk = evaluator.risk()
106 | ```
107 |
108 | ## Configuring logging
109 |
110 | `Anonymeter` uses the standard Python logger named `anonymeter`.
111 | You can configure the logging level and the output destination
112 | using the standard Python logging API (see [here](https://docs.python.org/3/library/logging.html) for more details).
113 |
114 | For example, to set the logging level to `DEBUG` you can use the following snippet:
115 |
116 | ```python
117 | import logging
118 |
119 | # set the logging level to DEBUG
120 | logging.getLogger("anonymeter").setLevel(logging.DEBUG)
121 | ```
122 |
123 | And if you want to log to a file, you can use the following snippet:
124 |
125 | ```python
126 | import logging
127 |
128 | # create a file handler
129 | file_handler = logging.FileHandler("anonymeter.log")
130 |
131 | # set the logging level for the file handler
132 | file_handler.setLevel(logging.DEBUG)
133 |
134 | # add the file handler to the logger
135 | logger = logging.getLogger("anonymeter")
136 | logger.addHandler(file_handler)
137 | logger.setLevel(logging.DEBUG)
138 | ```
139 |
140 |
141 | ## Cite this work
142 |
143 | If you use anonymeter in your work, we would appreciate citations to the following paper:
144 |
145 | "A Unified Framework for Quantifying Privacy Risk in Synthetic Data", M. Giomi *et al*, PoPETS 2023.
146 | This `bibtex` entry can be used to refer to the paper:
147 |
148 | ```text
149 | @misc{anonymeter,
150 | doi = {https://doi.org/10.56553/popets-2023-0055},
151 | url = {https://petsymposium.org/popets/2023/popets-2023-0055.php},
152 | journal = {Proceedings of Privacy Enhancing Technologies Symposium},
153 | year = {2023},
154 | author = {Giomi, Matteo and Boenisch, Franziska and Wehmeyer, Christoph and Tasnádi, Borbála},
155 | title = {A Unified Framework for Quantifying Privacy Risk in Synthetic Data},
156 | }
157 | ```
158 |
159 | ### License
160 |
161 | Licensed under Clear BSD License, see `LICENSE.md` to see the full license text. Patent-pending code (application US-20230401336-A1).
162 |
163 |
--------------------------------------------------------------------------------
/cnil/CNIL_opinion_anonymeter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/cnil/CNIL_opinion_anonymeter.pdf
--------------------------------------------------------------------------------
/cnil/CNIL_opinion_anonymeter_courtesy_translation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/cnil/CNIL_opinion_anonymeter_courtesy_translation.pdf
--------------------------------------------------------------------------------
/notebooks/anonymeter_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "66b36556-e9af-4cf9-bb9c-25f12d3fc322",
6 | "metadata": {},
7 | "source": [
8 | "# Anonymeter example notebook\n",
9 | "\n",
10 | "This example notebook demonstrates the usage of `Anonymeter`, a software to derive GDPR-aligned measures of the privacy of synthetic datasets in an empirical, attack based fashion.\n",
11 | "\n",
12 | "`Anonymeter` contains privacy evaluators which measure the risks of singling out, linkability, and inference which data donors might incur following the release of a synthetic dataset. These risks are the three key indicators of factual anonymization according to the European General Data Protection Regulation (GDPR). For more details, please refer to [M. Giomi et al. 2022](https://petsymposium.org/popets/2023/popets-2023-0055.php)."
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "id": "1062da6f-d56e-48a5-b62f-fe987e7682fe",
18 | "metadata": {},
19 | "source": [
20 | "### Basic usage pattern\n",
21 | "\n",
22 | "For each of these privacy risks anonymeter provides an `Evaluator` class. The high-level classes `SinglingOutEvaluator`, `LinkabilityEvaluator`, and `InferenceEvaluator` are the only things that you need to import from `Anonymeter`.\n",
23 | "\n",
24 | "Despite the different nature of the privacy risks they evaluate, these classes have the same interface and are used in the same way. To instantiate the evaluator you have to provide three dataframes: the original dataset `ori` which has been used to generate the synthetic data, the synthetic data `syn`, and a `control` dataset containing original records which have not been used to generate the synthetic data. \n",
25 | "\n",
26 | "Another parameter common to all evaluators is the number of target records to attack (`n_attacks`). A higher number will reduce the statistical uncertainties on the results, at the expense of a longer computation time.\n",
27 | "\n",
28 | "```python\n",
29 | "evaluator = *Evaluator(ori: pd.DataFrame, \n",
30 | " syn: pd.DataFrame, \n",
31 | " control: pd.DataFrame,\n",
32 | " n_attacks: int)\n",
33 | "```\n",
34 | "\n",
35 | "Once instantiated, the evaluation pipeline is executed when calling the `evaluate()` method, and the resulting estimate of the risk can be accessed using the `risk()` method.\n",
36 | "\n",
37 | "```python\n",
38 | "evaluator.evaluate()\n",
39 | "risk = evaluator.risk()\n",
40 | "```"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "id": "cb9d6771-69f3-4e49-b143-d5d0bae3ba3c",
46 | "metadata": {},
47 | "source": [
48 | "### A peek under the hood\n",
49 | "\n",
50 | "In `Anonymeter` each privacy risk is derived from a privacy attacker whose task is to use the synthetic dataset to come up with a set of *guesses* of the form:\n",
51 | "- \"there is only one person with attributes X, Y, and Z\" (singling out)\n",
52 | "- \"records A and B belong to the same person\" (linkability)\n",
53 | "- \"a person with attributes X and Y also has Z\" (inference)\n",
54 | "\n",
55 | "Each evaluation consists of running three different attacks:\n",
56 | "- the \"main\" privacy attack, in which the attacker uses the synthetic data to guess information on records in the original data.\n",
57 | "- the \"control\" privacy attack, in which the attacker uses the synthetic data to guess information on records in the control dataset. \n",
58 | "- the \"baseline\" attack, which models a naive attacker who ignores the synthetic data and guesses randomly.\n",
59 | "\n",
60 | "Checking how many of these guesses are correct, the success rates of the different attacks are measured and used to derive an estimate of the privacy risk. In particular, the \"control attack\" is used to separate what the attacker learns from the *utility* of the synthetic data, and what is instead indication of privacy leaks. The \"baseline attack\" instead functions as a sanity check. The \"main attack\" should outperform random guessing in order for the results to be trusted. "
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 1,
66 | "id": "c64a6fab-1676-4539-b460-5b2fdb456b04",
67 | "metadata": {
68 | "tags": []
69 | },
70 | "outputs": [],
71 | "source": [
72 | "import os\n",
73 | "import pandas as pd\n",
74 | "import matplotlib.pyplot as plt\n",
75 | "import seaborn as sns\n",
76 | "\n",
77 | "from anonymeter.evaluators import SinglingOutEvaluator\n",
78 | "from anonymeter.evaluators import LinkabilityEvaluator\n",
79 | "from anonymeter.evaluators import InferenceEvaluator"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "id": "ada19947-b895-4279-aac3-9b87fac2fa6b",
85 | "metadata": {},
86 | "source": [
87 | "## Downloading the data\n",
88 | "\n",
89 | "For this example, we will use the famous `Adults` (more details [here](https://archive.ics.uci.edu/ml/datasets/adult)) dataset. This dataset contains aggregated census data, where every row represents a population segment. For the purpose of demonstrating `Anonymeter`, we will use this data as if each row would in fact refer to a real individual. \n",
90 | "\n",
91 | "The synthetic version has been generated by `CTGAN` from [SDV](https://sdv.dev/SDV/user_guides/single_table/ctgan.html), as explained in the paper accompanying this code release. For details on the generation process, e.g. regarding hyperparameters, see Section 6.2.1 of [the accompanying paper](https://petsymposium.org/popets/2023/popets-2023-0055.php)."
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 2,
97 | "id": "fc128115-2f0c-43b1-9198-5c5594eae7f3",
98 | "metadata": {
99 | "tags": []
100 | },
101 | "outputs": [],
102 | "source": [
103 | "dataset_dir = \"./datasets/\"\n",
104 | "\n",
105 | "ori = pd.read_csv(os.path.join(dataset_dir, \"adults_train.csv\"))\n",
106 | "syn = pd.read_csv(os.path.join(dataset_dir, \"adults_syn_ctgan.csv\"))\n",
107 | "control = pd.read_csv(os.path.join(dataset_dir, \"adults_control.csv\"))"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 3,
113 | "id": "f6abeed8-23ae-4d4a-9cdb-006c0bba109c",
114 | "metadata": {
115 | "tags": []
116 | },
117 | "outputs": [
118 | {
119 | "data": {
120 | "text/html": [
121 | "
\n",
122 | "\n",
135 | "
\n",
136 | " \n",
137 | "
\n",
138 | "
\n",
139 | "
age
\n",
140 | "
type_employer
\n",
141 | "
fnlwgt
\n",
142 | "
education
\n",
143 | "
education_num
\n",
144 | "
marital
\n",
145 | "
occupation
\n",
146 | "
relationship
\n",
147 | "
race
\n",
148 | "
sex
\n",
149 | "
capital_gain
\n",
150 | "
capital_loss
\n",
151 | "
hr_per_week
\n",
152 | "
country
\n",
153 | "
income
\n",
154 | "
\n",
155 | " \n",
156 | " \n",
157 | "
\n",
158 | "
0
\n",
159 | "
53
\n",
160 | "
Self-emp-not-inc
\n",
161 | "
138022
\n",
162 | "
11th
\n",
163 | "
7
\n",
164 | "
Divorced
\n",
165 | "
Craft-repair
\n",
166 | "
Not-in-family
\n",
167 | "
White
\n",
168 | "
Male
\n",
169 | "
0
\n",
170 | "
0
\n",
171 | "
60
\n",
172 | "
United-States
\n",
173 | "
<=50K
\n",
174 | "
\n",
175 | "
\n",
176 | "
1
\n",
177 | "
31
\n",
178 | "
Private
\n",
179 | "
344200
\n",
180 | "
HS-grad
\n",
181 | "
9
\n",
182 | "
Married-civ-spouse
\n",
183 | "
Exec-managerial
\n",
184 | "
Husband
\n",
185 | "
White
\n",
186 | "
Male
\n",
187 | "
0
\n",
188 | "
0
\n",
189 | "
40
\n",
190 | "
United-States
\n",
191 | "
>50K
\n",
192 | "
\n",
193 | "
\n",
194 | "
2
\n",
195 | "
28
\n",
196 | "
Private
\n",
197 | "
242482
\n",
198 | "
HS-grad
\n",
199 | "
9
\n",
200 | "
Never-married
\n",
201 | "
Handlers-cleaners
\n",
202 | "
Own-child
\n",
203 | "
White
\n",
204 | "
Male
\n",
205 | "
0
\n",
206 | "
0
\n",
207 | "
40
\n",
208 | "
United-States
\n",
209 | "
<=50K
\n",
210 | "
\n",
211 | "
\n",
212 | "
3
\n",
213 | "
26
\n",
214 | "
Private
\n",
215 | "
193165
\n",
216 | "
Some-college
\n",
217 | "
10
\n",
218 | "
Married-civ-spouse
\n",
219 | "
Transport-moving
\n",
220 | "
Husband
\n",
221 | "
White
\n",
222 | "
Male
\n",
223 | "
0
\n",
224 | "
0
\n",
225 | "
52
\n",
226 | "
United-States
\n",
227 | "
>50K
\n",
228 | "
\n",
229 | "
\n",
230 | "
4
\n",
231 | "
27
\n",
232 | "
Private
\n",
233 | "
267989
\n",
234 | "
Some-college
\n",
235 | "
10
\n",
236 | "
Married-civ-spouse
\n",
237 | "
Machine-op-inspct
\n",
238 | "
Husband
\n",
239 | "
White
\n",
240 | "
Male
\n",
241 | "
0
\n",
242 | "
0
\n",
243 | "
40
\n",
244 | "
United-States
\n",
245 | "
<=50K
\n",
246 | "
\n",
247 | " \n",
248 | "
\n",
249 | "
"
250 | ],
251 | "text/plain": [
252 | " age type_employer fnlwgt education education_num \\\n",
253 | "0 53 Self-emp-not-inc 138022 11th 7 \n",
254 | "1 31 Private 344200 HS-grad 9 \n",
255 | "2 28 Private 242482 HS-grad 9 \n",
256 | "3 26 Private 193165 Some-college 10 \n",
257 | "4 27 Private 267989 Some-college 10 \n",
258 | "\n",
259 | " marital occupation relationship race sex \\\n",
260 | "0 Divorced Craft-repair Not-in-family White Male \n",
261 | "1 Married-civ-spouse Exec-managerial Husband White Male \n",
262 | "2 Never-married Handlers-cleaners Own-child White Male \n",
263 | "3 Married-civ-spouse Transport-moving Husband White Male \n",
264 | "4 Married-civ-spouse Machine-op-inspct Husband White Male \n",
265 | "\n",
266 | " capital_gain capital_loss hr_per_week country income \n",
267 | "0 0 0 60 United-States <=50K \n",
268 | "1 0 0 40 United-States >50K \n",
269 | "2 0 0 40 United-States <=50K \n",
270 | "3 0 0 52 United-States >50K \n",
271 | "4 0 0 40 United-States <=50K "
272 | ]
273 | },
274 | "execution_count": 3,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "ori.head()"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "id": "f1d19013-b7cf-48e3-a068-6c1e5449884e",
286 | "metadata": {},
287 | "source": [
288 | "As can be seen, the dataset contains demographic information, as well as information regarding the education, financial situation, and personal life of some tens of thousands of \"individuals\"."
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "id": "52811434-e3ed-464e-8bbc-eafb1dfe0eb1",
294 | "metadata": {
295 | "tags": []
296 | },
297 | "source": [
298 | "### Measuring the singling out risk\n",
299 | "\n",
300 | "The `SinglingOutEvaluator` tries to measure how much the synthetic data can help an attacker find combinations of attributes that single out records in the training data. \n",
301 | "\n",
302 | "With the following code we evaluate the robustness of the synthetic data to \"univariate\" singling out attacks, which try to find unique values of some attribute which single out an individual. \n",
303 | "\n",
304 | "\n",
305 | "##### NOTE:\n",
306 | "\n",
307 | "The `SinglingOutEvaluator` can sometimes raise a `RuntimeError`. This happens when not enough singling out queries are found. Increasing `n_attacks` will make this condition less frequent and the evaluation more robust, although much slower.\n"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 4,
313 | "id": "43acdda6-19d5-4611-ba4f-498fc7bd2d40",
314 | "metadata": {},
315 | "outputs": [
316 | {
317 | "name": "stdout",
318 | "output_type": "stream",
319 | "text": [
320 | "PrivacyRisk(value=0.013741062122476706, ci=(0.0, 0.034101211562263624))\n"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "evaluator = SinglingOutEvaluator(ori=ori, \n",
326 | " syn=syn, \n",
327 | " control=control,\n",
328 | " n_attacks=500)\n",
329 | "\n",
330 | "try:\n",
331 | " evaluator.evaluate(mode='univariate')\n",
332 | " risk = evaluator.risk()\n",
333 | " print(risk)\n",
334 | "\n",
335 | "except RuntimeError as ex: \n",
336 | " print(f\"Singling out evaluation failed with {ex}. Please re-run this cell.\"\n",
337 | " \"For more stable results increase `n_attacks`. Note that this will \"\n",
338 | " \"make the evaluation slower.\")"
339 | ]
340 | },
341 | {
342 | "cell_type": "markdown",
343 | "id": "f753a354-50c1-4209-933a-a51291c03306",
344 | "metadata": {},
345 | "source": [
346 | "The risk estimate is accompanied by a confidence interval (at 95% level by default) which accounts for the finite number of attacks performed, 500 in this case. \n",
347 | "\n",
348 | "Using the `queries()` method, we can see what kind of singling out queries (i.e. the *guesses*) the attacker has come up with:"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 5,
354 | "id": "4af6f284-81a3-4b84-a39b-b2beb6a21b24",
355 | "metadata": {},
356 | "outputs": [
357 | {
358 | "data": {
359 | "text/plain": [
360 | "['fnlwgt == 34465', 'fnlwgt == 95255', 'fnlwgt == 270228']"
361 | ]
362 | },
363 | "execution_count": 5,
364 | "metadata": {},
365 | "output_type": "execute_result"
366 | }
367 | ],
368 | "source": [
369 | "evaluator.queries()[:3]"
370 | ]
371 | },
372 | {
373 | "cell_type": "markdown",
374 | "id": "abc36183-03c4-4612-80cd-c1b3849cb04e",
375 | "metadata": {},
376 | "source": [
377 | "As can be seen, the attack is picking up on the `fnlwgt` column, which has many (~63%) unique integer values and therefore provides a powerful handle for singling out. This should result in a singling out risk which is *compatible*, within the confidence level, with a few percentage points. The actual results can vary between notebook executions. "
378 | ]
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "id": "b0693e9c-0f69-4c4c-b506-4e7bd7190031",
383 | "metadata": {},
384 | "source": [
385 | "### Inspecting the results in more detail"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "id": "f7db3b47-22e3-4d1f-8495-770799c378cc",
391 | "metadata": {},
392 | "source": [
393 | "There are two methods to inspect the results. The high-level `risk()` method gives an overall estimate of the privacy risk and its confidence interval."
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 6,
399 | "id": "4ea008ba-05a8-47b2-a316-132fe628cae3",
400 | "metadata": {},
401 | "outputs": [
402 | {
403 | "data": {
404 | "text/plain": [
405 | "PrivacyRisk(value=0.013741062122476706, ci=(0.0, 0.034101211562263624))"
406 | ]
407 | },
408 | "execution_count": 6,
409 | "metadata": {},
410 | "output_type": "execute_result"
411 | }
412 | ],
413 | "source": [
414 | "evaluator.risk(confidence_level=0.95)"
415 | ]
416 | },
417 | {
418 | "cell_type": "markdown",
419 | "id": "ba233989-5556-4f89-8037-1c26d78c8127",
420 | "metadata": {},
421 | "source": [
422 | "For more information, the `results()` method gives the success rates of the three attacks (the \"main\" one, the baseline one, and the one against control) that enter the `Anonymeter` risk calculation."
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "execution_count": 7,
428 | "id": "367928ba-7a1e-4c20-9dc6-84c490873700",
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "name": "stdout",
433 | "output_type": "stream",
434 | "text": [
435 | "Successs rate of main attack: SuccessRate(value=0.03556819133600645, error=0.015776677709435333)\n",
436 | "Successs rate of baseline attack: SuccessRate(value=0.0038121702307761206, error=0.00381217023077612)\n",
437 | "Successs rate of control attack: SuccessRate(value=0.022131235900891076, error=0.012313616009370306)\n"
438 | ]
439 | }
440 | ],
441 | "source": [
442 | "res = evaluator.results()\n",
443 | "\n",
444 | "print(\"Successs rate of main attack:\", res.attack_rate)\n",
445 | "print(\"Successs rate of baseline attack:\", res.baseline_rate)\n",
446 | "print(\"Successs rate of control attack:\", res.control_rate)"
447 | ]
448 | },
449 | {
450 | "cell_type": "markdown",
451 | "id": "8543aad4-897c-4e80-96a7-a47aea1caf22",
452 | "metadata": {},
453 | "source": [
454 | "Note that you can obtain the `PrivacyRisk` from the attack results by:"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": 8,
460 | "id": "d1081269-1830-430f-8305-9f254641de89",
461 | "metadata": {},
462 | "outputs": [
463 | {
464 | "data": {
465 | "text/plain": [
466 | "PrivacyRisk(value=0.013741062122476706, ci=(0.0, 0.034101211562263624))"
467 | ]
468 | },
469 | "execution_count": 8,
470 | "metadata": {},
471 | "output_type": "execute_result"
472 | }
473 | ],
474 | "source": [
475 | "res.risk()"
476 | ]
477 | },
478 | {
479 | "cell_type": "markdown",
480 | "id": "00f4e993-defe-4c83-a4b3-da2cdf2ca02f",
481 | "metadata": {},
482 | "source": [
483 | "### Checking singling out with multivariate predicates\n",
484 | "\n",
485 | "The `SinglingOutEvaluator` can also attack the dataset using predicates which combine different attributes. These are the so-called `multivariate` predicates. \n",
486 | "\n",
487 | "To run the analysis using the `multivariate` singling out attack, the `mode` parameter of `evaluate` needs to be set to `'multivariate'`. The number of attributes used in the attacker queries is controlled via the `n_cols` parameter, set to 4 in this example. "
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": 9,
493 | "id": "1a875b5f-4f75-4585-83b5-d0703ac82f90",
494 | "metadata": {
495 | "tags": []
496 | },
497 | "outputs": [
498 | {
499 | "name": "stdout",
500 | "output_type": "stream",
501 | "text": [
502 | "PrivacyRisk(value=0.02878005056752415, ci=(0.0, 0.1380156613265963))\n"
503 | ]
504 | }
505 | ],
506 | "source": [
507 | "evaluator = SinglingOutEvaluator(ori=ori, \n",
508 | " syn=syn, \n",
509 | " control=control,\n",
510 | " n_attacks=100, # this attack takes longer\n",
511 | " n_cols=4)\n",
512 | "\n",
513 | "\n",
514 | "try:\n",
515 | " evaluator.evaluate(mode='multivariate')\n",
516 | " risk = evaluator.risk()\n",
517 | " print(risk)\n",
518 | "\n",
519 | "except RuntimeError as ex: \n",
520 | " print(f\"Singling out evaluation failed with {ex}. Please re-run this cell.\"\n",
521 | " \"For more stable results increase `n_attacks`. Note that this will \"\n",
522 | " \"make the evaluation slower.\")"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": 10,
528 | "id": "786dbb5b-6b52-41c1-8d07-2d9467a3d649",
529 | "metadata": {},
530 | "outputs": [
531 | {
532 | "data": {
533 | "text/plain": [
534 | "[\"education== '9th' & occupation== 'Other-service' & race== 'Amer-Indian-Eskimo' & sex== 'Male'\",\n",
535 | " \"education== '9th' & income== '>50K' & occupation== 'Other-service' & sex== 'Female'\",\n",
536 | " \"age<= 24 & capital_gain<= 0 & country== 'Ireland' & education_num>= 11\"]"
537 | ]
538 | },
539 | "execution_count": 10,
540 | "metadata": {},
541 | "output_type": "execute_result"
542 | }
543 | ],
544 | "source": [
545 | "evaluator.queries()[:3]"
546 | ]
547 | },
548 | {
549 | "cell_type": "markdown",
550 | "id": "acff772b-3f99-402d-b1b6-01b0e09bd5b1",
551 | "metadata": {},
552 | "source": [
553 | "# Measuring the Linkability risk\n",
554 | "\n",
555 | "The `LinkabilityEvaluator` allows one to know how much the synthetic data will help an adversary who tries to link two other datasets based on a subset of attributes. \n",
556 | "\n",
557 | "For example, suppose that the adversary finds dataset A containing, among other fields, information about the profession and education of people, and dataset B containing some demographic and health related information. Can the attacker use the synthetic dataset to link these two datasets?\n",
558 | "\n",
559 | "To run the `LinkabilityEvaluator` one needs to specify which columns of auxiliary information are available to the attacker, and how they are distributed between the two datasets A and B. This is done using the `aux_cols` parameter."
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": 11,
565 | "id": "dad588b3-b241-4256-ac11-ae73d9206782",
566 | "metadata": {},
567 | "outputs": [
568 | {
569 | "data": {
570 | "text/plain": [
571 | "PrivacyRisk(value=0.0, ci=(0.0, 0.001915371248414205))"
572 | ]
573 | },
574 | "execution_count": 11,
575 | "metadata": {},
576 | "output_type": "execute_result"
577 | }
578 | ],
579 | "source": [
580 | "aux_cols = [\n",
581 | " ['type_employer', 'education', 'hr_per_week', 'capital_loss', 'capital_gain'],\n",
582 | " [ 'race', 'sex', 'fnlwgt', 'age', 'country']\n",
583 | "]\n",
584 | "\n",
585 | "evaluator = LinkabilityEvaluator(ori=ori, \n",
586 | " syn=syn, \n",
587 | " control=control,\n",
588 | " n_attacks=2000,\n",
589 | " aux_cols=aux_cols,\n",
590 | " n_neighbors=10)\n",
591 | "\n",
592 | "evaluator.evaluate(n_jobs=-2) # n_jobs follow joblib convention. -1 = all cores, -2 = all execept one\n",
593 | "evaluator.risk()"
594 | ]
595 | },
596 | {
597 | "cell_type": "code",
598 | "execution_count": 12,
599 | "id": "321da78a-d695-4d7c-8a62-f1a226bfc04a",
600 | "metadata": {},
601 | "outputs": [
602 | {
603 | "name": "stdout",
604 | "output_type": "stream",
605 | "text": [
606 | "Successs rate of main attack: SuccessRate(value=0.003453731022423335, error=0.00238542229400942)\n",
607 | "Successs rate of baseline attack: SuccessRate(value=0.0014575651169858408, error=0.001369297386280373)\n",
608 | "Successs rate of control attack: SuccessRate(value=0.005449896927860829, error=0.0030806162908989933)\n"
609 | ]
610 | }
611 | ],
612 | "source": [
613 | "res = evaluator.results()\n",
614 | "\n",
615 | "print(\"Successs rate of main attack:\", res.attack_rate)\n",
616 | "print(\"Successs rate of baseline attack:\", res.baseline_rate)\n",
617 | "print(\"Successs rate of control attack:\", res.control_rate)"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "id": "f0190af5-a184-46cf-a048-30ecbe82fd51",
623 | "metadata": {},
624 | "source": [
625 | "As can be seen, the attack is not very successful and the linkability risk is low. The `n_neighbors` parameter can be used to allow for weaker indirect links to be scored as successes. It will have an impact on the risk estimate. To check the measured risk for different values of `n_neighbors` you don't have to re-run the evaluation. Rather, do:"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 13,
631 | "id": "c4d3644a-dd36-4807-aa3f-cf244cfd5d55",
632 | "metadata": {},
633 | "outputs": [
634 | {
635 | "name": "stdout",
636 | "output_type": "stream",
637 | "text": [
638 | "PrivacyRisk(value=0.0, ci=(0.0, 0.0007611013198481277))\n"
639 | ]
640 | }
641 | ],
642 | "source": [
643 | "print(evaluator.risk(n_neighbors=7))"
644 | ]
645 | },
646 | {
647 | "cell_type": "markdown",
648 | "id": "0429baae-424d-4ebe-b8ec-9205397515ba",
649 | "metadata": {},
650 | "source": [
651 | "# Measuring the Inference Risk\n",
652 | "\n",
653 | "Finally, `anonymeter` allows one to measure the inference risk. It does so by measuring the success of an attacker that tries to discover the value of some secret attribute for a set of target records on which some auxiliary knowledge is available.\n",
654 | "\n",
655 | "Similar to the case of the `LinkabilityEvaluator`, the main parameter here is `aux_cols`, which specifies what the attacker knows about its target, i.e. which columns are known to the attacker. By selecting the `secret` column, one can identify which attributes, alone or in combinations, exhibit the largest risks and thereby expose a lot of information on the original data.\n",
656 | "\n",
657 | "In the following snippet we will measure the inference risk for each column individually, using all the other columns as auxiliary information to model a very knowledgeable attacker. "
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": 14,
663 | "id": "6c07054c-7ced-46c3-8a12-14123f6cc965",
664 | "metadata": {
665 | "tags": []
666 | },
667 | "outputs": [
668 | {
669 | "name": "stderr",
670 | "output_type": "stream",
671 | "text": [
672 | "/home/matteo/work/gits/anonymeter/src/anonymeter/stats/confidence.py:218: UserWarning: Attack is as good or worse as baseline model. Estimated rates: attack = 0.25195286286290325, baseline = 0.255937555828961. Analysis results cannot be trusted.\n",
673 | " self._sanity_check()\n"
674 | ]
675 | }
676 | ],
677 | "source": [
678 | "columns = ori.columns\n",
679 | "results = []\n",
680 | "\n",
681 | "for secret in columns:\n",
682 | " \n",
683 | " aux_cols = [col for col in columns if col != secret]\n",
684 | " \n",
685 | " evaluator = InferenceEvaluator(ori=ori, \n",
686 | " syn=syn, \n",
687 | " control=control,\n",
688 | " aux_cols=aux_cols,\n",
689 | " secret=secret,\n",
690 | " n_attacks=1000)\n",
691 | " evaluator.evaluate(n_jobs=-2)\n",
692 | " results.append((secret, evaluator.results()))"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": 15,
698 | "id": "0e492eeb-d296-4973-a08e-c5afc0ac36b5",
699 | "metadata": {
700 | "tags": []
701 | },
702 | "outputs": [
703 | {
704 | "data": {
705 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAH4CAYAAACi61KzAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAmZNJREFUeJzs3Xl4TOfbB/B7EiS2xBKEiMSSIIvsicROSIgl9n1rai1F7EsFRapKUSq0VbT2XVUVsbQqlgiKqn0ndglBSOb7/uGa85uR6GuSmQTz/VxXrtaZZ87znJkz59znWVUAIEREREQmxCy3C0BERESU0xgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCYnT24X4F2kVqvl5s2bUrhwYVGpVLldHCIiInoLAOTx48dSpkwZMTP77zoeBkCZuHnzptjb2+d2MYiIiCgLrl27JmXLlv3PNAyAMlG4cGERefUBWllZ5XJpiIiI6G0kJyeLvb29ch//LwyAMqFp9rKysmIARERE9J55m+4r7ARNREREJocBEBEREZkcBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCaHARARERGZHAZAREREZHIYABEREZHJYQBEREREJocBEBEREZkcBkBERERkcvLkdgGIiEh/X+84a9T9D2nobNT9E+U21gARERGRyWEARERERCaHARARERGZHAZAREREZHIYABEREZHJYQBEREREJocBEBEREZmcXA+A5s2bJ46OjmJpaSkBAQFy6NCh/0y/Zs0aqVKlilhaWoq7u7ts3bpV5/UnT57IgAEDpGzZspI/f35xcXGRmJgYYx4CERERvWdyNQBatWqVREZGSlRUlCQkJIiHh4eEhITInTt3Mk2/f/9+6dixo0RERMjRo0clPDxcwsPD5eTJk0qayMhI2bZtm/z8889y+vRpGTx4sAwYMEA2b96cU4dFRERE7zgVAORW5gEBAeLn5ydz584VERG1Wi329vYycOBAGTVqVIb07du3l5SUFNmyZYuyrXr16uLp6anU8ri5uUn79u3ls88+U9L4+PhI48aNZfLkyW9VruTkZLG2tpakpCSxsrLKziESERkFZ4Imykif+3eu1QC9ePFCjhw5IsHBwf8rjJmZBAcHS1xcXKbviYuL00kvIhISEqKTPigoSDZv3iw3btwQALJ79245e/asNGrU6I1lSU1NleTkZJ0/IiIi+nDlWgB07949SU9Pl1KlSulsL1WqlCQmJmb6nsTExP83/TfffCMuLi5StmxZyZcvn4SGhsq8efOkdu3abyxLdHS0WFtbK3/29vbZODIiIiJ61+V6J2hD++abb+TAgQOyefNmOXLkiMyYMUM++eQT2blz5xvfM3r0aElKSlL+rl27loMlJiIiopyWa6vB29jYiLm5udy+fVtn++3bt8XW1jbT99ja2v5n+mfPnsmYMWNkw4YNEhYWJiIi1apVk2PHjslXX32VoflMw8LCQiwsLLJ7SERERPSeyLUaoHz58omPj4/ExsYq29RqtcTGxkpgYGCm7wkMDNRJLyKyY8cOJf3Lly/l5cuXYmame1jm5uaiVqsNfARERET0vsq1GiCRV0PWu3fvLr6+vuLv7y+zZs2SlJQU6dmzp4iIdOvWTezs7CQ6OlpERAYNGiR16tSRGTNmSFhYmKxcuVLi4+Nl4cKFIiJiZWUlderUkeHDh0v+/PnFwcFB9u7dK0uXLpWZM2fm2nESERHRuyVXA6D27dvL3bt3Zfz48ZKYmCienp6ybds2paPz1atXdWpzgoKCZPny5TJu3DgZM2aMODk5ycaNG8XNzU1Js3LlShk9erR07txZHjx4IA4ODjJlyhTp27dvjh8fERERvZtydR6gdxXnASKidx3nASLK6L2YB4iIiIgotzAAIiIiIpPDAIiIiIhMDgMg
IiIiMjkMgIiIiMjkMAAiIiIik8MAiIiIiEwOAyAiIiIyOQyAiIiIyOQwACIiIiKTwwCIiIiITA4DICIiIjI5DICIiIjI5DAAIiIiIpPDAIiIiIhMDgMgIiIiMjkMgIiIiMjkMAAiIiIik8MAiIiIiEwOAyAiIiIyOQyAiIiIyOQwACIiIiKTwwCIiIiITA4DICIiIjI5DICIiIjI5DAAIiIiIpPDAIiIiIhMDgMgIiIiMjkMgIiIiMjkMAAiIiIik8MAiIiIiEzOOxEAzZs3TxwdHcXS0lICAgLk0KFD/5l+zZo1UqVKFbG0tBR3d3fZunWrzusqlSrTv+nTpxvzMIiIiOg9kesB0KpVqyQyMlKioqIkISFBPDw8JCQkRO7cuZNp+v3790vHjh0lIiJCjh49KuHh4RIeHi4nT55U0ty6dUvnb9GiRaJSqaR169Y5dVhERET0DlMBQG4WICAgQPz8/GTu3LkiIqJWq8Xe3l4GDhwoo0aNypC+ffv2kpKSIlu2bFG2Va9eXTw9PSUmJibTPMLDw+Xx48cSGxv7VmVKTk4Wa2trSUpKEisrqywcFRGRcX2946xR9z+kobNR909kDPrcv3O1BujFixdy5MgRCQ4OVraZmZlJcHCwxMXFZfqeuLg4nfQiIiEhIW9Mf/v2bfn1118lIiLijeVITU2V5ORknT8iIiL6cOVqAHTv3j1JT0+XUqVK6WwvVaqUJCYmZvqexMREvdIvWbJEChcuLK1atXpjOaKjo8Xa2lr5s7e31/NIiIiI6H2S632AjG3RokXSuXNnsbS0fGOa0aNHS1JSkvJ37dq1HCwhERER5bQ8uZm5jY2NmJuby+3bt3W23759W2xtbTN9j62t7Vun//PPP+XMmTOyatWq/yyHhYWFWFhY6Fl6IiIiel/lag1Qvnz5xMfHR6dzslqtltjYWAkMDMz0PYGBgRk6M+/YsSPT9D/88IP4+PiIh4eHYQtORERE77VcrQESEYmMjJTu3buLr6+v+Pv7y6xZsyQlJUV69uwpIiLdunUTOzs7iY6OFhGRQYMGSZ06dWTGjBkSFhYmK1eulPj4eFm4cKHOfpOTk2XNmjUyY8aMHD8mIiIierflegDUvn17uXv3rowfP14SExPF09NTtm3bpnR0vnr1qpiZ/a+iKigoSJYvXy7jxo2TMWPGiJOTk2zcuFHc3Nx09rty5UoBIB07dszR4yEiIqJ3X67PA/Qu4jxARPSu4zxARBm9N/MAEREREeUGBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCaHARARERGZHAZAREREZHIYABEREZHJYQBEREREJocBEBEREZkcBkBERERkcvLkdgGIiIjeZV/vOGv0PIY0dDZ6HqSLNUBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCaHARARERGZHAZAREREZHL0DoBevnz5xtfu3buXrcIQERER5QS9A6AOHToIgAzbb9++LXXr1jVEmYiIiIiMSu8A6OrVq/Lxxx/rbEtMTJS6detKlSpVDFYwIiIiImPROwDaunWr7N+/XyIjI0VE5ObNm1KnTh1xd3eX1atXG7yARERERIam92rwJUqUkO3bt0vNmjVFRGTLli3i7e0ty5YtEzMz9qkmIiKid5/eAZCIiL29vezYsUNq1aolDRs2lJ9++klUKpWhy0ZERERkFG8VABUtWjTTAOfp06fyyy+/SPHixZVtDx48MFzpiIiIiIzgrQKgWbNmGbkYRERERDnnrQKg7t27G60A8+bNk+nTp0tiYqJ4eHjIN998I/7+/m9Mv2bNGvnss8/k8uXL4uTkJNOmTZMmTZropDl9+rSMHDlS9u7dK2lpaeLi4iLr1q2TcuXKGe04iIiI6P2hd6/lhIQEOXHihPLvTZs2SXh4uIwZM0ZevHih175WrVolkZGREhUV
JQkJCeLh4SEhISFy586dTNPv379fOnbsKBEREXL06FEJDw+X8PBwOXnypJLmwoULUrNmTalSpYrs2bNH/v77b/nss8/E0tJS30MlIiKiD5QKmc1q+B/8/Pxk1KhR0rp1a7l48aK4uLhIq1at5PDhwxIWFqZXc1lAQID4+fnJ3LlzRURErVaLvb29DBw4UEaNGpUhffv27SUlJUW2bNmibKtevbp4enpKTEyMiLyaqDFv3rzy008/6XNYOpKTk8Xa2lqSkpLEysoqy/shIjKWr3ecNer+hzR0Nur+3yfG/qxF+Hkbij73b71rgM6ePSuenp4i8qo5qk6dOrJ8+XJZvHixrFu37q338+LFCzly5IgEBwf/rzBmZhIcHCxxcXGZvicuLk4nvYhISEiIkl6tVsuvv/4qzs7OEhISIiVLlpSAgADZuHHjf5YlNTVVkpOTdf6IiIjow6V3AARA1Gq1iIjs3LlT6X9jb2+v11pg9+7dk/T0dClVqpTO9lKlSkliYmKm70lMTPzP9Hfu3JEnT57IF198IaGhobJ9+3Zp2bKltGrVSvbu3fvGskRHR4u1tbXyZ29v/9bHQURERO8fvQMgX19fmTx5svz000+yd+9eCQsLExGRS5cuZQhOcpomMGvRooUMGTJEPD09ZdSoUdK0aVOliSwzo0ePlqSkJOXv2rVrOVVkIiIiygV6T4Q4a9Ys6dy5s2zcuFHGjh0rlSpVEhGRtWvXSlBQ0Fvvx8bGRszNzeX27ds622/fvi22traZvsfW1vY/09vY2EiePHnExcVFJ03VqlVl3759byyLhYWFWFhYvHXZiYiI6P2mdw1QtWrV5MSJE5KUlCRRUVHK9unTp8uSJUveej/58uUTHx8fiY2NVbap1WqJjY2VwMDATN8TGBiok15EZMeOHUr6fPnyiZ+fn5w5c0YnzdmzZ8XBweGty0ZEREQftiwthZGZrAwzj4yMlO7du4uvr6/4+/vLrFmzJCUlRXr27CkiIt26dRM7OzuJjo4WEZFBgwZJnTp1ZMaMGRIWFiYrV66U+Ph4WbhwobLP4cOHS/v27aV27dpSr1492bZtm/zyyy+yZ88egxwnERERvf/eKgAqVqyYnD17VmxsbN64LIaGPkthtG/fXu7evSvjx4+XxMRE8fT0lG3btil9ia5evaqzwGpQUJAsX75cxo0bJ2PGjBEnJyfZuHGjuLm5KWlatmwpMTExEh0dLZ9++qlUrlxZ1q1bpyzeSkRERPRW8wAtWbJEOnToIBYWFv9vM5cxZ43OKZwHiIjedZwHKOdwHqD3hz73b72WwkhLSxOVSiUhISG5PuKLiIiIKKv06gSdJ08e6du3rzx//txY5SEiIiIyOr1Hgfn7+8vRo0eNURYiIiKiHKH3KLD+/fvL0KFD5fr16+Lj4yMFCxbUeb1atWoGKxwRERGRMegdAHXo0EFERD799FNlm0qlEgCiUqkkPT3dcKUjIiIiMgK9A6BLly4ZoxxEREREOUbvAIgzKhMREdH7Tu9O0ERERETvOwZAREREZHIYABEREZHJYQBEREREJidLAdCjR4/k+++/l9GjRyuLnyYkJMiNGzcMWjgiIiIiY9B7FNjff/8twcHBYm1tLZcvX5ZevXpJsWLFZP369XL16lVZunSpMcpJREREZDB61wBFRkZKjx495Ny5c2Jpaalsb9Kkifzxxx8GLRwRERGRMegdAB0+fFj69OmTYbudnZ0kJiYapFBERERExqR3AGRhYSHJyckZtp89e1ZKlChhkEIRERERGZPeAVDz5s1l0qRJ8vLlSxF5tQ7Y1atXZeTIkdK6dWuDF5CIiIjI0PQOgGbMmCFPnjyRkiVLyrNnz6ROnTpSqVIlKVy4sEyZMsUYZSQiIiIyKL1HgVlbW8uOHTvkr7/+kuPHj8uTJ0/E29tbgoODjVE+IiIiIoPTOwDSqFGjhtSoUcOQZSEiIiLKEXo3gX366acyZ86cDNvnzp0rgwcPNkSZ
iIiIiIxK7wBo3bp1mdb8BAUFydq1aw1SKCIiIiJj0jsAun//vlhbW2fYbmVlJffu3TNIoYiIiIiMSe8AqFKlSrJt27YM23/77TepUKGCQQpFREREZEx6d4KOjIyUAQMGyN27d6V+/foiIhIbGyszZsyQWbNmGbp8RERERAandwD00UcfSWpqqkyZMkU+//xzERFxdHSU+fPnS7du3QxeQCIiIiJDy9Iw+H79+km/fv3k7t27kj9/filUqJChy0VERERkNFmeB0hEuPYXERERvZf07gR9+/Zt6dq1q5QpU0by5Mkj5ubmOn9ERERE7zq9a4B69OghV69elc8++0xKly4tKpXKGOUiIiIiMhq9A6B9+/bJn3/+KZ6enkYoDhEREZHx6d0EZm9vLwAMWoh58+aJo6OjWFpaSkBAgBw6dOg/069Zs0aqVKkilpaW4u7uLlu3btV5vUePHqJSqXT+QkNDDVpmIiIien/pHQDNmjVLRo0aJZcvXzZIAVatWiWRkZESFRUlCQkJ4uHhISEhIXLnzp1M0+/fv186duwoERERcvToUQkPD5fw8HA5efKkTrrQ0FC5deuW8rdixQqDlJeIiIjefyroWZ1TtGhRefr0qaSlpUmBAgUkb968Oq8/ePBArwIEBASIn5+fzJ07V0RE1Gq12Nvby8CBA2XUqFEZ0rdv315SUlJky5Ytyrbq1auLp6enxMTEiMirGqBHjx7Jxo0b9SqLRnJyslhbW0tSUpJYWVllaR9ERMb09Y6zRt3/kIbORt3/+8TYn7UIP29D0ef+rXcfIEPO9vzixQs5cuSIjB49WtlmZmYmwcHBEhcXl+l74uLiJDIyUmdbSEhIhmBnz549UrJkSSlatKjUr19fJk+eLMWLF890n6mpqZKamqr8Ozk5OYtHRERERO8DvQOg7t27Gyzze/fuSXp6upQqVUpne6lSpeTff//N9D2JiYmZpk9MTFT+HRoaKq1atZLy5cvLhQsXZMyYMdK4cWOJi4vLdKh+dHS0TJw40QBHRERERO8DvfsAiYhcuHBBxo0bJx07dlT66vz2229y6tQpgxYuqzp06CDNmzcXd3d3CQ8Ply1btsjhw4dlz549maYfPXq0JCUlKX/Xrl3L2QITERFRjtI7ANq7d6+4u7vLwYMHZf369fLkyRMRETl+/LhERUXptS8bGxsxNzeX27dv62y/ffu22NraZvoeW1tbvdKLiFSoUEFsbGzk/Pnzmb5uYWEhVlZWOn9ERET04dI7ABo1apRMnjxZduzYIfny5VO2169fXw4cOKDXvvLlyyc+Pj4SGxurbFOr1RIbGyuBgYGZvicwMFAnvYjIjh073pheROT69ety//59KV26tF7lIyIiog+T3gHQiRMnpGXLlhm2lyxZUu7du6d3ASIjI+W7776TJUuWyOnTp6Vfv36SkpIiPXv2FBGRbt266XSSHjRokGzbtk1mzJgh//77r0yYMEHi4+NlwIABIiLy5MkTGT58uBw4cEAuX74ssbGx0qJFC6lUqZKEhIToXT4iIiL68OjdCbpIkSJy69YtKV++vM72o0ePip2dnd4FaN++vdy9e1fGjx8viYmJ4unpKdu2bVM6Ol+9elXMzP4XpwUFBcny5ctl3LhxMmbMGHFycpKNGzeKm5ubiIiYm5vL33//LUuWLJFHjx5JmTJlpFGjRvL555+LhYWF3uUjIiKiD4/e8wANGzZMDh48KGvWrBFnZ2dJSEiQ27dvS7du3aRbt2569wN6F3EeICJ613EeoJzDeYDeH/rcv/VuAps6dapUqVJF7O3t5cmTJ+Li4iK1a9eWoKAgGTduXJYLTURERJRT9GoCAyCJiYkyZ84cGT9+vJw4cUKePHkiXl5e4uTkZKwyEhERERmU3gFQpUqV5NSpU+Lk5CT29vbGKhcRERGR0ejVBGZmZiZOTk5y//59Y5WHiIiIyOj07gP0xRdfyPDhwzOsvk5ERET0vtB7GHy3bt3k6dOn
4uHhIfny5ZP8+fPrvK7vavBEREREOS1XV4MnIiIiyg25uho8ERERUW74IFeDJyIiIvovuboaPBEREVFuyNXV4ImIiIhyQ66vBk9ERESU0/QOgDSrwb8uq6vBExEREeU0vQOgDh06yMiRIyUxMVFUKpWo1Wr566+/ZNiwYdKtWzdjlJGIiIjIoLgaPBEREZmct5oHKDk5WaysrEREJF++fPLdd99xNXgiIiJ6b71VAFS0aFG5deuWlCxZUurXry/r168Xe3t7rgZPRERE76W3agIrVKiQsgL8nj175OXLl0YtFBEREZExvVUNUHBwsNSrV0+qVq0qIiItW7bUmQNI265duwxXOiIiIiIjeKsA6Oeff5YlS5bIhQsXZO/eveLq6ioFChQwdtmIiIiIjOKtAqD8+fNL3759RUQkPj5epk2bJkWKFDFmuYiIiIiMRu/V4Hfv3m2MchARERHlGL0DoPT0dFm8eLHExsbKnTt3RK1W67zOPkBERET0rtM7ABo0aJAsXrxYwsLCxM3NTVQqlTHKRURERGQ0egdAK1eulNWrV0uTJk2MUR4iIiIio9N7KYx8+fJJpUqVjFEWIiIiohyhdwA0dOhQmT17tgAwRnmIiIiIjE7vJrB9+/bJ7t275bfffhNXV1fJmzevzuvr1683WOGIiIiIjEHvAKhIkSLSsmVLY5SFiIiIKEfoHQD9+OOPxigHERERUY7Ruw8QERER0fvurWqAvL29JTY2VooWLSpeXl7/OfdPQkKCwQpHREREZAxvVQPUokULsbCwEBGR8PBwadGixRv/smLevHni6OgolpaWEhAQIIcOHfrP9GvWrJEqVaqIpaWluLu7y9atW9+Ytm/fvqJSqWTWrFlZKhsRERF9eN6qBigqKirT/zeEVatWSWRkpMTExEhAQIDMmjVLQkJC5MyZM1KyZMkM6ffv3y8dO3aU6Ohoadq0qSxfvlzCw8MlISFB3NzcdNJu2LBBDhw4IGXKlDFomYmIiOj9lut9gGbOnCm9evWSnj17iouLi8TExEiBAgVk0aJFmaafPXu2hIaGyvDhw6Vq1ary+eefi7e3t8ydO1cn3Y0bN2TgwIGybNmyDEP1iYiIyLTlagD04sULOXLkiAQHByvbzMzMJDg4WOLi4jJ9T1xcnE56EZGQkBCd9Gq1Wrp27SrDhw8XV1fX/7ccqampkpycrPNHREREH65cDYDu3bsn6enpUqpUKZ3tpUqVksTExEzfk5iY+P+mnzZtmuTJk0c+/fTTtypHdHS0WFtbK3/29vZ6HgkRERG9T3K9CczQjhw5IrNnz5bFixe/9Ur1o0ePlqSkJOXv2rVrRi4lERER5aZcDYBsbGzE3Nxcbt++rbP99u3bYmtrm+l7bG1t/zP9n3/+KXfu3JFy5cpJnjx5JE+ePHLlyhUZOnSoODo6ZrpPCwsLsbKy0vkjIiKiD9dbjQKLjIx86x3OnDnzrdPmy5dPfHx8JDY2VsLDw0XkVf+d2NhYGTBgQKbvCQwMlNjYWBk8eLCybceOHRIYGCgiIl27ds20j1DXrl2lZ8+eb102IiIi+nC9VQB09OhRnX8nJCRIWlqaVK5cWUREzp49K+bm5uLj46N3ASIjI6V79+7i6+sr/v7+MmvWLElJSVGClW7duomdnZ1ER0eLiMigQYOkTp06MmPGDAkLC5OVK1dKfHy8LFy4UEREihcvLsWLF9fJI2/evGJra6uUl4iIiEzbWwVAu3fvVv5/5syZUrhwYVmyZIkULVpUREQePnwoPXv2lFq1auldgPbt28vdu3dl/PjxkpiYKJ6enrJt2zalo/PVq1fFzOx/LXVBQUGyfPlyGTdunIwZM0acnJxk48aNGeYAIiIiInoTFQDo8wY7OzvZvn17huHlJ0+elEaNGsnNmzcNWsDckJycLNbW1pKUlMT+QET0Tvp6x1mj7n9IQ2ej7v99YuzPWoSft6Hoc//WuxN0cnKy3L17N8P2u3fvyuPHj/Xd
HREREVGO0zsAatmypfTs2VPWr18v169fl+vXr8u6deskIiJCWrVqZYwyEhERERnUW/UB0hYTEyPDhg2TTp06ycuXL1/tJE8eiYiIkOnTpxu8gERERESGpncAVKBAAfn2229l+vTpcuHCBRERqVixohQsWNDghSMiIiIyhixPhHjr1i25deuWODk5ScGCBUXPvtREREREuUbvAOj+/fvSoEEDcXZ2liZNmsitW7dERCQiIkKGDh1q8AISERERGZreAdCQIUMkb968cvXqVSlQoICyvX379rJt2zaDFo6IiIjIGPTuA7R9+3b5/fffpWzZsjrbnZyc5MqVKwYrGBEREZGx6F0DlJKSolPzo/HgwQOxsLAwSKGIiIiIjEnvAKhWrVqydOlS5d8qlUrUarV8+eWXUq9ePYMWjoiIiMgY9G4C+/LLL6VBgwYSHx8vL168kBEjRsipU6fkwYMH8tdffxmjjEREREQGpXcNkJubm5w9e1Zq1qwpLVq0kJSUFGnVqpUcPXpUKlasaIwyEhERERmUXjVAL1++lNDQUImJiZGxY8caq0xERERERqVXDVDevHnl77//NlZZiIiIiHKE3k1gXbp0kR9++MEYZSEiIiLKEXp3gk5LS5NFixbJzp07xcfHJ8MaYDNnzjRY4YiIiIiMQe8A6OTJk+Lt7S0iImfPntV5TaVSGaZUREREREakdwC0e/duY5SDiIiIKMdkeTV4IiIioveV3jVA9erV+8+mrl27dmWrQERERETGpncA5OnpqfPvly9fyrFjx+TkyZPSvXt3Q5WLiIiIyGj0DoC+/vrrTLdPmDBBnjx5ku0CERERERmbwfoAdenSRRYtWmSo3REREREZjcECoLi4OLG0tDTU7oiIiIiMRu8msFatWun8G4DcunVL4uPj5bPPPjNYwYiIiIiMRe8AyNraWuffZmZmUrlyZZk0aZI0atTIYAUjIiIiMha9A6Aff/zRGOUgIiIiyjF69wG6du2aXL9+Xfn3oUOHZPDgwbJw4UKDFoyIiIjIWPQOgDp16qQsh5GYmCjBwcFy6NAhGTt2rEyaNMngBSQiIiIyNL0DoJMnT4q/v7+IiKxevVrc3d1l//79smzZMlm8eLGhy0dERERkcHoHQC9fvhQLCwsREdm5c6c0b95cRESqVKkit27dMmzpiIiIiIxA7wDI1dVVYmJi5M8//5QdO3ZIaGioiIjcvHlTihcvnqVCzJs3TxwdHcXS0lICAgLk0KFD/5l+zZo1UqVKFbG0tBR3d3fZunWrzusTJkyQKlWqSMGCBaVo0aISHBwsBw8ezFLZiIiI6MOjdwA0bdo0WbBggdStW1c6duwoHh4eIiKyefNmpWlMH6tWrZLIyEiJioqShIQE8fDwkJCQELlz506m6ffv3y8dO3aUiIgIOXr0qISHh0t4eLicPHlSSePs7Cxz586VEydOyL59+8TR0VEaNWokd+/e1bt8RERE9OFRAYC+b0pPT5fk5GQpWrSosu3y5ctSoEABKVmypF77CggIED8/P5k7d66IiKjVarG3t5eBAwfKqFGjMqRv3769pKSkyJYtW5Rt1atXF09PT4mJick0j+TkZLG2tpadO3dKgwYN/t8yadInJSWJlZWVXsdDRJQTvt5x1qj7H9LQ2aj7f58Y+7MW4edtKPrcv7O0FIa5ublO8CMi4ujoqHfw8+LFCzly5IgEBwf/r0BmZhIcHCxxcXGZvicuLk4nvYhISEjIG9O/ePFCFi5cKNbW1kpt1etSU1MlOTlZ54+IiIg+XHpPhCgisnbtWlm9erVcvXpVXrx4ofNaQkLCW+/n3r17kp6eLqVKldLZXqpUKfn3338zfU9iYmKm6RMTE3W2bdmyRTp06CBPnz6V0qVLy44dO8TGxibTfUZHR8vEiRPfutxERET0ftO7BmjOnDnSs2dPKVWqlBw9elT8/f2lePHicvHiRWncuLExypgl9erVk2PHjsn+/fslNDRU2rVr98Z+
RaNHj5akpCTl79q1azlcWiIiIspJegdA3377rSxcuFC++eYbyZcvn4wYMUJ27Nghn376qSQlJem1LxsbGzE3N5fbt2/rbL99+7bY2tpm+h5bW9u3Sl+wYEGpVKmSVK9eXX744QfJkyeP/PDDD5nu08LCQqysrHT+iIiI6MOldwB09epVCQoKEhGR/Pnzy+PHj0VEpGvXrrJixQq99pUvXz7x8fGR2NhYZZtarZbY2FgJDAzM9D2BgYE66UVEduzY8cb02vtNTU3Vq3xERET0YdI7ALK1tZUHDx6IiEi5cuXkwIEDIiJy6dIlycKAMomMjJTvvvtOlixZIqdPn5Z+/fpJSkqK9OzZU0REunXrJqNHj1bSDxo0SLZt2yYzZsyQf//9VyZMmCDx8fEyYMAAERFJSUmRMWPGyIEDB+TKlSty5MgR+eijj+TGjRvStm1bvctHREREHx69O0HXr19fNm/eLF5eXtKzZ08ZMmSIrF27VuLj46VVq1Z6F6B9+/Zy9+5dGT9+vCQmJoqnp6ds27ZN6eh89epVMTP7X5wWFBQky5cvl3HjxsmYMWPEyclJNm7cKG5ubiLyaoTav//+K0uWLJF79+5J8eLFxc/PT/78809xdXXVu3xERET04dF7HiC1Wi1qtVry5HkVO61cuVL2798vTk5O0qdPH8mXL59RCpqTOA8QEb3rOA9QzuE8QO8Pfe7fetcAmZmZ6dTIdOjQQTp06KB/KYmIiIhySZYmQvzzzz+lS5cuEhgYKDdu3BARkZ9++kn27dtn0MIRERERGYPeNUDr1q2Trl27SufOneXo0aPKyKqkpCSZOnVqhoVJiXITmwmIiCgzetcATZ48WWJiYuS7776TvHnzKttr1Kih1yzQRERERLlF7wDozJkzUrt27Qzbra2t5dGjR4YoExEREZFRZWkeoPPnz2fYvm/fPqlQoYJBCkVERERkTHoHQL169ZJBgwbJwYMHRaVSyc2bN2XZsmUybNgw6devnzHKSERERGRQeneCHjVqlKjVamnQoIE8ffpUateuLRYWFjJs2DAZOHCgMcpIREREZFB6B0AqlUrGjh0rw4cPl/Pnz8uTJ0/ExcVFChUqZIzyERERERmc3gGQRr58+cTFxcWQZSEiIiLKEW8dAH300UdvlW7RokVZLgwRERFRTnjrAGjx4sXi4OAgXl5eWVr1nYiIiOhd8dYBUL9+/WTFihVy6dIl6dmzp3Tp0kWKFStmzLIRERERGcVbD4OfN2+e3Lp1S0aMGCG//PKL2NvbS7t27eT3339njRARERG9V/SaB8jCwkI6duwoO3bskH/++UdcXV2lf//+4ujoKE+ePDFWGYmIiIgMKkurwYuImJmZiUqlEgCSnp5uyDIRERERGZVeAVBqaqqsWLFCGjZsKM7OznLixAmZO3euXL16lfMAERER0XvjrTtB9+/fX1auXCn29vby0UcfyYoVK8TGxsaYZSMieqd9veOs0fMY0tDZ6HkQvc7Y5/a7cF6/dQAUExMj5cqVkwoVKsjevXtl7969maZbv369wQpHREREZAxvHQB169ZNVCqVMctCRERElCP0mgiRiIiI6EOQ5VFgRERERO8rBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyXnreYCIiP4Ll4UgovcJa4CIiIjI5DAAIiIiIpPDJjAiIqJ3lCmsyp5b3okaoHnz5omjo6NYWlpKQECAHDp06D/Tr1mzRqpUqSKWlpbi7u4uW7duVV57+fKljBw5Utzd3aVgwYJSpkwZ6datm9y8edPYh0FERETviVwPgFatWiWRkZESFRUlCQkJ4uHhISEhIXLnzp1M0+/fv186duwoERERcvToUQkPD5fw8HA5efKkiIg8ffpUEhIS5LPPPpOEhARZv369nDlzRpo3b56Th0VERETvsFwPgGbOnCm9evWSnj17iouLi8TExEiBAgVk0aJFmaafPXu2hIaG
yvDhw6Vq1ary+eefi7e3t8ydO1dERKytrWXHjh3Srl07qVy5slSvXl3mzp0rR44ckatXr+bkoREREdE7KlcDoBcvXsiRI0ckODhY2WZmZibBwcESFxeX6Xvi4uJ00ouIhISEvDG9iEhSUpKoVCopUqRIpq+npqZKcnKyzh8RERF9uHI1ALp3756kp6dLqVKldLaXKlVKEhMTM31PYmKiXumfP38uI0eOlI4dO4qVlVWmaaKjo8Xa2lr5s7e3z8LREBER0fsi15vAjOnly5fSrl07ASDz589/Y7rRo0dLUlKS8nft2rUcLCURERHltFwdBm9jYyPm5uZy+/Ztne23b98WW1vbTN9ja2v7Vuk1wc+VK1dk165db6z9ERGxsLAQCwuLLB4FERERvW9ytQYoX7584uPjI7Gxsco2tVotsbGxEhgYmOl7AgMDddKLiOzYsUMnvSb4OXfunOzcuVOKFy9unAMgIiKi91KuT4QYGRkp3bt3F19fX/H395dZs2ZJSkqK9OzZU0REunXrJnZ2dhIdHS0iIoMGDZI6derIjBkzJCwsTFauXCnx8fGycOFCEXkV/LRp00YSEhJky5Ytkp6ervQPKlasmOTLly93DpSIiIjeGbkeALVv317u3r0r48ePl8TERPH09JRt27YpHZ2vXr0qZmb/q6gKCgqS5cuXy7hx42TMmDHi5OQkGzduFDc3NxERuXHjhmzevFlERDw9PXXy2r17t9StWzdHjouIiIjeXbkeAImIDBgwQAYMGJDpa3v27MmwrW3bttK2bdtM0zs6OgoAQxaPiIiIPjAf9CgwIiIioswwACIiIiKTwwCIiIiITA4DICIiIjI5DICIiIjI5DAAIiIiIpPDAIiIiIhMDgMgIiIiMjkMgIiIiMjkMAAiIiIik8MAiIiIiEwOAyAiIiIyOQyAiIiIyOQwACIiIiKTwwCIiIiITA4DICIiIjI5DICIiIjI5DAAIiIiIpPDAIiIiIhMDgMgIiIiMjkMgIiIiMjkMAAiIiIik8MAiIiIiEwOAyAiIiIyOQyAiIiIyOQwACIiIiKTwwCIiIiITA4DICIiIjI5DICIiIjI5DAAIiIiIpPDAIiIiIhMDgMgIiIiMjm5HgDNmzdPHB0dxdLSUgICAuTQoUP/mX7NmjVSpUoVsbS0FHd3d9m6davO6+vXr5dGjRpJ8eLFRaVSybFjx4xYeiIiInof5WoAtGrVKomMjJSoqChJSEgQDw8PCQkJkTt37mSafv/+/dKxY0eJiIiQo0ePSnh4uISHh8vJkyeVNCkpKVKzZk2ZNm1aTh0GERERvWdyNQCaOXOm9OrVS3r27CkuLi4SExMjBQoUkEWLFmWafvbs2RIaGirDhw+XqlWryueffy7e3t4yd+5cJU3Xrl1l/PjxEhwcnFOHQURERO+ZXAuAXrx4IUeOHNEJVMzMzCQ4OFji4uIyfU9cXFyGwCYkJOSN6d9WamqqJCcn6/wRERHRhyvXAqB79+5Jenq6lCpVSmd7qVKlJDExMdP3JCYm6pX+bUVHR4u1tbXyZ29vn639ERER0bst1ztBvwtGjx4tSUlJyt+1a9dyu0hERERkRHlyK2MbGxsxNzeX27dv62y/ffu22NraZvoeW1tbvdK/LQsLC7GwsMjWPoiIiOj9kWs1QPny5RMfHx+JjY1VtqnVaomNjZXAwMBM3xMYGKiTXkRkx44db0xPRERElJlcqwESEYmMjJTu3buLr6+v+Pv7y6xZsyQlJUV69uwpIiLdunUTOzs7iY6OFhGRQYMGSZ06dWTGjBkSFhYmK1eulPj4eFm4cKGyzwcPHsjVq1fl5s2bIiJy5swZEXlVe5TdmiIiIiL6MORqANS+fXu5e/eujB8/XhITE8XT01O2bdumdHS+evWqmJn9r5IqKChIli9fLuPGjZMxY8aIk5OTbNy4Udzc3JQ0mzdvVgIoEZEOHTqIiEhUVJRMmDAhZw6MiIiI3mm5GgCJiAwYMEAG
DBiQ6Wt79uzJsK1t27bStm3bN+6vR48e0qNHDwOVjoiIiD5EHAVGREREJocBEBEREZkcBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCaHARARERGZHAZAREREZHIYABEREZHJYQBEREREJocBEBEREZkcBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRyGAARERGRyWEARERERCaHARARERGZHAZAREREZHIYABEREZHJYQBEREREJocBEBEREZkcBkBERERkchgAERERkclhAEREREQmhwEQERERmRwGQERERGRy3okAaN68eeLo6CiWlpYSEBAghw4d+s/0a9askSpVqoilpaW4u7vL1q1bdV4HIOPHj5fSpUtL/vz5JTg4WM6dO2fMQyAiIqL3SK4HQKtWrZLIyEiJioqShIQE8fDwkJCQELlz506m6ffv3y8dO3aUiIgIOXr0qISHh0t4eLicPHlSSfPll1/KnDlzJCYmRg4ePCgFCxaUkJAQef78eU4dFhEREb3Dcj0AmjlzpvTq1Ut69uwpLi4uEhMTIwUKFJBFixZlmn727NkSGhoqw4cPl6pVq8rnn38u3t7eMnfuXBF5Vfsza9YsGTdunLRo0UKqVasmS5culZs3b8rGjRtz8MiIiIjoXZUnNzN/8eKFHDlyREaPHq1sMzMzk+DgYImLi8v0PXFxcRIZGamzLSQkRAluLl26JImJiRIcHKy8bm1tLQEBARIXFycdOnTIsM/U1FRJTU1V/p2UlCQiIsnJyVk+Nno3PE95YtT98xz5H2N/1iLv3uedm8fMczvnmOL3/L6eX5r9Avh/0+ZqAHTv3j1JT0+XUqVK6WwvVaqU/Pvvv5m+JzExMdP0iYmJyuuabW9K87ro6GiZOHFihu329vZvdyBkssbkdgFMjCl+3rl1zKb4WecmU/uejZ3v48ePxdra+j/T5GoA9K4YPXq0Tq2SWq2WBw8eSPHixUWlUuViyV5Fs/b29nLt2jWxsrL64PPNzbx5zDzmDzHf3Mybx8xjzmkA5PHjx1KmTJn/N22uBkA2NjZibm4ut2/f1tl++/ZtsbW1zfQ9tra2/5le89/bt29L6dKlddJ4enpmuk8LCwuxsLDQ2VakSBF9DsXorKyscuXEyq18czNvHrNp5G1q+eZm3jxm08g7N49Z2/9X86ORq52g8+XLJz4+PhIbG6tsU6vVEhsbK4GBgZm+JzAwUCe9iMiOHTuU9OXLlxdbW1udNMnJyXLw4ME37pOIiIhMS643gUVGRkr37t3F19dX/P39ZdasWZKSkiI9e/YUEZFu3bqJnZ2dREdHi4jIoEGDpE6dOjJjxgwJCwuTlStXSnx8vCxcuFBERFQqlQwePFgmT54sTk5OUr58efnss8+kTJkyEh4enluHSURERO+QXA+A2rdvL3fv3pXx48dLYmKieHp6yrZt25ROzFevXhUzs/9VVAUFBcny5ctl3LhxMmbMGHFycpKNGzeKm5ubkmbEiBGSkpIivXv3lkePHknNmjVl27ZtYmlpmePHl10WFhYSFRWVoYnuQ803N/PmMecsUztmftY5i8f84eebXSq8zVgxIiIiog9Irk+ESERERJTTGAARERGRyWEARERERCaHARARERGZHAZARB8ojm8gInozBkAmJrduimq1WsmfN2bjOnPmjLx48UJUKhU/ayKiN2AAZEIAKGub3bx5M8fyffHihTKX08uXL3N9fbUP2cqVK6Vx48ayadMm5bPOqSCIwRYZm+ZBSkQkNTU1F0uSc7R/V6ZyzDmFAZCJ0A5+Bg8eLGFhYZKcnGz0fLdt2yYrVqwQEZHevXtLcHBwjt4oc7vG6223G0p4eLhUqFBBvvrqK9m8eXOOBUFqtVo5v65evZqjAfbrx2ZqgdjevXtl7dq1OZbfm87hnDjHNA9S33//vfz4449y9+5do+apnbdGWlpa
juSpofldff/99/LDDz+ISO6c4x/i7yrXZ4I2Vfv27ZOXL1/Ky5cvpVGjRkbNSzv4OXr0qPz777/y7bff5siidQsXLpQTJ07IqlWr5NChQ7Jnz54cqwHSHPeff/4pu3btktKlS0tQUJC4ubnpfCbGyFdzoV62bJncunVLKlasKA0aNBArKyudC7khpaWliaWlpfz666/SokULmTp1qoiING/eXPLmzWvUY9Ycz5gxY+TXX3+Vq1evSt++faVDhw7i4eFhlDxFdG+Kjx49kgIFCkjevHlFRIx6vPqUy1gASGpqqgwcOFA6d+5s1Lw0tI9r27ZtolKppGDBglKzZk0l0Db2OTZixAhZunSpfPnll/LixQuj5KVN+5inTZsm1apVk8aNGxs939dt2LBBHj16JP3798+R81rzXT548EDy5MkjL168EBsbG6Pnm6NAOW7UqFGoWLEiPD09UaJECbRq1Qpnzpwxer4rVqxAw4YN0axZM7x48QIvX740Wl5qtVr5fzc3N6hUKkyePDnT141p48aNyJ8/P/z9/eHs7AwvLy9s3749R8owevRo2NjYoGrVqnB1dUWbNm1w+/ZtAEB6erpR8kxLSwMAPH/+HI0aNYK3tzfWrl2LFy9eADD8MWsfx08//YSyZcti2bJlmD59OhwdHdGpUyfs37/foHlqaB/LxIkTUa9ePbi6uiIyMhJxcXEZ0hhLbGwsPvvsM3To0AFLly7FlStXjJ6n9nH17NkTvXr1AmC88+r1PIcOHYpixYqhXLlyKFu2LCZNmpRpOkP79ttvUbp0acTHx+tsf/jwocHzXr16Ne7fvw/g1e8qJSUFTk5OOHr0qMHyeBua3/Tp06dRsWJFLF++3Oh5aj7HzZs3Izg4GC4uLmjQoAHmzp1r9LxzEgOgHDZ79myUKFEChw8fBgDMmTMHKpUKf/31l1HzTUtLw7Bhw1C+fHlUqlRJZ7uhaS7CarUaKSkpaNGiBRo1agQ3NzcsXrwYKSkpOuk0aQ3t9u3bGDNmDH744QcAwF9//YXu3bujXLly+P333w2er/ZxJycno3Xr1jh+/DieP3+OJUuWoFatWmjUqJHRgyCNZ8+eoWHDhkYPggBg//79GDZsGJYuXaps27lzJ6pVq4YOHTooAYmhaH92s2bNQpEiRTBnzhwMHDgQoaGhKF++PGJjYw2aZ2bWrVuHggULYtCgQejSpQuCgoJQo0YN5YZsLLdu3VL+f9y4cfDz81O+V0N/v2q1Wmefly9fhqenJ44dO4ajR49izpw5yJMnD8aMGaPzHmPo3bs3IiIiAADnzp3D0qVLERQUhMDAQGzdutVg+SxevBj29vb4/PPPle8yMTERxYoVw5EjRwyWjz7u37+P5s2bo2/fvgCMH9xv2bIFlpaW+PrrrxEbG4uRI0dCpVIZ7YEmNzAAymG9e/fGV199BQBYtWoVihQpgvnz5wN4dcMylMxurk+fPsW0adNgZ2eHPn364MmTJ29MawibNm3C6dOnlX+3adMGLi4uOkEQYNjj1jh69Cg8PDzg5+enc8E6fvy4EgTt2LEDgGEuJNqf4cWLF3H27Fk0aNAA169fV/JYtWoVatWqhZCQEIMGQZryX7lyBX///Tdu3rypfKbGDoLUajVOnDgBS0tL5M2bFzNmzNB5XRMEderUCXv27DFIntr+/vtv9OjRA2vWrFG2HTt2DD179oSLiwv++ecfg+epcfnyZbi6uiq/38TERFhZWWHYsGFGyxMADhw4gLJly8Lb2xuhoaHo1asXvLy8sGXLFjx+/Fjnt5VdmvNF46uvvkLHjh3xySefKOfQkydPMH/+fOTJkwdjx441WN7av43nz58DACIjI1GzZk2MGjUKNWrUQHh4OHr16oVu3bqhUqVKuHv3rsHyHzZsGPz8/DBx4kQ8evQIaWlpKF++PC5dugTg1WejKaOhrqHa+/nxxx8xaNAg3L9/Xzn+TZs2IV++fEZ/YE5NTUWnTp0wZcoUAMCN
Gzfg6OioBF8fCgZAOSg1NRUeHh6IiYnB/v37UahQIeXi+fLlSwwfPhzr16/Pdj7aP6K//voLu3btwu7du5V8pkyZgoCAAAwePBhPnz4FYJiaIO18Dx48iEqVKqFDhw5KbRcAtG3bFu7u7liwYAFu3LiBunXrokWLFtnO+3Xbt29HaGgoChYsiD///FPntePHj+Ojjz5CoUKFsGvXLoPmO3r0aJQsWRJubm4oU6YMEhMTldc0QVDdunXh4+ODBw8eZDs/zU1ow4YNqFixIipWrIjSpUtj4sSJSvCpCYICAgKwbNmyDDe1rOapbd26dbC1tUV4eHiGoCM2NhalSpXC+PHjs5Xv2LFjkZCQoPx78+bNKFKkCEqWLIlNmzbppD1w4AC8vLywYsWKN5Y5u06cOAFnZ2c8efIEFy9ehL29vdIUBQB79uxBcnKywfP9+++/sXPnTsyaNQsDBgxAq1atoFKp4OTkhNKlS8PLywsdOnTAzJkzs5VP+/bt0aVLF+XfycnJGDFiBAoXLoxGjRrppH3y5AliYmJgYWGBAQMGZCtfQPda8sMPP2DZsmV4/vw5Dh06hJ49e8LNzQ0zZszA8ePHAQA///wzGjZsqFzPskP7Wjh06FB4e3tj0qRJOHHiBOrUqZNpkGWIfLXP0ZiYGHz++eews7NDYGAgevbsifPnz+Px48fo0aMHRo4cqROAGdrTp0/h4uKCdevW4e7du7Czs0Pv3r2V1xcvXowDBw4YJe+cxAAoB2jf6GbNmgVvb2/ky5cPixYtUrY/fPgQISEhmDp1qsHyHTVqFMqXLw9vb28ULlwY7dq1w5kzZ5CamoqJEyeievXqGDJkiFITlB3aP95JkyZh4MCBcHBwQN68edGuXTudH0vnzp3h5OSEChUqwMfHB6mpqdnOPzO7d+9GvXr14OLiohOEAcCRI0fQv39/nD17Nlt5aF+Atm3bBjs7O6xfvx7R0dFwdXWFq6srHj16pKRRq9VYvHgx+vfvb7CL12+//QZra2t8/fXXSE1NxYQJE2BjY4M+ffrgxIkTAF4FQf7+/qhbt262bsraZX49kFq2bBns7OwwYMCADH3a4uPjsxVkHzx4EF26dMnQb23gwIFQqVTo16+f0l9Dw8fHB59++mmW8/z//P3336hRowaOHDkCBwcH9OrVSznGY8eOoX///soN2hBSUlLw8uXLDJ/B33//jTJlymDnzp3YtGkTZs6ciW7duunUvmbF6dOnld+m5riuXLmCSZMmQaVSYfbs2RnKN2PGDNSqVStbAaf2e0eMGIGSJUvixx9/VJr9nj17pvObSktLQ1hYGNq0aZPtQFfzfu3PePDgwQgMDESXLl2gUqlQu3Zt1K1bF2FhYWjSpAmqV6+Or7/+2iD5AsCUKVNQokQJnD17Fs+fP8esWbPQpEkTFCtWDEOGDIGvry+qV6+ufAbGagr75JNPMGLECJQrVw69e/dWfvsPHjxAjx49MH/+fKM34xsbAyAj++mnn+Dg4IB///0XALBv3z7UqFED1atXV27K165dU35IhuqTM2fOHJQsWRKHDh0CAEybNg1mZmbYu3cvgFdVyp9//jkqVKiQ4UKWHV999RUKFy6MXbt24dSpU1i4cCEqV66Mzp07K2UBXgULGzduVI43Ox2yNReAM2fOID4+XqdWZ9++fWjWrBm8vb0zBEGGDLwWLFiA+fPnY86cOUqZ9u7di4CAAHh4eOhcsLVl5wKiVqtx7949hIeHY+LEiQBeVVVXqFAB1atXR/ny5REREaHUyDx//jxbHXS1y/rNN9+gU6dOaNu2LaKiopRgaOnSpW8MgoDs1TRqvue1a9di27ZtyvZ+/frBwcEBs2fPRlJSEgDg8ePH8PLyMugDBfDqfNq4caPy72rVqkGlUmVoGhg2bBiqV6+uNHVm16+//oqOHTvC19cX/fv3x6+//qrzekBAgM4DlSFvit9++y2cnJyUZpgbN25g/PjxKFSoEL755hudtM+e
PTNYX6Q5c+bA1tZWp9YvPT0djx8/BgAkJSVh/fr1CAkJgbu7e7abd1//LWrX6gwdOlR5YBs5ciRmzpyJmTNnYsqUKRg7dqzBBpQcPHgQffr0ybT/2tKlSzFs2DCULl0aKpVKp89Vdmg+r+TkZJ2Hmq+//hrm5uaoU6eO8hCvVqsxevRoVKxYERcvXjRI/rmJAZARrVu3DnPnzoVKpUKtWrVw/vx5ZXudOnVga2uLKlWqwMvLCwEBAcrJZ4ggKCIiQhl1pelr9O233wKA0kfg2bNnWLRokUHyU6vVSEtLQ9OmTfHJJ5/ovLZs2TKUKlUK7dq1yxCEAIa5Ka5ZswZly5ZFhQoVUKhQIdStWxcHDx4EAPzxxx9o3rw5AgICDNYZV/si++DBA7i7u0OlUmHEiBHK9vT0dPzxxx8IDAyEl5dXhhoKfWh3sNbQ3JA2bNiAc+fO4d69e3BxccHHH38M4FVzXJEiRdCpUyelJsgQRo4cieLFi2PEiBFo3bo1XFxcUK1aNaXf0U8//YRy5cqhS5cuuHr1qsHyVavVuHDhAipXroyWLVsqzboA8PHHH6NUqVIIDg7GyJEjER4eDhcXl2w392lbu3YtbGxs0K9fP+W3fPbsWVStWhV16tTB7t27sXXrVgwZMgRWVlYGq/3ZtGkTLC0tMXnyZMyfPx/t27eHubm58lClVqtRv3599OnTR3mPIQOgQ4cOoUKFCggKClLOuevXr2P8+PGwsrLCvHnzMrzHEPl//PHHSnPahQsXsGLFCtSsWRMtW7bEL7/8ggcPHuDjjz9Gjx49lAAkq4HI68F9hw4dUL9+fURFRSnXp+HDh6N69eqIjo5WgjBtWbmOaee7bt06VKtWDU5OTrhw4YKyT+3PMj09HWfPnkX79u0RHBxssMBr06ZN8PPzQ8OGDTFo0CBl+6hRo1C4cGF07doV/fr1Q5cuXVC0aNEcHwlnLAyAjGTkyJEoU6YMZs6ciU8++QSVKlWCi4uLcmL/+++/+PXXXzFr1iz88ssv2aoJef1i8/TpU/j7+2P58uU4dOhQhr5GkyZNwubNm3Xek5Ufr3a+mve3adMGPXr0yLDPkSNHonDhwujZsyf+/vtvvfP6L/v370fhwoWxaNEinDx5EqdPn4anpyd8fHyUDtA7d+5EvXr1ULduXTx//txgNwjNMf77778IDQ1FuXLllI7PwKvP6M8//0TFihXRrVu3bOV1/vx5ZQj/6tWrUbt2baSmpiq1HrNnz0aDBg2UQEvz5B4aGqozakhf2p/V8ePHUb58eaUDOQAcPnxYCeI1n8cPP/yAFi1aZLuKPLPvacuWLahRowbatm2rU9unaQ5r1qyZTpOEIW4ScXFxsLa2xqJFizJ02j927Bh8fHxQvnx5ODs7o27dujh27Fi28wReNY0HBwcrx3Pnzh2UKVMmQz+byMhItGrVCi9evMjWuZ3Z95Weno6EhAQ4OzvD399fJwiaMGECVCoV1q5dm+U8M8vvxYsX6Nq1Kxo2bIjPP/8c9erVQ7NmzdC5c2e0b98eQUFBSE9Px61bt5TjNcSDnOa6PX78eCxevBgqlQofffSR8rlERkbCz88PQ4cOzXb/Lu3P+t69ezh58iSaN28OCwsLZeQqgAwBEPCqA76FhYVObWRWxcfHo1ixYhgxYgQ+/fRT2NraomHDhsrrc+fORe/evVGnTh0MHTrUqAMLchoDICM4deoUbG1tdYKMixcvwsvLC66urjh37lym78vuD3jmzJnKk2l0dLTSB0d7aPLDhw/RoEEDfPHFF9nKS1tMTIzyRDB58mQULFgwwxPCjBkzUL9+fVSrVg1RUVEAsvaUuHfv3gydh+fOnYugoCCkpqYqF4iUlBS4u7vrdNb8448/cO3aNb3zfJOpU6fqdCQ/d+4cqlevjkqVKul0fk5PT8exY8ey/f127NhRGW5sbm6OH3/8Uef1iRMnIiAgADdu3ADwqv/E/Pnzs1zzVKdOnQwdHXft2gVr
a2tcvnxZ2ZaWlobY2FhUrVo1Q9MMkPVmPs2NFng1pcHTp0+Vc2bLli2oXr062rRpoxME9e3bF+7u7oiJiTFo7c+8efMQFhaG1NRUJaB6/fs8c+YMbty4oQSkhnDnzh1UqlQJR44cwY0bN2BnZ6fT0Xrt2rW4fv06fv/9d5w6dSpbeWl/T2fOnMH58+eVwFkTBDk5OekEQVeuXMH333+frSDz9fND8x0fPnwYYWFhqFy5MqZNm6Y8zMyfPx8NGzbUacI2xAPNoUOHUKlSJfzxxx8AgD///BP58uXTCUYA4KOPPsJHH32UrTzXrl2rBC9DhgxBaGgoACAhIQEtWrSAn5+fTlCZ2cNmrVq18PPPP2cpf+39HThwQGktSE1Nxb59+2BnZ4cGDRooadLT05Genp5j87flFAZARhAfH4+iRYsqnRA1P/ATJ06gaNGiqFu3rhKoGKoT2ePHj+Hj44P+/fsDePVjDgkJQbVq1ZRy3LhxA40bN9Z5UjcEd3d31KpVS/l3s2bNULp0aezbtw+3b9/G8+fP0aJFC6xduxZffPEFChQogDt37uidz4YNG+Dq6pphFMbYsWPh6uqq/FsTkMTHx8Pa2jrTZjdDWLhwIVQqFT777DOdICggIABOTk6Z9v/Q93Nfs2aNTl8aX19fmJubIzIyUtmmuSgtWrQIzs7OaNmyJcLDw1GgQIEsd4RNTk7GxIkTdYIQ4NU55OTkhAULFuhsv3fvHmxtbTNsz4qYmBidf0+cOFE5x4YPH658hpogqG3btjrNYb169ULlypXx9ddfG2wUVr9+/XTOMe3fbXYDj8wcPXoUV69eRWpqKpo1a4bvvvsOjo6OOh2tr127hp49e2LDhg3Zzk/7xjZhwgRUrVpVGVWmXduXkJCAypUrIzAwMMO5kZUgSPtznD9/PgYMGICwsDCsWbNG6U+k/cCTlpaGJk2aoGPHjga/GW/fvh3+/v4AXjVHFSpUSDkXHz58qNS+apc7K2VQq9Xo3bs3VCoVwsPDUbhwYZ0aw0OHDqF169aoXbu2zqhg7bw0tVNvepj+//IHXvVnW7RoEVq3bq0zWCA9PV0Jgho3bqz3/t8nDICM4MWLF7C3t8fw4cN1tj98+BD+/v6wtraGl5eXst1QP+Tx48cjMDBQeTJauXIlGjVqhMKFC8Pd3R2enp7w9/fPVl+jzAK2kydPwtXVVZnf6MGDB2jfvj0KFy6MypUrw8nJCU5OTkhLS8P27dvh7Oyc5VoJTfPS5cuXce/ePQCvAp38+fNj1qxZOmnj4uJQsWLFbI/0At4cqP70009QqVQYO3asThAUFBSEwoULZ3mou1qtxrFjx+Ds7KzUWqnVanh7e8PNzQ2lSpXCr7/+mqGWY/r06ejSpQtat25tsH4/U6dOxZYtWwC86njapk0bNGzYEL/99puSJikpCT4+Pll+ItXYsmULbG1tlU7Fy5cvh42NDWJiYtCrVy/4+voiLCxMudlqmsMaNGigMztwx44d4e3tneUJCV/vyLtq1SpUrVoVW7Zs0Wlyefz4MTp37qzzWWTXhg0bUKZMGYwbNw7p6en45JNPoFKp0LJlS53zcNSoUXBxcTFoP6vx48ejVKlS2LJlC65evYqwsDBYW1vrfK9Hjx6FtbW10tfMEIYPHw4bGxt069YNTZs2RZEiRdC3b1/lHE5KSsLatWvRpEkTuLm5GWVSz/j4eGV4vZWVldJtAPhfE7qm3xWQ/YfXihUrIl++fMoglNenEWnTpg3q1auHZcuWZXjv8+fPszXKb/PmzTA3N4eHhwdKliwJDw8PnQESarUaf/31FywtLdG6dess5/OuYwBkIDt27MCGDRuUiD06Ohr+/v5KUAC8qpno0qUL/vzzT5QtWxajR4/OUl5v+uE9fPgQpUqV0pmW/tKlS1i3bh3mzJljsFFXwKvmttWrVyvNLaNHj0azZs2Umi3g1YX8+++/x4IFC5R8P/nkEwQGBurdTKB9oz9z5gzs7Ozw
5Zdf4t69e1Cr1Rg/fjzKly+vzH2SnJyM8ePHo0qVKgYbiQMg085/S5cuhUqlwrhx45QO5qdPn9Z5Ws8qzeixv//+Wwn4ACAsLAwlSpTIEARpvldDNf+8fPkSHTp0gEqlUmbPPnfuHGrUqIEaNWqgf//++PHHH1G/fn24u7tn+3gfPnyI2bNnw8PDA71798a0adOwatUqAK+q51esWAEPDw80adJEOda1a9fqDNPVuHnzZpbL8ejRI7x8+VL5Ps+dOwdfX1+0aNFCqXF59OgRoqKiULZsWZ3zPju2bNmC/Pnz47vvvtNpru3evTtKlCiBqVOn4osvvkDv3r0z1BxkV3x8PGrXrq3UdGzatAlFixZF/fr1kTdvXp0b8dmzZ7P9XWu+rz///BP29vY6o0R/+ukneHh4YOjQoXj27BkuXLiAdu3aoVOnTgbt8Kw5BrVajSdPnqB58+awtLTUuTY/e/YMzZo1Q7t27QxSY69pSmrYsCFatmyJAgUK6NTiaYK6gwcPom7duujXr5/O+7N63Jr9ah5QFy9ejHv37iE+Ph6Ojo6oX7++Tl9BtVqNAwcOGOQB8l3FAMgARo0aBTs7O3h5ecHS0hKffPIJdu7cicGDB8PFxQUtW7bE9OnTUbNmTfj5+eHZs2do1KiRTlt+VqxYsQJ79uzRudlNmDABoaGhShNTZk9I2b1w/fvvvyhQoABKliyJPn36YMOGDXj48CHKlSuns96XtjNnzqB3794oVqzYW4+Oyexio3lK+eSTT+Dk5ITZs2cjJSUFiYmJmDBhAvLnz4/y5cvDw8MDJUqUMOi09X/99RdUKpXOk6FGTEwMzM3NMW3atAzNLln5vLVHfzx69AglS5ZEt27ddDqQh4WFKU/rz58/x6RJk1C/fn28fPkyy0/GJ0+eVEa4TJs2DefOncPDhw/Rr18/5MuXT1lu4OLFixg9ejS8vb0RFBSENm3aZHsUo+b7TkpKwtdffw1fX18UK1ZMp1/R8+fPsXLlSnh6eqJp06YZbgbp6enZPr+3bt2KRo0aITAwEI0bN8bJkycBvOrsXK9ePVStWhVly5ZFrVq1UKJECZ1h2tnx7NkztG3bVhnenJKSgjNnzmD69OnYtGkTWrRogdDQUHh5eaFr167ZruF7/Rw5c+aMUou6a9cu2NraKsPc69Wrh2LFiuG7777TeY++n/WECRN0+iRq8ipXrhzOnj2r85tftGgRChQooDQx3rlzJ9sdnrX3P3v2bHz00UeoX78+5syZg3v37mHfvn3w8vJCw4YNsXDhQvzwww9o2LAh3N3dlXMtK0GQ9nten36jT58+yJ8/f4amzAcPHuDmzZsGnWtnz5498Pf3R/369XWC57Nnz8LBwQH16tXT6b/4oWMAlE3Tpk1D6dKllSHX33zzjTJyYM+ePVi+fDlq1qyJ2rVro3Xr1srJHxYWhlGjRgHIWjXugwcPYG9vD19fXwQEBGDfvn24d+8eLl68iMKFCytV8ob48WR2sfn000+VCcoqVKigPJlaWFhg3759OmmTkpKwYcMGhIaG6j00+MKFC8pstJo+QJpmsMGDByvzv2ie1P/55x98/fXXWLZsWbbnqcjse5k4cSIsLCwy9HU5f/48ihcvDpVKZZB+MBobNmzAv//+i40bN8LR0RH9+vXTufG1aNECxYoVQ1BQEKytrXWeovWVkJAANzc3TJ8+XWl20VT5P3jwAH369NEJgtLS0pCenq5Tm2eIp3LgVdA3a9YslC5dOkMV/PPnz7F69WqUKVMGQ4cOBWC4ppANGzagYMGCmDBhAhYvXoxmzZrBxsZGmT7hypUr2LNnD8aPH4+lS5cqozoN4enTp/D19cXAgQNx//59DBgwALVr10aZMmXg4OCAGTNmKEtdGHIOK+3fpKaWsXPnzujXr59SW9G1a1c4Oztna5LDGzduwMfHB40aNdJZuuT333/Xqc3SjLJLT0+Hvb19hs7+hrimjRgxAjY2Npg1axZGjhwJ
Z2dnNG3aFMCr/j8RERFK7VeXLl2yFdxrf17fffcdBg4ciOjoaJ1zp0+fPihUqBBWrVqFmzdvIjw8HB06dFBeN1QQpHlQValUGQKus2fPolKlSvDy8jJorfm7jAFQNty4cQPdu3fHypUrAbz64RQtWhTjxo2DlZUVOnXqpDNaRmPYsGEoVaqUXlWLmf0AHj16hLi4OLRt2xZVqlRB9erVsWLFCrRt2xYNGzZ84+R7b+v48eM6N7TffvtN6Wz64sULeHl5ISoqClevXkXNmjURHh4Oc3NzhIWFZeio/Pz580znzvgvarUamzZtgo2NDWrUqAGVSpWhPVw7CDLkOkDan/frM2VPnDgR5ubmOoHOtWvXMHr0aPzyyy8Gm5sjPj4eZmZmyvxNq1evRtmyZTMEQfPnz8fs2bMznXhQX5pzs2DBgkogqz0DbJ8+fWBhYaHTIVTDEBPQ/fjjj0qn28ePH2POnDlwdXXVmYYfeHU+xcbGGrQz/+XLl1GjRg1lMstr167BwcEBpUuXRsGCBY2+/hIALFmyBPnz54eVlRVatmyJJUuWAHj1wFGvXj2DnFuvzz3j7++vUyuTlJQET09PfP755wBeBbWtWrXCwYMHszzJoSa9ZrqI4OBgrF69Wnk9LCwMjo6OOs28t27dgrOzc4YpO7IrLi4OVapUUUY4/vbbb7C0tMww2uv27ds651d2pyiZMGECChYsiDZt2iB//vwICQlR+tYB/5vGwdXV1eBzWGl79OgRKlasCB8fnwwPpKdPn0a1atUyvW99iBgAZcOzZ8+wfv16PHz4EIcPH4ajo6PSoe2rr76CSqVCnTp1lE6Kx44dw8CBA1G+fHm9qs21L1j79+/Hzp07M6zIu3v3bnz11VewsbFB6dKldZ6osvL0MHHiRKhUKuzcuRMvXrxQOvZWrVoV06dPBwBs3LgRnTt3xoULF5CcnIwff/wRXl5eqF27tkE7J2pWIfbz81O2ac/FMnjwYDg5OSE6Otqg62sBr75HzagT7eBG8/loVkBv2rQpQkJClPdl90Z1+vRpTJkyRZnhWUM7CNI0zRiC5kK/fPlylCxZElWrVsWXX36Z4fN88OAB+vXrB5VKla3aJg3tz3rkyJGwtbXFnDlzlE7ySUlJmDVrFtzc3HQm+sus7Nnxyy+/IDIyEmPGjMHTp09x7do1ODs74+OPP8bFixfh6+urjGw0tlOnTikBpua3+8knn6Bbt24ZRl7pS/tasGHDBgwePBhWVlbw8PBQHuQAYMCAAShYsCCGDBkCPz8/eHh4KJ9zVmsjtIOgkJAQBAcHK/27NP3KSpYsie+//x6LFi1CkyZN4OXlle3v9/Vr0e+//66M6Fu7di0KFy6sNGs/fvwYmzdvzvCwlt3r2cmTJ9GmTRvlun3lyhXUqFEDDRs21Anwtm7davAZ8s+dO4c9e/bg5MmTSr+y+/fvo1y5cggICMgQBBkr8HoXMQDKJs3JEh0djbCwMKXW5ZtvvkHXrl0RGhqqc8HYsWNHlkdtDB8+HGXKlEH58uVhZmaGzp07Z5gy/dKlS1i+fDnc3NzQsmXLLB7VKyEhIShbtix27twJ4FUfgW+//RYWFhbo1KkTZs6ciQ4dOmDu3LkAXt2IkpKSDLZCsuYHPH/+fAwaNAiVKlXSWThVe6r6fv36wc3NLdsBkPaFbsaMGbC2tsbYsWNRrVo1VK9eHVOnTlW+8wULFsDR0RFubm6oW7euwUamXLp0CXXr1kXJkiWVDu3aE9ytXr0ajo6O6Nq1a7ZrfV4v64ULF3Dt2jUMHz4cPj4++PzzzzOMpHr06BGmTZtmsJou4NXotRIlSuDo0aPKeaO5CTx79gxz586Fh4cH2rVrZ7A8NeLj41G8eHGsXr1aaZbo27cvWrZsqZxjXbp0gbm5OcqUKWOQhS/f1unTpzFmzBhYW1sbdDbvUaNGoUSJEpg1axa++uorODs7IzAwUKlxevHiBSIjIxEaGoru3bsbbJb614Og
Bg0aKANHbt++jYiICDg5OcHb2xstWrQw6Oz4mmvD9u3bUadOHaxatQpWVlZKDavmtV69ehm0afPbb79FjRo1UKtWLZ2mpbNnz6JGjRpo1KhRhsV8AcMtG1OmTBlUqFABJUuWRM2aNfHLL78AeBUEOTg4KOvZmSIGQNmkOdF69uyJmjVrIikpCc+ePUPTpk11nqiyElVrBxALFixAiRIlsH//fty6dQt79+6Fj48PWrVqpTyJa/9gNm3aBE9Pzyz9kLVvbA0bNkSpUqWwc+dO5VhPnz6NZs2aITw8HDY2NihcuHCG5gFDL5L34sULrFmzBo6OjhlWj9cMB83K3EJvEh8fj759+yrNMc+ePUP//v1RvXp1TJ48WemDce3aNdy+fVs5XkMFBdOmTUOlSpXg6+urNO1p7/vnn3+Gq6trtmZ41v6OHj16lKHJdNCgQfDx8cHUqVOVfj4DBgzQ6VtliON9+fIlOnfujClTpgB4FQBu2LAB9evXx+DBgxEXF4fU1FRMmTIF3bp1M+i5de7cOYwfPx4jR44E8Or3nJqaijp16uh06O/Xrx82btyYo30j4uPj0bFjR1StWtWgo73OnDkDBwcHnZvupUuX0KhRI3h6eiq1MgB0akIMdW6/HgTVr18f69atU16/ceMGkpOTM12YNKtmz56t9Kl5+fIlnJ2dMwxoePbsGRo3box27doZdDZtzUzwRYoU0VnDDnh1/tWuXRve3t4Gr108ePAgChUqhHnz5uHGjRv49ddf0b17d5QtW1Zpenvw4AGsrKwQHBxstEWp32UMgAwkLi4OefPmhZubG5ycnHRGDejr119/zbAKc69evZTOwNrDJCtUqJBpR9DLly/D1tY2y80U2gFbcHCwMiGapunpzp07WL16NVq3bg2VSoWBAwdmKZ/XaY4hISEBS5YswdKlS3Hp0iUAr/rirF27FuXLl0ezZs2QlJSEcePGwdPTM1vrbL1uzZo1qFatGpydnXWeupOSkpRh/FOmTMlwwTB00Ddnzhx4eXnho48+UoI77XPKUKu6T506FaGhoShbtiyioqJ0LsSDBw+Gr68vwsPD0aBBAxQvXjzbNyTt81StVuPFixeoXr06goOD8dNPPyk1A61bt4aXl5eyhMjjx4+V9xris05KSoKvry9KlCiBIUOG6LzWrVs3lClTBuvWrUP//v1RpkyZHF/88enTp/jjjz8MOs8P8CrAcHBwUPrgaK4x169fh42NDfz8/PDTTz/pvMfQkw7+V3OYtuw2t2ns3bsXKpVKmc9Is6RL3bp18fPPP2PJkiUIDg6Gm5ubcn5n95h37typBMyaWbRbtGihDJjROH36NPr27Wvw68c333yDevXqZcira9euCA4OVsqWlJSUpQkVPwQMgAzoyJEjGDt2rE7zgL43izlz5qBixYr45ptvdKp/O3bsiLZt2yr71Ox3/vz5sLGxwd27dzPMFJo/f369Vv/+rx9ggwYNUKZMGezYsUO58WvymzNnjkGe0jT7W7duHcqWLQsPDw8EBQWhdOnSyvw7KSkp2Lx5MxwcHGBvb4/SpUsbpC+KtuvXr6NNmzawsrJCdHS0zmvJyckYOHAgKlasqDQXZIfmmI8fP47ly5fjl19+0ZlZ+KuvvkJQUBAiIiIyBEGGuCmNGTMGNjY2+OGHHzBv3jz4+voiODhYp5Pz9OnTERERke3RMEDGc0yzn7///htVq1aFg4MDJk6cqPSV+PLLL9GoUSODL3ugobkxeXp66vSFOHHiBJo3bw4HBwd4eXkZbKj7u+DOnTtwcnJSHpy0pw4ICQmBm5sbQkNDszyDumYY9f/3PWkHQY0bN4aXl5fOsiZZpZ2vdl/B0aNHo2HDhkqN8alTp1CnTh24uLggMDAQXbt2NViT2x9//AFnZ2cMHjxYqcE9cOAAKlWqpHQoz4whg6BvvvkG9vb2GWot16xZAxsbG4M2872vGAAZUVaCguTkZPTo0QNBQUGYM2eOcuFftGgR
VCoV9uzZA+B/P/KlS5fC399fGQYOvPrxLl68WK8p+rV/eIsXL8aQIUOwaNEinX1oB0GZHZshgqA9e/agWLFiWLhwIYBXNWsqlQrW1tbKGj0vX75EYmIiNm3aZPCnY83ncOfOHbRt2xZBQUFYtGiRTppHjx5hxowZBusPsW7dOtja2sLb2xuurq6oX7++0k4PvAqCateujXbt2umMksmuX375BZUrV1Yuxn/88Qfy5MkDT09P1K1bV2d5Ce3zwxDf86xZs5Sh1prvNTk5WWfywpcvXyIkJAQRERHZzu+/HD9+HNWqVcPHH3+sU+OnVqtx5cqVLM8m/S7S7htiZmamTBwKvKr17dy5M9auXQt7e3sMGzZM7/2PGzcODRs2VG6ubxsEnThxAkOGDDFoADBt2jS4ublh9erVuHfvHv7991/4+fkpfRY17ty5o1ObaqimvnHjxiEoKAiRkZHKw8uBAwfg7OyMtm3bKue9IWgHk+fOnUN6ejp2794NJycn/PjjjzpNmWfOnIGTk9MHs6J7djAAeodobqiPHz9Gt27dUL16dcyePVsJgnr06IFChQphy5YtuHPnDh48eICQkBA0a9Ysy0/Fr99Qx40bB2trawQHB8PGxgZt2rRR5n0BXjWH2dvbY8uWLQavsk1JScGYMWMwYcIEAK9qYsqVK4cePXqgdevWKFy4sMFrezKjOa5bt26hVatWqFWrVoYgSCO7QdCuXbtQokQJzJs3D8CrkXWFCxeGk5OTTpPApEmTEBISkq3ZjV8XHx+vjDLbsmULihUrhh9++AE7duxAkSJFULduXZ1huoBhhrqPHz8eNjY26NixI2rVqgUrKyud/iiPHj3C+vXrERYWZrRlD16XkJAAb29vfPzxxwYdXfcu08xZ1qpVK/Tu3Ru1atVSRkdFRETojGp8W3PmzEGdOnXQsWPHtw6C3lQrqC/tIfpPnz5Fnz59lDnZGjdujAsXLmDu3LkoWrSo8jt6vWxZXdvrTWUfP348AgICEBkZqdQEHTx4EFZWVsqEl9mlyX/9+vVwcHDArFmzlIDr448/hp2dHb777jtcuXIFz549w4gRI1CpUiWD9pl8XzEAesdonj40QVBAQAC++eYbpKWl4d69e/jkk0+QN29eVKhQAZUrV4anp6dyg9A3IHF3d1c6fwKvVl/u0KGD0vwQGxuL4OBghIWF6czG6+HhgebNm2f3UAFkvODs27cPcXFxSEpKgr+/vzL0edeuXVCpVFCpVBmmADAG7SCodevWqFu3rjIrrqE8f/4c/fv3V/qfXLt2DY6OjmjZsiVatWqFChUq6NQEZaefU2bnRmpqKu7fv4/Hjx+jfv36SidkAAgICEDFihUz9I3JrkuXLmHixInKxIJXr15F//79oVKplGDr1KlTaNeuHVq2bJntZQ/0kZCQAH9/f3To0CFb6yzlprdtftLYs2cPOnTogGbNmiEiIkK5ljRp0gQDBgx463y18/vhhx9Qu3ZttG/f/q2CIO1z0xBBrqbZ659//kHJkiURGRmJ2bNno0iRIpg/fz7KlCmDdu3a6TSPGcKPP/6Izz//PMMowfHjx8PZ2RkjR45UHjhPnTpl0DmsduzYgYIFC2L+/PkZBkb07t0bVapUQbFixVC9enWDzl7+vmMA9A54U+CSlJSErl27wt/fH3PnzlVuAnv37sXq1auxbt26LM8XMXHiRFSrVk3Je/Xq1WjatCkaNGigUx28a9cuBAcHo2nTpjo1QYao/dFc7Pbv35+hhmXfvn3w8/NThnkfPXoUbdq0wYABA3Ls5qQdBNWtWxf9+/c3eC3E6dOn8eeffyqLiWoWmNy8eTPy5cuHYsWK6cyamxXa39WNGzcyNBvevHkTjo6OWLx4MYBXTQKdOnXC8uXLDVrLt2HDBqhUKjg7O+Off/5RticmJuKTTz6Bubm5co5du3bN4CPr3sahQ4dQp04dg9a05ZSsNj9pf76PHj3CyJEjUaJECb1/Z9rnyvff
f/9WQZD2ttmzZ2P8+PF65fm6n376Cf369VMGTqxcuRLVq1fH9evX8dtvv6FNmzYoX748VCpVhhFZ2ZGeno4OHTrA29sbM2fOzBAENWnSBGXKlMHHH3+cYXX77FCr1VCr1ejevXuGxWm1+80dPnwYS5cuxbJly5TPhhgA5TrtC8CiRYswbNgwTJ06Vel/kZycjG7dusHf3x9z5szJdCK0rPyIIiMj4e3tDQCIiopCtWrVULVqVWWovbZdu3YhJCQE1atX13nNUPNUlChRAgMHDtRZ52r9+vVQqVTKyJuxY8eiRYsWBp2D5W2CGc1F/f79+8r/ZzUI0rzvn3/+wR9//KEzquj333+Hr6+v0mn9wIEDCA4OxogRIwzWWXHs2LGoWLEiSpYsierVq2PlypV49OgRHj58iPr166NDhw74/vvvERoaijp16hhsPieNo0ePokePHsiXL59yfms+k8TERGUmXO0pFQzdzPo2DF0zkFOy0vwE/O93fOnSJYwePRoODg569Q95Ux4LFy78zyBI+/8XLFiA/PnzY8WKFW+db2aioqIQEhKC0qVLY9OmTTh27BiGDRum9HW6ePEili9fjtatW2fr+pXZefns2TP07t0bfn5++Oqrr3T6ZY4cORJeXl4YPHiwwc/p9PR0BAQEYNy4cQB0r8tqtVrnYYJ0MQDKRdoXgBEjRsDa2hr16tVD9erVoVKplBmXk5OT0bVrV9SoUQNTp041yOygf/75J6pWrQp3d3cUKVIEDx48wK+//opq1aqhY8eOiI+P13nfb7/9hkGDBhn0h3TgwAFYW1tj4cKFGfb7+PFjNGrUCObm5ggKCkKhQoWyPRdKVsuu/XlntwZow4YNKFSoECpVqgQLCwvExMQgLS0Nv/76K6ysrJTAYPTo0ejRo0e2ljN5fYkJGxsbLF26FL///jtatGgBDw8P5cawcuVK1K9fH5UrV0bDhg2z3KyaWd7aTp8+jZYtW6Jo0aLKUgSaz/TmzZuYMWNGjtb4fAiy0/yk/drz589x9uxZZa29t6H9PV+5cgXXrl3T6Ve4YMGCDOVJT0/XeV9MTAysrKx05gLKjlu3bmH48OGwsbHBp59+ivDwcISGhuLGjRsZ0mYlCNIu+8mTJ3HmzBmlRvP58+dKEDRt2jQ8fPgQarUanTp1wurVqw06jYO21q1bIzAwUPntaI7r8uXL+OKLL0xmaQt9MQB6Bxw5cgStWrVSbghJSUmYPXs2zM3NlVlKk5OT0axZM/Tu3dtgzTAhISFQqVQIDQ1Vti1btgy+vr7o2rXrG2cHNdSPd/bs2QgLC9NZ+Vz7gnTnzh3MnDkT06ZNM+iMx/Pnz0fv3r3x+eefK8f4ps9Ue/uePXuyXDuQnp6O+/fvo0aNGliwYAHOnTuHqVOnQqVSITo6GnFxcWjdujXKli2L6tWro1ChQnovHPsmmzdvxoIFCzKs5D1gwAA4Ozsrw53v3LljkEkdtc+PX3/9FT/99BN++OEHJZi7dOkS2rRpg5IlSyoj0F7//BkE6Se7zU+zZs1SBh9kJc+oqCgEBASgWLFi6NChg848QgsWLECdOnXQqVOnDL/jBQsWwMrKCmvXrtUr78y8fowbN25EREQEfH19oVKp0LBhQ4MuqzF69Gg4OTmhTJkyKFmyJMaOHQvgVdPTgAED4OPjg/Lly8PPzw9VqlTJ9lIi2vnfv39fZ+3DTZs2wd3dHQMHDtQ5xtGjR6Nq1aomtcK7PhgA5bJVq1YhMDAQPj4+GRbznDx5MmxsbJSni6dPn2a7GUbj/v37aNq0KSZNmgQXFxedlYeXLVsGPz8/9OjRQ+msaiivX0A8PDwyzCsEQGcOkuweq/YFZ+zYsbCxsUFYWBi8vb3h5uamzHnzXyNC5s+fD5VKpfeU8Zp9PHv2DE+fPsWYMWN0+gDMmjULZmZmmDNnDn777TfExMRgzJgxyirs2XXx4kXkzZsXKpVKZ3FLDS8vL3Tu3FmnrIBhgtyhQ4ei
ZMmS8PLyQoECBeDv74+VK1dCrVbjwoULaNeuHUqXLm3Q4cCmxpDNT8uXL89SGTSj+jZt2oTdu3ejadOmKFeuHGJiYpQ03333HVxcXHT6+MyePRv58+c3WM2PhvaxXb16FVu2bIGNjQ1q1qxpsIfH6dOno3jx4ti1axd2796NRYsWIV++fMqUDampqdi4cSMmTpyISZMmZaiZyY7169ejevXqcHBwwNChQ5UO1V9++SW8vLzg5eWFvn37onnz5rC2tuZw9//AACiXzZ8/Hz4+PihYsKAyB4nm5nPo0CGULl06QxBiqBoYTc3LDz/8gCpVqqBjx47Ka8uXL4eDg4OyFpWhaHfAnTt3Luzs7HDo0CGdIawvXrxAz549s90f4HWnT5/G4MGDlaH0hw8fRvfu3VGuXDn8/vvvSv6aP42YmBgULVo0y0+pGzduREhICFxcXFClSpUMNTszZ86EpaUloqKiDLZ+mrbt27fDyckJdevWVYJNTT79+/c3yvpay5YtQ6lSpXD06FE8efIEjx49QvPmzVGjRg1lROHJkyfRqFEjhIWFGTx/U/AuND/t2bMH7u7uSr+t2NhY5M+fH/Xr10eFChV0VljftGmTEgCo1WqMHz9e7994Vn8fSUlJ2aqBef3hIDw8XKnx0dCMVH3TaNHsDu8HXl2zSpQogc8++wxTpkyBg4MDmjdvjsOHD0OtVmPv3r2IiIhAs2bN8Mknn+gMNqCMGADloDf98FauXIlq1aqhUaNGOhOxXbt2DeXKlTPoaIXMPHnyBIsWLUKVKlXQqVMnZfv27dsNOlTz7NmzsLW1xYwZM5RtPj4+cHV1xf79+5GcnKzMBWRvb2/QpQfWrVsHOzs7eHh46PQFOH78OLp37w5HR0elJkj7mDU3iKwGP4cPH4aVlRX69u2LHj16IG/evBg0aFCGNvno6GgUKVIkQy1gVn355ZdYtmyZ8u/t27ejePHiaNmyJR4+fIhnz57h5cuX8PPzyzB6RF9z587NMKfI559/jrp16+rMWv7w4UPUrl0bDRs2VNJdunSJHTSz4F1pfrp9+zY+++wzpKamYtu2bShRogS+//57XLhwAa6urrC1tcVXX32l8x5DBAKLFi1668VhtfPLbp8fze/TxcUFw4cPV8ql6TM3ePBg1K9fHykpKdluxl25cqXOSLzz589j+vTpSk0u8Or64uPjg2bNmmVYj5H+fwyAcoj2j2jLli3YunUr9u7dq2xbvHixsije2rVrlYng3N3dDRqEvMmTJ0/w448/wtXVNcMEaIbK//Llyxg8eLAyWRfwavLDwMBAODg4wNHREbVr10bJkiUNPk/Fli1b0KxZMxQoUCBDB+/jx4+jZ8+esLCw0Jmifu7cuShWrFiWbxDnz5/H+PHjdZbT+Pbbb1G2bFmMGjUqQxCU3ZXstXXq1Al58+ZVVtoGXgVBJUqUgLOzMxo3boz27dujatWqWVqoV2Px4sVo165dhnNkxIgR8PX1Vf6t6Tu1f/9+5M+fP8PNi0FQ1uRk89PBgweVmeEjIyOV38WzZ8+gVqvRunVrjBkzRjkX2rZtCy8vL3Tt2tWgzdgzZsxA2bJl3+oakd2JDl/Pt1+/frh+/TqmTJmC8uXLK031mv2OGzcOwcHBeuWRmWvXrqFmzZpKjfmDBw9gZ2eH/PnzZ1h38eDBg/D29kbr1q11piqh/x8DoBw2YsQIFC1aFGXLlkXlypV1Lko//fQTXF1dYWlpiaZNm2LChAnKsPecCoLmzp2Ljh07GnSeH21XrlzB6NGjUaZMGcyePVvZvmrVKsyaNQuLFi3Kds3Pm8q+d+9e1K9fH9WqVcuwzlF8fDwmT56sfM7Hjx9Hnjx5Ml2g8W1oFtq0sbHJMOOrpulv7NixOsdqiFmWtfXv3x8FChTQCeC2b9+OypUro1y5cjrLnGTnaVXzme3cuVMZxn/kyBGYm5tnWEstNjYWrq6uuHbtWpbzo1dysvnp3LlzcHV1Re/evdGz
Z0+YmZnpNOWmpKSgatWqylDs5ORktG/fHitWrNBp3s6ukydP4qOPPsLGjRv/37Ta+W3evFmvpYFeN2LECJQoUQLLly/HlStXcOTIETRr1kxnzbQnT56gUaNG6NGjR5bz0aaZ8uPvv//GgwcPEBcXh3LlyqFmzZoZ+vUcPnwY5cuXR+fOnQ06VciHjgGQkWn/CK9cuYJatWrh77//xrFjxzBjxgwUL15cZ82dn3/+GfXq1UPHjh2V9tvXVx03Js3THJC1p/LX5475888/lVWnNS5fvozRo0fD1tZWWQLCULTLvGPHDmzatEkn/3379qFZs2bw9vbOUBOkoblRZHeFZM1CmzVq1MhQ4zF//nxYWlpi4sSJBhvxlNkw3z59+qBAgQJYt26d0rdp+/btKFasmE7fn6zcnLTPy0OHDqFs2bKIjIxUnlpnzpwJCwsLjB07FmfOnMHZs2cRFhaGevXqscbHAHKi+Un7t7Nq1SrY2toiX758Ss2RWq1Geno6nj17hv79+8PPzw8jRoxAvXr14Ovra5CRTxqaOcPKli2rLJr6NqM358+fj2LFiunUuOtj586dKF++PPbt26ezfdOmTWjevDkKFiwIX19fuLu7G3zplqSkJLi7u6Njx464f/8+4uLiYG9vjx49eujMmwa8eugwZLcBU8AAyIi0f/RJSUk4evQoOnXqpEToDx48wJw5c1CsWDGlPRl4NYy1bt26aNeuXYaTPKdk5cf77bffwtXVVbkAPHr0CD179kTZsmUzVLVfunQJoaGhsLKy0qkJMpShQ4eidOnScHZ2RqFChRAUFKQ8Ke/duxctWrSAn5+f0ZfVOH78ODw9PdG7d+8Ma0x9//33OHv2rEHyWbVqFfLkyZPpqL0ePXqgePHi2LRpk3JObt++Hba2tjpTIOhD+0aquRlNnjwZvr6+GD58OG7fvg21Wo3vv/8eRYsWRZkyZVCpUiVUr14923MMmaLcaH6aNm0aunTponxf+/fvh5OTE6pUqYJ+/fpl6MwfFxeH/v37IyAgAG3atDHK99ytWzeYmZlh7NixygKf/9XMFRMTA2tr62zNpr5o0SK4uroqi+JqH8+FCxfw22+/YdKkSZg/f75Rlm45fPgwfH198dFHH+HBgwfYt2+fEgS9bT8oyhwDoBzw2WefwcvLC7Vq1YKvr69On4sHDx7gm2++QYkSJdC7d29l+7Jly+Dp6Ylu3bplq49GToqLi4ODg4PS+RV4VTOgWYvm9YvQiBEj4ODggGrVquH+/fsGG6K6aNEiZb2bW7du4fr16/Dx8YGPj49ywdixYwdq1qyJjz76yCB5/hfthTazUw2vLbMFJENDQ1G2bNkMEwzGxcXBzMwMKpVKZwbmLVu2oEKFCnpNfAe8WkW+Ro0aAIAhQ4agatWqyvIpkydPhqenJ4YPH66sSXTjxg3s3bsX+/fvz5XlLd53udX8dPPmTeV70m5y+emnn+Dt7Y2IiIhMH9CeP3+e6TIb+nj9/Na+Bnbq1AmVKlXCkiVLdGZbBjIGP9np5K3Z17x581ClShUlAFKr1UqguXr1apw/f17nfcborpCQkABPT0+dIKhChQpo3bq1wa4ppogBkBG8Psy0TJkymDJlCvr37w8LCwt8+umnOukfPnyI6OhohIaG6lwwVq5c+V7N4KlWq5GQkIAqVaogKChIOZYjR44gIiICLi4uOhejESNGYPbs2cqFJStWr16NpKQknW0jR45EeHg4gP9djFJSUlClShW0aNFCSXfkyJEcq4Uw5EKb2mX++eefsXLlSgCvbhLNmzeHra2tEgQBr/o3jR07FtOnT88wq/XrN5C3ceDAAZQuXRpOTk6wtrbOMGeRJggaMWKE0idIW070Z/sQvCvNT7/++isqVaqkzBoOvKq99Pb2Rp8+fZRgrEWLFsqCtpryZYV2eb/77jv06tULXbp00Rle3r59e7i4uGDp0qWZnsPffPMNrK2tDTLH0D///ANzc3NERUXpbH/8
+DGaN2+OuXPnZjuPt6EdBD18+BC7d++Gm5tbpk3f9HYYABnRrl278PXXXysdaZ8+fYply5bB0tISgwcP1kmbnJysXDDelxofDc2FGHjVYW/p0qVQqVQICwvTCYJ69+4NGxsbdOjQAR07dkTx4sWztc7V7Nmz0bhx4wwX+G7duqFWrVrKvzVNjr/88gtsbW0z5JlTQZChF9ocNmwYHB0d8fnnnyv7fP78OVq0aAEbGxssXrwYf/zxB5o3b67TMdMQtS9dunSBSqVCQECAsk37vJ0yZQp8fX3Rp08fgw3tNyXvUvPThQsXEBERgRo1augEQT/88AP8/f3h7+8PPz8/lC1b1qDXrhEjRsDOzg6DBg3C5MmToVKpdLoKtG/fHu7u7pg/f77OGonHjx9HtWrVsjyAITMLFixA3rx58emnn2L79u3Ys2cPGjVqhGrVquVobWZCQgJ8fX3Rrl07PHr0iB2es4kBkJFcuHABKpUKKpVKp6PvixcvsGzZMuTPnx9Dhw7N8D5Drzaek9auXYsyZcrgk08+QfXq1VGwYEHUrFlTuUCcP39eZ3I2Qyz1oN3UppmLZteuXShYsCDmzJmjk3bDhg1wc3PL1RuyoRbaXLhwIUqUKKFM6vi6jz76CMWLF4eDgwMCAgIMHlRv3LgRP/30E8qXL4969erpzHitERUVZZAh0KYot5qf3hQwXbhwAb1790ZAQIDOPF6bNm3C1KlTMXToUIP2f9m7d69Ox+Nt27YhT548+P7773XSNWjQAJ07d9Y5xx4+fGiwvnUaarUaGzduRLly5WBnZwdXV1c0atRI+V3lZK3moUOHULt2bYM9SJkyBkAGktlFfufOnbCxsUHbtm2VPhLAqwvEihUroFKpjNIBODdcu3YNZcqUUS6OT58+xe+//w5HR0fUqlVL56KYmpqa6ar2+tB+/+7du1GwYEHMnDkT9+/fR2pqKsaOHQtHR0d8+eWXSEpKwpUrV9C0aVOEhoa+9zdktVqN3r17Y9CgQQD+d9N6/SJ89OhRnDx50qBre2ny19i/fz/KlSuHevXq6aTZtGmTTtr3/TPPLTnZ/KT9nh9//BFRUVGYMGGC0nn/6tWrShCkXR5thgoEVq1ahTp16gB4NYlpoUKFlLmNHj16pPRlA/53fr4+g7sx3L17F+fPn8fZs2dztT+boR6kTB0DIAPQvkEkJycjNTVVOUG3bt2K/Pnzo3fv3jpt1S9evMD27ds/mM6gp06dQunSpXUmJ3v58iW2bt0KCwsLtGzZ0ijD+TVD2QcNGoRKlSph9uzZePr0KW7fvo0pU6agUKFCsLW1RaVKlXQ6oL9PI5AyK2tYWJjO+m2aC//z58+xZ8+eDOmzemPSznvp0qUYN24cBg0apNPvR9P5vUaNGti/fz8aNmyIWrVqGWzdOlOWU81P2t/R0KFDUbRoUdSsWRNeXl7IkycPFixYAODVVB59+vRBUFCQzozEhrZz507Url0b8+fPR+HChTF//nzltd9++w2tW7fWGfKdW7/n9+k6QhkxAMom7R/AtGnT0Lx5c2UxOs3Cmb/99hvy58+PPn36ZNpm+z4GQa/f1J4+fYqKFSti4sSJOtsfPXoET0/PDKvOZ9WWLVvQpEkTAK+mnff391eCTc0s07Nnz8aTJ08AvHpq3bBhA2JjY5Ug4H36vLXPr3/++UcJoj/99FO4ubnh/PnzOt/F9evXER4ebvBp8UeOHAl7e3s0b95cmVFbe9bZ48ePw93dHZUrV0bNmjUNOheKqXgXmp/Onj2LNm3aICEhAWlpaUhPT0dUVBTy5MmjTJx48eJFtGvXDr169TLoDM/aTp8+jerVqyNfvnw6gdbTp08RFhaGbt268dyibGMAZCCjR49G8eLFsXTpUixatAje3t6oUKGCsrzB77//jkKFCqFdu3bZbv7JbdrDq2NiYjBx4kRs374dgwcPRtOmTXVmmE1LS1Nmbs3uJF1paWlYu3Yt7Ozs4OLiAmtr6wxt
/dpB0OvrU2n28b7QvsCPGzcO3t7eykKiDx48QLly5VC3bl0cOXIEDx48wM2bN9GkSRPUqVPHIMepyX/BggUoW7asUtu2ZcsWqFQqFC5cWGekUlpaGo4fP86h7lnwLjQ/LVu2DJUrV4a3tzcSExN1gpOhQ4eiRIkSSr8T7dezGoi8vrbX5MmTMWbMGGVY+Zo1a2Bra4uIiAisXr0amzZtQsOGDeHu7q6cWwyCKDsYABnAv//+Cy8vL/zxxx8AXjV7FS5cWKk21lyYNmzY8MHMgrt27VpYW1ujQ4cOCAwMRP369VGzZk1lxe9x48Zh9+7d+PTTT+Hg4GDQoZqtWrWCSqVCgwYNlG3abeJDhgxBxYoVMWXKlAxD5N9HEyZMQMmSJbF161YkJiYq269evYoqVaqgcuXKsLW1ha+vL7y9vbPVzBcVFYUff/xR+fejR48wceJELFq0CMCrJQUKFy6MmJgY9O3bF1ZWVti8eXOGvD6EczynvCvNT99//z0CAwNRpEgR3L59G8D/+todO3YMdnZ2GWoWs/o9ax/zsGHDYGVlheDgYJQrVw6Ojo5K38ilS5eiadOmyJ8/P2rXro1WrVrlSsdj+jAxADKAI0eOwM7ODs+fP8emTZtQqFAhpc06JSUFixYtyjDy6H2+Qfzzzz86Cy6eOnUKFhYWmDFjBs6cOYMxY8bAyckJjo6OcHZ2NtjCpmq1Gqmpqfj+++8xc+ZMVKxYUWdeH+0+Vn379kXLli3f+yfEa9euwcPDI8O6TZon4NTUVGzZsgUxMTFYt25dtpr5bt68CT8/PwQHB+vU7MTHx+Py5cs4e/YsKleurIyu27lzpzLSUTMbNGVdTjY/ZfbetLQ0rFq1Cs7Ozqhduzbu3bunvHbu3DnY2dll2r8sO+7evYuQkBAcPnxYKdMnn3yCatWqKWuZPXnyBFeuXNGZKoS1i2QIDIAM4Ny5c2jQoAFmzpwJKysrnZWYDx06hA4dOrxx3an30e+//w4vLy8Ary7IDg4OOhfk+Ph4PH/+HNevX8/WJIfAmwPF1NRUrFmzBuXLl9cJgoBX648BH8YIpBMnTqBo0aI4duwYAN3P4+nTp5n2KcvKk7HmMzp79iyaNGmCBg0aZAi6fv31V/j5+Slrfe3fvx+DBw/GvHnzeEPKppxsftLe96VLl3Dz5k1l1u60tDQsW7YMfn5+8Pb2xo4dO7BlyxaEhYXBy8vLoLUuX3/9NZydnTOdG6tHjx5wdnbO9Pje598zvVvMhLKtUqVKkjdvXhk6dKgMHTpU+vTpIyIiz549k6ioKElJSREvL69cLqXhqFQqKV26tFy+fFlq164tISEhMn/+fFGpVPLXX3/JypUr5d69e2JnZydFihTJcj5qtVrMzF6domvWrJHo6Gj58ssv5cKFC5IvXz5p3LixTJ8+XU6cOCFNmjSRS5cuSaNGjWTq1KkCQFQqlfLf94Farc6wrWzZslK4cGHZtm2biIiYmZlJWlqaiIjs27dP1q1bp/xbw9zcXO+8VSqVqNVqcXJyklmzZknevHnl+++/l1WrVilp7t+/L/Hx8ZKYmChXr16V6OhoefjwofTv31/y5MmToRz09p49eybFihWTixcvikqlEjMzM0lNTRURka5du0q+fPnk0qVLIiJSqlQpMTMzE7Varfe5rf2bmjRpkrRr104CAwOlZ8+esnnzZjE3N5d27drJkCFD5MmTJxIWFiZLliwRX19f+euvv8Tc3FzS09MNcsyurq6iUqnk+PHj8vz5cxERefHihYiITJ06Va5fvy6xsbEZ3ve+/J7pPZDbEdj7TvM09fz5c/j7+6Ny5coYP348oqOjUa9ePZ3FQd/nZi9tly5dQoECBaBSqTIs6/Hpp5+iUaNGSufvrNJ+ytOsGVanTh2EhoaiRIkSygi7lJQUZa4UBwcHncU23yfa58bGjRsRExOD+fPn4/jx44iIiEBoaCh+/vlnJU1aWhoaNWqELl26GCxv7c/89OnT
CA0NRf369ZWlNgAgPDwcKpUKFSpUQLVq1d7Lzzq3vQvNT5999hlKlCiBTZs2YdeuXWjevDkKFy6szJ788uVL/Pzzz6hfvz5CQ0OV8hhy/pm0tDTs2bMHdnZ2GZYBOnXqFMqVK5fp4r5EhsIAyAA0N5CnT5/io48+Qt26ddGoUSMMHDjQKKsDvws2btyIggULYuTIkTh79ixOnDiBYcOGoUiRIgZdoXjevHmws7PD4cOHAUBZZqNw4cJKp/O0tDQkJyd/EIttDh06FDY2NqhZsyYKFiyIRo0a4aOPPkLz5s0RFBSETp06Ydy4cahRowbc3NyyfZzagdf58+dx4cIFpTnk3LlzaNy4cYYgaNOmTdi6det7Oa1Absut5iftoGvv3r3w9vZWZln+7bffULhwYdSpUweFChVS1ut7+fIlli5diqCgIDRv3jzTUZXZpVarsXv3bpQqVQr169fHmjVrEBsbiyZNmsDb25sdncmoGAAZiPaF7fnz5zo3hQ/xBpGWloYff/wRVlZWKFu2LKpWrQoPDw+DdXgGgPv37+PTTz/FkiVLALxay6tw4cKYPn06WrdujSJFimS6FMT7etFcs2YNSpcujfj4eKjVajx48AARERFo2bKlsnBs9erV0bRpU/Tt2zfbwbX2TTEqKgru7u6oUqUKSpcujYULFwJ4FQSFhoaiQYMGma6t9L5+1rlB+xoxceJE+Pn5wcHBAaGhocrM2S9fvsTy5cvh7OyMfPnyoW3btoiKilL6emXl89bO9/Hjx7hz5w7GjBkDtVqN33//HSVLlkRMTAwuXryIatWqoUCBAli8eLHy3pUrV8LFxQXt2rUzSi22JggqX748VCoVBg0ahIEDByq1TTzHyFgYAP0H7RvE23SoNcUOe9euXcOff/6Jo0ePZnuNrcwurnFxcbh48SJOnTqFihUrKisvr1y5UhmBpOkg/L6bNm2asm6X5rO4desWwsPD0bx580zfY4jgetKkSShRogR+//13PHnyBC1btkSRIkVw6tQpAK86RoeFhcHDwwM7d+7Mdn6mLiebn7R/U1999RX69u2LK1euKPtq27YtRowYoVyn2rZtCxcXFzRq1EgJPNLS0rBmzRpcunRJr7xfv/b917UwPT0du3fvzjCy832fM43ebQyA3uD10Tavj2Z60485s6CJ/n/an/eiRYt0FpAFgBUrVqB27drK97B9+3b06dMHM2bMeO9r2DTnyYwZM+Dh4aHMYq05rsOHD0OlUmUYSZjV80v7s05PT0fTpk2V/kUbNmxA0aJF8e233wL43wrvJ06cQGRk5AfTjy0nvQvNTyNGjECJEiWwfPlyJZB59OgRKlasiOjoaACvlvFp27YtNm/erJQ5q7Uv2sccHx//VvvRBEElS5ZE69ats5QvkT4YAGVC+yL/xRdfoEGDBqhUqRI6deqE/fv3v/HHrP2jX7VqFfbu3Wv0sn5ohg0bhnLlyuGLL77AtWvXlO3z58+HSqXC5cuX8fDhQzRv3hz9+vVTXn/fgyDgVcdPc3NzTJgwQWf7gQMH4O7ujgsXLhg0v/Hjx+OLL76AnZ0dzpw5g927d+vMYfX06VOMHTsWly9f1nkfmyTe3rvQ/LRz506dldU11Go1+vfvj/LlyyMqKgq1a9eGv7+/8v0aYpLDkSNHom7duhnOof967549e2BhYYH27dtnKX+it8UA6D+MGzcOtra2WLhwIRISElCkSBE0aNAA169fz5BW+0e/YMECqFQqbN++PSeL+95bsmQJSpYsiYMHD2Z47c6dOwgODoZKpYKzs7PO6LoPyeLFi5E3b15ERkbir7/+wj///IPGjRvrLC6aVdrvX7lyJezt7XHy5El06dIFISEhKFCggDL5HADcuHEDtWrVwtKlSwGwRlNfudn8pG3RokVwdXXVqcXWXs5m2LBh8PPzQ5s2bQw6YvX06dOoU6dOhsArM9r5nThxAvv27VOWxCAyFgZAb3D+/HlUq1YN27ZtAwDs
27cP+fPnx3fffZchrfaNISYmBtbW1li3bl2OlfVDMWTIEPTo0QPA/y6Irz9B//zzz1i2bNkHPQJp3bp1KF26NMqUKYNKlSohKCjIoDemPXv2oG/fvpg1axYAYO7cuXB0dESzZs2UNMnJyWjcuDHq1q3LGp9syunmJw3NfubNm4cqVaooAZBarVbOow0bNuDo0aNIS0sz6CzLU6dORYMGDdCsWbP/dzka7evnt99+i+rVqxu8tpMoMwyA3uDUqVNwdXUF8GrIt3bTQHJyMlavXp2hBiImJgZWVlZKOz69WWY38m7duikrvWt79uwZduzYkWH7h3xjvnXrFo4dO4ZDhw4ZdGj/rVu3ULFiRRQuXFi5+aalpWHIkCHw8PCAp6cn2rZti4CAAHh4eHDdpWzK6eanzPzzzz8wNzdHVFSUzvbk5GQ0b95cWdpEU66seP19mzZtgkqlgrW19X8OUni95rxgwYJYs2ZNlspApC8GQMj8R3/37l04OTlh0KBBsLa21lneIiEhAXXq1NGZpOvrr79G8eLFGfzoSTOhIfBqKHbZsmWRkJCg853cuXMHTZs2xW+//ZYbRXwnGPKGePz4caVmSdOxOi0tDZs2bUJkZCQGDhyImTNnfrBzWOWk3Gp+et2CBQuQN29efPrpp9i+fTv27NmDRo0aoVq1atn+frWD4/PnzyvLWhw8eBDm5ubo2rVrhqUugIw151ZWVqw5pxxl8gGQ9sUmNTU1Qwe+woULIyIiQtn2/PlzNG3aFGFhYcp7Hz58CAcHByxbtiznCv4B2L17NywtLZWVnwHAy8sLLi4u2LVrF65du4YrV66gcePGCAoKYi2EAR0/fhyenp74+OOPcfz48Tem42eeNbnZ/PSm8mzcuBHlypWDnZ0dXF1d0ahRo2zV8H377bc4evSo8u9Ro0bB1dUVxYoVQ2RkJBITE3HgwAGYm5ujd+/eyoSPme2nSJEifHikHGfSAZB2sPPFF1+gdevWqFmzJr799ltcuXIFt27dQuvWrVGhQgUMGjQIY8eORb169eDm5qZcODQXrMePH+fKMbzPLly4gOHDh6NcuXJKf5Rnz56hVq1aqFChAooUKQIvLy/4+fmxKcYIEhIS4O3tjV69euHkyZO5XZwPUk40P+nj7t27OH/+PM6ePZutptWLFy+ibNmy6NWrF86dO4dNmzbBzs4OGzZswMSJE+Hv74/mzZvjypUriIuLQ548edC3b1/cuHFDZz+bN29GkSJF2OxFuUIFALm9Hllu0F4UcOrUqfLll1/KgAED5NSpU3L16lUpUqSILFy4UAoUKCArVqyQxYsXS6VKlaRcuXLy1VdfKYs/5smTJ5eP5N2nOcUyW8Tw6tWrMn/+fFm6dKmMGDFCBg0aJCIiu3fvlnv37om1tbU0aNBAzM3N+XkbwdGjR6VPnz7i4OAgX375pZQvXz63i/TBWbhwoQwYMED69esnTZs2lXz58snUqVMlMTFRjhw5kqvntPZ1UF/Hjh2Tjz/+WGrVqiVmZmbi4uIiERERIiKyZcsW+eqrr8TKykrmzZsnN2/elMDAQPniiy9kxIgRyj527NghFhYWUrt2bYMcD5FecjkAy3UXL15Ely5d8PvvvyvbfvnlFzRr1gxNmzZVZjd+/SmJNRFvR/M5aZ5uY2JidBb1BIArV65g1KhRKFWqlNLR/E37IcM7ePAgevbsyUkOjcQYzU/viiNHjsDX1xdFixbF119/rfPaL7/8grp166JFixa4ePEiTp48yf5k9E4xuQBIu5r5559/hkqlgoODQ4ZRGqtXr0aFChWUOWl4c9Bf//79UblyZeVCf/PmTbRr1w6VKlXK0N5/8eJFBAYGonDhwvjqq69yo7gmTfO74HluPIZqfnrX/P3336hQoQIaNmyIv//+W+e1X3/9Fa6urhgxYoSy7UM4ZvowZK3u8z2m3QzTuXNnad68uVy9elXi4+MlNTVVea1t27by8uVLiY2NFRHJcjWxKevc
ubOo1WoJDg6WtLQ0KV26tIwYMULq1asn48aNk7Vr1yppy5cvL66urlK5cmU5cOCA0mxGOUOlUgkAnudGZGNjIxUrVhQnJycxMzMTtVr9QTTpuru7y/r16+XevXvyzTffyKlTp5TXmjRpIgsXLpSpU6cq2z6EY6YPg0le7b788ktp27atiIhs3LhRGjduLJMnT5YdO3ZIWlqaiIg8fPhQChUqJCVKlMjNor7XgoKCZOXKlXLv3j2pX7++vHz5Unx8fKRv375So0YNiYqKkvXr14uISEpKijx//lyGDRsmq1evVm7IlHMy66NFxvMhBZseHh7yww8/yJEjR2T27Nnyzz//KK8FBQWJubm5pKen52IJiTIyyU7Qq1atkqioKPn555/F19dXRERCQkLkwIED0r59e3F1dZVdu3bJ+fPn5fjx43xi0ZN2x8otW7bIP//8I6NGjZLQ0FDZvHmz5MmTRxISEmThwoWyfPlyCQwMlHv37gkAOXz4sJibmwsA3pCJ3jPsVE/vkw/nEeQNMovvqlWrJubm5pKQkKBs+/3336V+/fry/fffy8GDByUoKEhOnTqljPait6cJfjSjuh4/fizNmjWTAwcOSL169SQtLU28vb1l7NixEhMTIyVKlJDg4GA5dOiQmJubi1qtZvBD9B7y8vKSuXPnSuHChcXBwSG3i0P0nz7oGiDtYdMpKSlSsGBB5bVJkybJggULJD4+XkqXLq1sb968uZw6dUoWLFggwcHBOV7mD8WRI0ckNDRUVqxYIcHBwZKeni579+6ViIgIKVfu/9q7+5iqyz6O4+/DgzxIzCgmOkEm5SYyVAwKVDTUgf2RsjONpKmE6WxItRBHBTqsFFdTF1trFYNmiChFFKIIKoG0mgzQJoZjgBWID2SNfODpd//hPOvcgcHtbajn8/rrnOu6ftf1PT/Yft9zPYAPZWVlODo6AtYzRjrqLnL/uzWDeyfH7EXutgf2N9MwDMuDdNu2bSQkJJCfn2+pj4uLw8fHhyNHjgBw/fp1AIqKivDz8yM+Pp4DBw5o3fp/9Pvvv9PX18fUqVMBsLe3Jzw8nB07dlBZWclzzz1Hd3c3YL0XQsmPyP1Pm+rlfvBA/nYePXqU3NxcAJKSknjjjTfo7e0lPj4es9nMhx9+iLe3N48//jhZWVkAODs709PTA0BpaSleXl4kJSVZnQyTgfX39/+tbPr06Xh4eLBnzx5LmYODA8HBwfj5+VFYWEhiYuK/GaaI/Iu0jC33ugdqCcwwDLq6ujCbzXR3d/PQQw9RVVVFbW0tvr6+/Pjjj+zatcuy0TYyMpKMjAzy8/Mxm82A9RLMuXPn8PHxGcmPdM/76xR3dnY2Z86coauri5CQEKqqqrhw4QIxMTHExMQAcOnSJdavX88rr7xCcHAw9vb2Ixm+iIjYqAcqAbqls7OTsLAwGhsbeeedd0hJSbHUdXd3c/XqVd59911OnjxJaWkpL774Ip988oll3bqvr08P5mFKTk7ms88+IzY2ltbWVlpaWvDw8MDV1ZW2tjZmzpxJWFgYWVlZ9Pb2UlFRgZ2dne61iIiMiAcyAbpy5QqxsbF0dXXh5OTEypUriY2NBaxneDo7OykuLiY+Pp6KigpCQ0NHMuz71sGDB3n55ZfJy8sjJCSE/Px8XnjhBYqKipg6dSp5eXnk5uZib2+Pp6cnRUVFODo6aoOkiIiMmAfy6TNmzBiKi4vZu3cvjo6OfPrpp3z++efAzX0o/f39dHd34+HhwdKlSwkPD+fEiRMjHPX9q62tDW9vb0JCQti/fz+rV69m165dREVF4e3tzezZs6mpqeHIkSMcOHAAR0dHent7lfyIiMiIeaCfQF5eXmRmZuLq6kpOTg7Z2dn09fWxYMEC0tLSgJubn//44w/OnTs3wtHevxwcHPD29qakpIS4uDi2b9/OunXrAPjyyy8pKCigs7MTd3d3y+kQnfYSEZGR9EAugf235uZmkpKSaGho4MaNG7i6ulJT
U8OoUaP47rvvWLx4MWVlZQQGBo50qPelM2fOMG3aNHp6esjKymLVqlUAXLt2jejoaCZMmMDHH3+sUyEiInLPsIkECKC9vZ2amho6OjpYuXIlDg4OGIbB+fPnMZlMeHl5jXSI97X9+/ezYsUK1q9fz6JFizAMg61bt9LR0UFNTY3lfisJEhGRe4HNJED/TX9x+P+rr6+P/Px8NmzYANxcfhw/fjwFBQU4OjrqtJeIiNxTbDYBkrvj4sWLXLlyBScnJ7y9vTGZTEo2RUTknqMESO4qHXUXEZF7kRIgERERsTn6ai4iIiI2RwmQiIiI2BwlQCIiImJzlACJiIiIzVECJCIiIjZHCZCIiIjYHCVAIiIiYnOUAImIDJOvry87d+4c6TBE5A4oARKRYbl48SLr1q3Dx8cHJycnvLy8iIyM5Pjx4yMdmhITERky/YMmERkWs9lMd3c3OTk5TJo0iY6ODsrLy7l8+fJdG7O7u5tRo0bdtf5FxPZoBkhEhuzKlStUVlaSkZHB008/zcSJEwkJCSElJYVnn33Wqt3q1avx9PTE3d2diIgI6uvrrfr6+uuvCQ4OxtnZmUcffZTo6GhLna+vL1u2bGHFihW4u7uzZs0aAKqqqpgzZw4uLi54e3uTmJjIn3/+CcC8efNobW3ltddew2QyYTKZbvs51q5dy9ixY3F2diYgIIBvvvnGUl9QUMDUqVNxcnLC19eX999/f9C+WlpaMJlM1NXVWfVvMpk4duwYAMeOHcNkMnHo0CFmzJiBi4sLERERXLhwgZKSEqZMmYK7uzvLly/n6tWrln7mzZtHYmIiycnJeHh44OXlxebNmwf/AYnIkCkBEpEhc3Nzw83NjcLCQm7cuDFou6VLl1oe7jU1NQQFBTF//nw6OzsBKC4uJjo6mmeeeYba2lrKy8sJCQmx6uO9995j2rRp1NbWkpqaSlNTE1FRUZjNZk6ePMnevXupqqoiISEBgC+++IIJEyaQnp5Oe3s77e3tA8bW39/PokWLOH78OLt37+b06dNs27YNe3t7AGpqali2bBkxMTGcOnWKzZs3k5qaSnZ29h3fv82bN5OZmUl1dTU///wzy5YtY+fOneTm5lJcXExpaSkffPCB1TU5OTmMHj2a77//nu3bt5Oens7hw4fvOBYRm2eIiAzD/v37jYcffthwdnY2wsLCjJSUFKO+vt5SX1lZabi7uxvXr1+3us7Pz8/46KOPDMMwjNDQUCM2NnbQMSZOnGgsWbLEqiw+Pt5Ys2aNVVllZaVhZ2dnXLt2zXLdjh07bhv/oUOHDDs7O+Onn34asH758uXGwoULrco2bNhg+Pv7W8V3a5zm5mYDMGpray31v/32mwEYR48eNQzDMI4ePWoARllZmaXN1q1bDcBoamqylK1du9aIjIy0vJ87d64xe/Zsq1iCg4ONjRs33vYzisg/0wyQiAyL2Wymra2NoqIioqKiOHbsGEFBQZYZkvr6erq6unjkkUcsM0Zubm40NzfT1NQEQF1dHfPnz7/tOE888YTV+/r6erKzs636jIyMpL+/n+bm5iHHX1dXx4QJE5g8efKA9Q0NDcyaNcuqbNasWZw9e5a+vr4hjzOQwMBAy+uxY8fi6urKpEmTrMouXLgw6DUA48aN+1sbERk+bYIWkWFzdnZm4cKFLFy4kNTUVFavXs2mTZtYtWoVXV1djBs3zrL/5a/GjBkDgIuLyz+OMXr0aKv3XV1drF27lsTExL+19fHxGXLsQxl7OOzsbn6PNAzDUtbT0zNgW0dHR8trk8lk9f5WWX9//6DXDNZGRIZPCZCI3DF/f38KCwsBCAoK4vz58zg4OODr6ztg+8DAQMrLy4mLixvyGEFBQZw+fZrHHnts0DajRo36x1mawMBAfvnlFxobGwecBZoyZcrfjvQfP36cyZMnW/YJ/ZWnpycA7e3tzJgxA8BqQ7SI3Ju0BCYiQ3b58mUiIiLYvXs3J0+epLm5mX379rF9+3YWL14M
wIIFCwgNDWXJkiWUlpbS0tJCdXU1b775JidOnABg06ZN7Nmzh02bNtHQ0MCpU6fIyMi47dgbN26kurqahIQE6urqOHv2LF999ZVlEzTcPD327bff8uuvv3Lp0qUB+5k7dy7h4eGYzWYOHz5Mc3MzJSUlHDx4EIDXX3+d8vJytmzZQmNjIzk5OWRmZpKUlDRgfy4uLjz11FNs27aNhoYGKioqeOutt4Z9b0Xk36UESESGzM3NjSeffJIdO3YQHh5OQEAAqampvPTSS2RmZgI3l2gOHDhAeHg4cXFxTJ48mZiYGFpbWxk7dixw83j3vn37KCoqYvr06URERPDDDz/cduzAwEAqKipobGxkzpw5zJgxg7S0NMaPH29pk56eTktLC35+fpaZmYEUFBQQHBzM888/j7+/P8nJyZaZo6CgIPLz88nLyyMgIIC0tDTS09NZtWrVoP1lZWXR29vLzJkzefXVV3n77beHektFZISYjL8uXIuIiIjYAM0AiYiIiM1RAiQiIiI2RwmQiIiI2BwlQCIiImJzlACJiIiIzVECJCIiIjZHCZCIiIjYHCVAIiIiYnOUAImIiIjNUQIkIiIiNkcJkIiIiNic/wAJmcye1gzPGQAAAABJRU5ErkJggg==",
706 | "text/plain": [
707 | ""
708 | ]
709 | },
710 | "metadata": {},
711 | "output_type": "display_data"
712 | }
713 | ],
714 | "source": [
715 | "fig, ax = plt.subplots()\n",
716 | "\n",
717 | "risks = [res[1].risk().value for res in results]\n",
718 | "columns = [res[0] for res in results]\n",
719 | "\n",
720 | "ax.bar(x=columns, height=risks, alpha=0.5, ecolor='black', capsize=10)\n",
721 | "\n",
722 | "plt.xticks(rotation=45, ha='right')\n",
723 | "ax.set_ylabel(\"Measured inference risk\")\n",
724 | "_ = ax.set_xlabel(\"Secret column\")"
725 | ]
726 | },
727 | {
728 | "cell_type": "markdown",
729 | "id": "b3e8c81d-7813-4779-8e27-3a633ec20ee7",
730 | "metadata": {},
731 | "source": [
732 | "As visible, a few columns in the dataset carry a significant inference risk. This means that an attacker in possession of the synthetic dataset can use it to infer some attribute of records in the original data, *beyond what can be explained by utility*."
733 | ]
734 | }
735 | ],
736 | "metadata": {
737 | "kernelspec": {
738 | "display_name": "Python 3 (ipykernel)",
739 | "language": "python",
740 | "name": "python3"
741 | },
742 | "language_info": {
743 | "codemirror_mode": {
744 | "name": "ipython",
745 | "version": 3
746 | },
747 | "file_extension": ".py",
748 | "mimetype": "text/x-python",
749 | "name": "python",
750 | "nbconvert_exporter": "python",
751 | "pygments_lexer": "ipython3",
752 | "version": "3.11.11"
753 | },
754 | "vscode": {
755 | "interpreter": {
756 | "hash": "237cf5f6b3dcd73bf2688629baee50bd53e43ee0aa8f2bde7060bbd4d3c193da"
757 | }
758 | }
759 | },
760 | "nbformat": 4,
761 | "nbformat_minor": 5
762 | }
763 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | [build-system]
5 | requires = ["setuptools>=61.0"]
6 | build-backend = "setuptools.build_meta"
7 |
8 | [project]
9 | name = "anonymeter"
10 | version = "1.0.0"
11 | authors = [
12 | { name="Statice GmbH", email="hello@statice.ai" },
13 | ]
14 | description = "Measure singling out, linkability, and inference risk for synthetic data."
15 | readme = "README.md"
16 | requires-python = "<3.12, >3.7" # limited by Numba support
17 | license = {file = "LICENSE.md"}
18 | classifiers = [
19 | "Programming Language :: Python :: 3",
20 | "License :: OSI Approved :: BSD License",
21 | "Operating System :: OS Independent",
22 | ]
23 |
24 | dependencies = [
25 | "scikit-learn~=1.2",
26 | "numpy >=1.22, <1.27", # limited by Numba support
27 | "pandas>=1.4",
28 | "joblib~=1.2",
29 | "numba~=0.58",
30 | ]
31 |
32 | [project.optional-dependencies]
33 | notebooks = [
34 | "jupyterlab~=3.4",
35 | "matplotlib~=3.5",
36 | "seaborn~=0.11",
37 | ]
38 |
39 | dev = [
40 | # Linting and formatting
41 | "ruff~=0.1.14",
42 | "mypy~=1.8.0",
43 |
44 | # Pre-commit checks
45 | "pre-commit~=3.5",
46 |
47 | # Testing
48 | "pytest~=7.4",
49 |
50 | # Building and packaging
51 | "build~=0.10",
52 | "twine~=4.0",
53 | ]
54 |
55 | [project.urls]
56 | "Homepage" = "https://github.com/statice/anonymeter"
57 | "Bug Tracker" = "https://github.com/statice/anonymeter/issues"
58 | "Changelog" = "https://github.com/statice/anonymeter/blob/main/CHANGELOG.md"
59 |
60 | [tool.ruff]
61 | # https://docs.astral.sh/ruff/configuration/
62 |
63 | line-length = 120
64 |
65 | select = [
66 | "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
67 | "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
68 | "E4", # https://docs.astral.sh/ruff/rules/#error-e
69 | "E7",
70 | "E9",
71 | "NPY",
72 | "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f
73 | "I001", # isort
74 | "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
75 | "YTT", # https://docs.astral.sh/ruff/rules/#flake8-2020-ytt
76 | "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
77 | "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
78 | "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
79 | "RUF",
80 | ]
81 |
82 | [tool.ruff.format]
83 | quote-style = "double"
84 | indent-style = "space"
85 |
86 | [tool.ruff.isort]
87 | known-first-party = ["anonymeter"]
88 | forced-separate = ["tests"]
89 |
90 | [tool.ruff.lint]
91 | extend-select = ["NPY201"]
92 | preview = true
93 |
94 | [tool.mypy]
95 | ignore_missing_imports = true
96 | follow_imports = "silent"
97 | show_column_numbers = true
98 | check_untyped_defs = true
99 | show_error_context = false
100 | exclude = [
101 | "docs",
102 | "build",
103 | "dist",
104 | ]
105 |
106 | [tool.pytest.ini_options]
107 | filterwarnings = [
108 | "ignore::UserWarning",
109 | "ignore::FutureWarning",
110 | "ignore::PendingDeprecationWarning",
111 | ]
112 | testpaths = [
113 | "tests",
114 | ]
115 | pythonpath = [
116 | "src",
117 | ]
118 | xfail_strict=true
119 |
--------------------------------------------------------------------------------
/src/anonymeter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/src/anonymeter/__init__.py
--------------------------------------------------------------------------------
/src/anonymeter/evaluators/__init__.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Tools to evaluate privacy risks along the directives of the Article 29 WGP."""
5 | from anonymeter.evaluators.inference_evaluator import InferenceEvaluator
6 | from anonymeter.evaluators.linkability_evaluator import LinkabilityEvaluator
7 | from anonymeter.evaluators.singling_out_evaluator import SinglingOutEvaluator
8 |
9 | __all__ = ["InferenceEvaluator", "LinkabilityEvaluator", "SinglingOutEvaluator"]
10 |
--------------------------------------------------------------------------------
/src/anonymeter/evaluators/inference_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Privacy evaluator that measures the inference risk."""
5 |
6 | from typing import List, Optional
7 |
8 | import numpy as np
9 | import numpy.typing as npt
10 | import pandas as pd
11 |
12 | from anonymeter.neighbors.mixed_types_kneighbors import MixedTypeKNeighbors
13 | from anonymeter.stats.confidence import EvaluationResults, PrivacyRisk
14 |
15 |
def _run_attack(
    target: pd.DataFrame,
    syn: pd.DataFrame,
    n_attacks: int,
    aux_cols: List[str],
    secret: str,
    n_jobs: int,
    naive: bool,
    regression: Optional[bool],
) -> int:
    """Attack ``n_attacks`` sampled targets and count the correct guesses.

    Parameters
    ----------
    target : pd.DataFrame
        Records to attack; ``n_attacks`` of them are sampled without
        replacement.
    syn : pd.DataFrame
        Synthetic records the guesses are drawn from.
    n_attacks : int
        Number of targets to sample (and of guesses to make).
    aux_cols : list of str
        Columns used for the nearest-neighbor search (ignored when
        ``naive`` is True).
    secret : str
        Column whose value is being guessed.
    n_jobs : int
        Forwarded to ``MixedTypeKNeighbors``.
    naive : bool
        If True, guesses are the secrets of randomly sampled synthetic
        records. Otherwise each guess is the secret of the synthetic record
        returned by a 1-nearest-neighbor search on ``aux_cols``.
    regression : bool, optional
        Passed on to ``evaluate_inference_guesses``. If None, it is set to
        whether ``target[secret]`` has a numeric dtype.

    Returns
    -------
    int
        Sum of the per-target outcomes from ``evaluate_inference_guesses``.

    """
    # Resolve the task type first, then sample: this keeps the order of
    # failures and of random draws unchanged.
    if regression is None:
        is_regression = pd.api.types.is_numeric_dtype(target[secret])
    else:
        is_regression = regression

    attacked = target.sample(n_attacks, replace=False)

    if not naive:
        searcher = MixedTypeKNeighbors(n_jobs=n_jobs, n_neighbors=1).fit(candidates=syn[aux_cols])
        nearest = searcher.kneighbors(queries=attacked[aux_cols])
        if isinstance(nearest, tuple):
            raise RuntimeError("guesses_idx cannot be a tuple")
        guesses = syn.iloc[nearest.flatten()][secret]
    else:
        guesses = syn.sample(n_attacks)[secret]

    outcomes = evaluate_inference_guesses(guesses=guesses, secrets=attacked[secret], regression=is_regression)
    return outcomes.sum()
44 |
45 |
def evaluate_inference_guesses(
    guesses: pd.Series, secrets: pd.Series, regression: bool, tolerance: float = 0.05
) -> npt.NDArray:
    """Evaluate the success of an inference attack.

    The attack is successful if the attacker managed to make a correct guess.

    In case of regression problems, when the secret is a continuous variable,
    the guess is correct if the difference between guess and secret, relative
    to the magnitude of the guess, is not larger than a given tolerance. In
    the case of categorical target variables, the inference is correct if the
    secrets are guessed exactly. A missing guess for a missing secret always
    counts as correct.

    Parameters
    ----------
    guesses : pd.Series
        Attacker guesses for each of the targets.
    secrets : pd.Series
        Array with the true values of the secret for each of the targets.
    regression : bool
        Whether or not the attacker is trying to solve a classification or
        a regression task. The first case is suitable for categorical or
        discrete secrets, the second for numerical continuous ones.
    tolerance : float, default is 0.05
        Maximum value for the relative difference between guess and secret
        for the inference to be considered correct.

    Returns
    -------
    np.array
        Array of boolean values indicating the correctness of each guess.

    """
    guesses_np = guesses.to_numpy()
    secrets_np = secrets.to_numpy()

    if regression:
        # Normalise by the *magnitude* of the guess. Dividing by the signed
        # guess would make the ratio negative for negative guesses, so every
        # such guess would pass the `<= tolerance` check regardless of how
        # far off it is. The small constant avoids a division by zero.
        rel_abs_diff = np.abs(guesses_np - secrets_np) / (np.abs(guesses_np) + 1e-12)
        value_match = rel_abs_diff <= tolerance
    else:
        value_match = guesses_np == secrets_np

    # Comparisons involving NaN are False, so pairs that are both missing
    # are detected separately.
    nan_match = np.logical_and(pd.isnull(guesses_np), pd.isnull(secrets_np))

    return np.logical_or(nan_match, value_match)
90 |
91 |
class InferenceEvaluator:
    """Privacy evaluator that measures the inference risk.

    In an inference attack the attacker uses the synthetic dataset to learn
    some (potentially all) attributes of a target record from the original
    database. The attacker knows part of the target's attributes (the
    auxiliary information, AUX) and uses a similarity score to find the
    synthetic record that matches the AUX best; the secret of that record is
    the guess. The success of this attack is compared with the baseline
    scenario of a trivial attacker who guesses at random.

    .. note::
       For a thorough interpretation of the attack result, it is recommended
       to set aside a small portion of the original dataset to use as a
       *control* dataset for the Inference Attack. These control records
       should **not** have been used to generate the synthetic dataset. For
       good statistical accuracy on the attack results, 500 to 1000 control
       records are usually enough.

       Comparing how successful the attack is when targeting the *training*
       and the *control* dataset gives a more sensitive measure of possible
       information leaks during the training process. If, using the
       synthetic data as a base, the attack is more successful against the
       original records in the training set than against the control data,
       then specific information about some records has been transferred to
       the synthetic dataset.

    Parameters
    ----------
    ori : pd.DataFrame
        Dataframe with the target records whose secrets the attacker
        will try to guess. This is the private dataframe from which
        the synthetic one has been derived.
    syn : pd.DataFrame
        Dataframe with the synthetic records. It is assumed to be
        fully available to the attacker.
    control : pd.DataFrame (optional)
        Independent sample of original records **not** used to
        create the synthetic dataset. This is used to evaluate
        the excess privacy risk.
    aux_cols : list of str
        Features of the records that are given to the attacker as auxiliary
        information.
    secret : str
        Secret attribute of the targets that is unknown to the attacker.
        This is what the attacker will try to guess.
    regression : bool, optional
        Specifies whether the target of the inference attack is quantitative
        (regression = True) or categorical (regression = False). If None
        (default), the code will try to guess this by checking the type of
        the variable.
    n_attacks : int, default is 500
        Number of attack attempts.

    """

    def __init__(
        self,
        ori: pd.DataFrame,
        syn: pd.DataFrame,
        aux_cols: List[str],
        secret: str,
        regression: Optional[bool] = None,
        n_attacks: int = 500,
        control: Optional[pd.DataFrame] = None,
    ):
        # The secret must be one column name, and that column must exist in
        # the original data. Fail before storing anything on the instance.
        if not isinstance(secret, str):
            raise ValueError("secret must be a single column name")
        if secret not in ori.columns:
            raise ValueError(f"secret column '{secret}' not found in ori dataframe")

        self._ori = ori
        self._syn = syn
        self._control = control
        self._n_attacks = n_attacks
        self._aux_cols = aux_cols
        self._secret = secret
        self._regression = regression
        # Flipped to True by `evaluate()`; guards access to the results.
        self._evaluated = False

    def _attack(self, target: pd.DataFrame, naive: bool, n_jobs: int) -> int:
        """Run ``_run_attack`` against ``target`` with this evaluator's settings."""
        n_correct = _run_attack(
            target=target,
            syn=self._syn,
            n_attacks=self._n_attacks,
            aux_cols=self._aux_cols,
            secret=self._secret,
            n_jobs=n_jobs,
            naive=naive,
            regression=self._regression,
        )
        return n_correct

    def evaluate(self, n_jobs: int = -2) -> "InferenceEvaluator":
        r"""Run the inference attack.

        Three attacks are run in this order: the naive baseline against the
        original data, the real attack against the original data and, if a
        control dataset was given, the real attack against the control data.

        Parameters
        ----------
        n_jobs : int, default is -2
            The number of jobs to run in parallel.

        Returns
        -------
        self
            The evaluated ``InferenceEvaluator`` object.

        """
        self._n_baseline = self._attack(target=self._ori, naive=True, n_jobs=n_jobs)
        self._n_success = self._attack(target=self._ori, naive=False, n_jobs=n_jobs)

        if self._control is not None:
            self._n_control = self._attack(target=self._control, naive=False, n_jobs=n_jobs)
        else:
            self._n_control = None

        self._evaluated = True
        return self

    def results(self, confidence_level: float = 0.95) -> EvaluationResults:
        """Raw evaluation results.

        Parameters
        ----------
        confidence_level : float, default is 0.95
            Confidence level for the error bound calculation.

        Returns
        -------
        EvaluationResults
            Object containing the success rates for the various attacks.

        Raises
        ------
        RuntimeError
            If ``evaluate()`` has not been called yet.

        """
        if self._evaluated:
            return EvaluationResults(
                n_attacks=self._n_attacks,
                n_success=self._n_success,
                n_baseline=self._n_baseline,
                n_control=self._n_control,
                confidence_level=confidence_level,
            )

        raise RuntimeError("The inference evaluator wasn't evaluated yet. Please, run `evaluate()` first.")

    def risk(self, confidence_level: float = 0.95, baseline: bool = False) -> PrivacyRisk:
        """Compute the inference risk from the success of the attacker.

        This measures how much an attack on training data outperforms
        an attack on control data. An inference risk of 0 means that
        the attack had no advantage on the training data (no inference
        risk), while a value of 1 means that the attack exploited the
        maximally possible advantage.

        Parameters
        ----------
        confidence_level : float, default is 0.95
            Confidence level for the error bound calculation.
        baseline : bool, default is False
            If True, return the baseline risk computed from a random guessing
            attack. If False (default) return the risk from the real attack.

        Returns
        -------
        PrivacyRisk
            Estimate of the inference risk and its confidence interval.

        """
        return self.results(confidence_level=confidence_level).risk(baseline=baseline)
261 |
--------------------------------------------------------------------------------
/src/anonymeter/evaluators/linkability_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Privacy evaluator that measures the linkability risk."""
5 | import logging
6 | from typing import Dict, List, Optional, Set, Tuple, cast
7 |
8 | import numpy as np
9 | import numpy.typing as npt
10 | import pandas as pd
11 |
12 | from anonymeter.neighbors.mixed_types_kneighbors import MixedTypeKNeighbors
13 | from anonymeter.stats.confidence import EvaluationResults, PrivacyRisk
14 |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
16 |
17 |
class LinkabilityIndexes:
    """Utility class to store indexes from linkability attack.

    Parameters
    ----------
    idx_0 : np.ndarray
        Array containing the result of the nearest neighbor search
        between the first original dataset and the synthetic data.
        Rows correspond to original records and the i-th column
        contains the index of the i-th closest synthetic record.
    idx_1 : np.ndarray
        Array containing the result of the nearest neighbor search
        between the second original dataset and the synthetic data.
        Rows correspond to original records and the i-th column
        contains the index of the i-th closest synthetic record.

    """

    def __init__(self, idx_0: npt.NDArray, idx_1: npt.NDArray):
        self._idx_0 = idx_0
        self._idx_1 = idx_1

    def find_links(self, n_neighbors: int) -> Dict[int, Set[int]]:
        """Return synthetic records that link originals in the split datasets.

        Parameters
        ----------
        n_neighbors : int
            Number of neighbors considered for the link search. Values
            larger than the number of stored neighbors (columns of the
            index arrays) are reduced to that number, with a warning.

        Returns
        -------
        Dict[int, Set[int]]
            Dictionary mapping the position of each linked original record
            to the set of synthetic record indexes that appear among the
            first ``n_neighbors`` neighbors of both of its splits. Original
            records without a common neighbor are not included.

        Raises
        ------
        ValueError
            If ``n_neighbors`` is smaller than 1 (after the reduction above).

        """
        # Neighbors are stored along the columns, so the upper bound is the
        # number of columns, not the number of attacked records (rows).
        # Arrays without a column axis (e.g. built from zero attacks) hold
        # no neighbors at all.
        max_neighbors = self._idx_0.shape[1] if self._idx_0.ndim > 1 else 0

        if n_neighbors > max_neighbors:
            logger.warning("Neighbors too large (%s), using %s instead.", n_neighbors, max_neighbors)
            n_neighbors = max_neighbors

        if n_neighbors < 1:
            raise ValueError(f"Invalid neighbors value ({n_neighbors}): must be positive.")

        links = {}
        for ii, (row0, row1) in enumerate(zip(self._idx_0, self._idx_1)):
            joined = set(row0[:n_neighbors]) & set(row1[:n_neighbors])
            if joined:
                links[ii] = joined

        return links

    def count_links(self, n_neighbors: int) -> int:
        """Count successfully linked records.

        Parameters
        ----------
        n_neighbors : int
            Number of neighbors considered for the link search.

        Returns
        -------
        int
            Number of target records for which the synthetic dataset
            has provided the attacker with means to link them.

        """
        links = self.find_links(n_neighbors=n_neighbors)
        return _count_links(links)
87 |
88 |
def _count_links(links: Dict[int, Set[int]]) -> int:
    """Count the original records that have at least one link.

    Parameters
    ----------
    links : Dict[int, Set[int]]
        Mapping from the index of an original record to the synthetic
        records linking it, as returned by ``LinkabilityIndexes.find_links``.

    Returns
    -------
    int
        Number of distinct original records present in ``links``.

    """
    # Dictionary keys are unique already, so no intermediate set is needed.
    return len(links)
97 |
98 |
def _random_links(n_synthetic: int, n_attacks: int, n_neighbors: int) -> npt.NDArray:
    """Draw random synthetic indexes for each attack.

    For each of the ``n_attacks`` attacks, ``n_neighbors`` distinct values
    are drawn from ``range(n_synthetic)`` with a freshly created generator.
    The per-attack draws are stacked into a single array.
    """
    generator = np.random.default_rng()

    draws = []
    for _ in range(n_attacks):
        draws.append(generator.choice(n_synthetic, size=n_neighbors, replace=False))

    return np.array(draws)
103 |
104 |
def _random_linkability_attack(n_synthetic: int, n_attacks: int, n_neighbors: int) -> LinkabilityIndexes:
    """Build the baseline attack from two independent sets of random links.

    Both index arrays come from separate ``_random_links`` calls with the
    same arguments and are wrapped in a ``LinkabilityIndexes`` object.
    """
    draw_kwargs = {"n_synthetic": n_synthetic, "n_attacks": n_attacks, "n_neighbors": n_neighbors}

    first_split = _random_links(**draw_kwargs)
    second_split = _random_links(**draw_kwargs)

    return LinkabilityIndexes(idx_0=first_split, idx_1=second_split)
110 |
111 |
def _find_nn(syn: pd.DataFrame, ori: pd.DataFrame, n_jobs: int, n_neighbors: int) -> npt.NDArray:
    """Search the synthetic data for the nearest neighbors of original records.

    A ``MixedTypeKNeighbors`` object is fitted on ``syn`` and queried with
    ``ori`` (without distances). One-dimensional inputs are converted to
    single-column frames first.
    """
    searcher = MixedTypeKNeighbors(n_jobs=n_jobs, n_neighbors=n_neighbors)

    # Selecting a single column yields a Series; promote it to a frame.
    candidates = syn.to_frame() if syn.ndim == 1 else syn
    queries = ori.to_frame() if ori.ndim == 1 else ori

    searcher.fit(candidates)
    neighbor_idx = searcher.kneighbors(queries, return_distance=False)

    return cast(np.ndarray, neighbor_idx)
124 |
125 |
def _linkability_attack(
    ori: pd.DataFrame,
    syn: pd.DataFrame,
    n_attacks: int,
    aux_cols: Tuple[List[str], List[str]],
    n_neighbors: int,
    n_jobs: int,
) -> LinkabilityIndexes:
    """Run the nearest-neighbor search for both column splits.

    ``n_attacks`` records are sampled from ``ori`` without replacement. For
    each of the two column groups in ``aux_cols`` the nearest synthetic
    neighbors of the sampled records are looked up with ``_find_nn``, and
    the two results are stored in a ``LinkabilityIndexes`` object.
    """
    attacked = ori.sample(n_attacks, replace=False)

    # One search per split, first group then second group.
    per_split = [
        _find_nn(syn=syn[aux_cols[split]], ori=attacked[aux_cols[split]], n_neighbors=n_neighbors, n_jobs=n_jobs)
        for split in (0, 1)
    ]

    return LinkabilityIndexes(idx_0=per_split[0], idx_1=per_split[1])
140 |
141 |
class LinkabilityEvaluator:
    r"""Measure the linkability risk created by a synthetic dataset.

    The linkability risk is measured from the success of a linkability attack.
    The attack is modeled along the following scenario. The attacker possesses
    two datasets, both of which share some columns with the *original* dataset
    that was used to generate the synthetic data. Those columns will be
    referred to as *auxiliary columns*. The attacker's aim is then to use the
    information contained in the synthetic data to connect these two datasets,
    i.e. to find records that belong to the same individual.

    To model this attack, the original dataset is split vertically into two
    parts. Then we try to reconnect the two parts using the synthetic data
    by looking for the closest neighbors of the split original records in
    the synthetic data. If both splits of an original record have the same
    closest synthetic neighbor, they are linked together. The more original
    records get relinked in this manner the more successful the attack.


    Parameters
    ----------
    ori : pd.DataFrame
        Dataframe containing original data.
    syn : pd.DataFrame
        Dataframe containing synthetic data. It has to have
        the same columns as ``ori``.
    aux_cols : tuple of two lists of strings
        Features of the records that are given to the attacker as auxiliary
        information. The two lists are the column groups of the two splits.
    n_attacks : int, default is 500.
        Number of records to attack. If None each record in the original
        dataset will be attacked.
    n_neighbors : int, default is 1
        The number of closest neighbors to include in the analysis. The
        default of 1 means that the linkability attack is considered
        successful only if the two original record splits have the same
        synthetic record as closest neighbor.
    control : pd.DataFrame (optional)
        Independent sample of original records **not** used to create the
        synthetic dataset. This is used to evaluate the excess privacy risk.
    """

    def __init__(
        self,
        ori: pd.DataFrame,
        syn: pd.DataFrame,
        aux_cols: Tuple[List[str], List[str]],
        n_attacks: Optional[int] = 500,
        n_neighbors: int = 1,
        control: Optional[pd.DataFrame] = None,
    ):
        self._ori = ori
        self._syn = syn
        # `None` means "attack every original record".
        self._n_attacks = n_attacks if n_attacks is not None else ori.shape[0]
        self._aux_cols = aux_cols
        self._n_neighbors = n_neighbors
        self._control = control
        # Flipped to True by `evaluate()`; guards access to the results.
        self._evaluated = False

    def evaluate(self, n_jobs: int = -2) -> "LinkabilityEvaluator":
        """Run the linkability attack.

        The random baseline, the attack on the original data and, if a
        control dataset was given, the attack on the control data are run
        in this order.

        Parameters
        ----------
        n_jobs : int, default is -2
            The number of parallel jobs to run for neighbors search.

        Returns
        -------
        self
            The evaluated ``LinkabilityEvaluator`` object.

        """
        self._baseline_links = _random_linkability_attack(
            n_synthetic=self._syn.shape[0], n_attacks=self._n_attacks, n_neighbors=self._n_neighbors
        )

        self._attack_links = _linkability_attack(
            ori=self._ori,
            syn=self._syn,
            n_attacks=self._n_attacks,
            aux_cols=self._aux_cols,
            n_neighbors=self._n_neighbors,
            n_jobs=n_jobs,
        )

        self._control_links = (
            None
            if self._control is None
            else _linkability_attack(
                ori=self._control,
                syn=self._syn,
                n_attacks=self._n_attacks,
                aux_cols=self._aux_cols,
                n_neighbors=self._n_neighbors,
                n_jobs=n_jobs,
            )
        )

        self._evaluated = True
        return self

    def results(self, confidence_level: float = 0.95, n_neighbors: Optional[int] = None) -> EvaluationResults:
        """Raw evaluation results.

        Parameters
        ----------
        confidence_level : float, default is 0.95
            Confidence level for the error bound calculation.
        n_neighbors : int, default is None
            The number of closest neighbors to include in the analysis.
            If `None` (the default), the number used is the one
            given to the constructor. The value of this parameter must
            be smaller than or equal to what has been used to initialize
            this evaluator.

        Returns
        -------
        EvaluationResults
            Object containing the success rates for the various attacks.

        Raises
        ------
        RuntimeError
            If ``evaluate()`` has not been called yet.
        ValueError
            If ``n_neighbors`` is larger than the value given to the
            constructor.

        """
        if not self._evaluated:
            raise RuntimeError("The linkability evaluator wasn't evaluated yet. Please, run `evaluate()` first.")

        if n_neighbors is None:
            n_neighbors = self._n_neighbors

        # Only `self._n_neighbors` neighbors were stored during `evaluate()`,
        # so a larger value cannot be honoured.
        if n_neighbors > self._n_neighbors:
            raise ValueError(
                f"Cannot compute linkability results for `n_neighbors` "
                f"({n_neighbors}) larger than the value used by the constructor "
                f"({self._n_neighbors})."
            )

        n_control = None if self._control_links is None else self._control_links.count_links(n_neighbors=n_neighbors)

        return EvaluationResults(
            n_attacks=self._n_attacks,
            n_success=self._attack_links.count_links(n_neighbors=n_neighbors),
            n_baseline=self._baseline_links.count_links(n_neighbors=n_neighbors),
            n_control=n_control,
            confidence_level=confidence_level,
        )

    def risk(
        self, confidence_level: float = 0.95, baseline: bool = False, n_neighbors: Optional[int] = None
    ) -> PrivacyRisk:
        """Compute linkability risk.

        The linkability risk reflects how easy linkability attacks are.
        A linkability risk of 1 means that every single attacked record
        could be successfully linked together. A linkability risk of 0
        means that no links were found at all.

        Parameters
        ----------
        confidence_level : float, default is 0.95
            Confidence level for the error bound calculation.
        baseline : bool, default is False
            If True, return the baseline risk computed from a random guessing
            attack. If False (default) return the risk from the real attack.
        n_neighbors : int, default is None
            The number of closest neighbors to include in the analysis.
            If `None` (the default), the number used is the one
            given to the constructor. The value of this parameter must
            be smaller than or equal to what has been used to initialize
            this evaluator.

        Returns
        -------
        PrivacyRisk
            Estimate of the linkability risk and its confidence interval.

        """
        results = self.results(confidence_level=confidence_level, n_neighbors=n_neighbors)

        return results.risk(baseline=baseline)
319 |
--------------------------------------------------------------------------------
/src/anonymeter/evaluators/singling_out_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Privacy evaluator that measures the singling out risk."""
5 | import logging
6 | from typing import Any, Callable, Dict, List, Optional, Set, Tuple
7 |
8 | import numpy as np
9 | import numpy.typing as npt
10 | import pandas as pd
11 | from pandas.api.types import is_bool_dtype, is_numeric_dtype
12 | from scipy.optimize import curve_fit
13 |
14 | from anonymeter.stats.confidence import EvaluationResults, PrivacyRisk
15 |
# Module-wide NumPy random generator, created without an explicit seed.
rng = np.random.default_rng()
# Logger named after this module.
logger = logging.getLogger(__name__)
18 |
19 |
20 | def _escape_quotes(string: str) -> str:
21 | return string.replace('"', '\\"').replace("'", "\\'")
22 |
23 |
24 | def _query_expression(col: str, val: Any, dtype: np.dtype) -> str:
25 | """Generate type-aware query expression."""
26 | query: str = ""
27 |
28 | if pd.api.types.is_datetime64_any_dtype(dtype):
29 | f"{col} == '{val}'"
30 | elif isinstance(val, str):
31 | query = f"{col} == '{_escape_quotes(val)}'"
32 | else:
33 | query = f"{col} == {val}"
34 |
35 | return query
36 |
37 |
38 | def _query_from_record(record: pd.Series, dtypes: pd.Series, columns: List[str], medians: Optional[pd.Series]) -> str:
39 | """Construct a query from the attributes in a record."""
40 | query = []
41 |
42 | for col in sorted(columns):
43 | if pd.isna(record[col]):
44 | item = ".isna()"
45 | elif is_bool_dtype(dtypes[col]):
46 | item = f"== {record[col]}"
47 | elif is_numeric_dtype(dtypes[col]):
48 | if medians is None:
49 | operator = rng.choice([">=", "<="])
50 | else:
51 | if record[col] > medians[col]:
52 | operator = ">="
53 | else:
54 | operator = "<="
55 | item = f"{operator} {record[col]}"
56 |
57 | elif isinstance(dtypes[col], pd.CategoricalDtype) and is_numeric_dtype(dtypes[col].categories.dtype):
58 | item = f"=={record[col]}"
59 | else:
60 | if isinstance(record[col], str):
61 | item = f"== '{_escape_quotes(record[col])}'"
62 | else:
63 | item = f'== "{record[col]}"'
64 |
65 | query.append(f"{col}{item}")
66 |
67 | return " & ".join(query)
68 |
69 |
70 | def _random_operator(data_type: str) -> str:
71 | if data_type == "categorical":
72 | ops = ["==", "!="]
73 | elif data_type == "boolean":
74 | ops = ["", "not "]
75 | elif data_type == "numerical":
76 | ops = ["==", "!=", ">", "<", ">=", "<="]
77 | else:
78 | raise ValueError(f"Unknown `data_type`: {data_type}")
79 |
80 | return rng.choice(ops)
81 |
82 |
def _random_query(unique_values: Dict[str, List[Any]], cols: List[str]) -> str:
    """Generate a random query using the given columns.

    For each column (processed in sorted order) one of its unique values is
    drawn at random and combined with a random operator.

    Parameters
    ----------
    unique_values : dict
        Unique values of each column, indexed by column name.
    cols : list of str
        Columns to include in the query.

    Returns
    -------
    str
        The single-column expressions joined by ``" & "``.

    """
    query = []

    for col in sorted(cols):
        values = unique_values[col]
        val = rng.choice(values)
        # ``values`` is a collection of values, never a dtype object: a
        # categorical column is recognised via its ``dtype`` attribute
        # (plain lists have none).
        dtype = getattr(values, "dtype", None)

        if pd.isna(val):
            expression = f"{_random_operator('boolean')}{col}.isna()"
        elif is_bool_dtype(values):
            expression = f"{_random_operator('boolean')}{col}"
        elif isinstance(dtype, pd.CategoricalDtype) and is_numeric_dtype(dtype.categories.dtype):
            # numerical categories are compared unquoted, as done when
            # building queries from records
            expression = f"{col} {_random_operator('categorical')} {val}"
        elif is_numeric_dtype(values):
            expression = f"{col} {_random_operator('numerical')} {val}"
        elif isinstance(val, str):
            expression = f"{col} {_random_operator('categorical')} '{_escape_quotes(val)}'"
        else:
            expression = f"{col} {_random_operator('categorical')} '{val}'"

        query.append(expression)

    return " & ".join(query)
107 |
108 |
def _random_queries(df: pd.DataFrame, n_queries: int, n_cols: int) -> List[str]:
    """Generate ``n_queries`` random queries, each one involving ``n_cols`` columns of ``df``."""
    # The column subsets of all the queries are drawn before any value is sampled.
    column_sets = []
    for _ in range(n_queries):
        picked = rng.choice(df.columns, size=n_cols, replace=False)
        column_sets.append(picked.tolist())

    uniques = {name: df[name].unique() for name in df.columns}

    return [_random_query(unique_values=uniques, cols=picked) for picked in column_sets]
116 |
117 |
def safe_query_counts(query: str, df: pd.DataFrame) -> Optional[int]:
    """Count the rows of ``df`` that satisfy ``query``.

    Returns
    -------
    int or None
        Number of matching rows, or ``None`` if evaluating the query raised
        an exception. Failures are logged at DEBUG level.

    """
    try:
        matches = df.query(query, engine="python")
    except Exception as ex:
        logger.debug(f"Query {query} failed with {ex}.")
        return None

    return len(matches)
125 |
126 |
def singling_out_probability_integral(n: int, w_min: float, w_max: float) -> float:
    """Integrate the singling out probability over a range of weights.

    The probability that a query singles out in a population of size
    n is defined by the query "weight" (w), i.e. the chance that the
    query matches a random row sampled from the data generating distribution.

    This probability is given by: P(w, n) = n*w * (1 - w)**(n - 1).
    See Cohen and Nissim 2020 [1] for more details.

    References
    ----------
    [1] - https://arxiv.org/abs/1904.06009

    Parameters
    ----------
    n : int
        Size of the population.
    w_min : float
        Lower extreme of integration. Must be between 0 and 1.
    w_max : float
        Higher extreme of integration. Must be between w_min and 1.

    Returns
    -------
    float
        The integral of the singling out probability in the given range.

    Raises
    ------
    ValueError
        If ``w_min`` is outside [0, 1] or ``w_max`` is outside [w_min, 1].

    """
    if w_min < 0 or w_min > 1:
        raise ValueError(f"Parameter `w_min` must be between 0 and 1. Got {w_min} instead.")

    if w_max < w_min or w_max > 1:
        raise ValueError(
            f"Parameter `w_max` must be greater than w_min ({w_min}) and smaller than 1. Got {w_max} instead."
        )

    def _boundary_term(w):
        # (n + 1) times minus the antiderivative of P(w, n), evaluated at w
        return (n * w + 1) * (1 - w) ** n

    return (_boundary_term(w_min) - _boundary_term(w_max)) / (n + 1)
165 |
166 |
def _measure_queries_success(
    df: pd.DataFrame, queries: List[str], n_repeat: int, n_meas: int
) -> Tuple[npt.NDArray, npt.NDArray]:
    """Count how many queries single out in random subsamples of ``df``.

    ``n_meas`` sample sizes, evenly spaced between ``min(1000, len(df))`` and
    ``len(df)``, are considered. For each of them ``n_repeat`` subsamples are
    drawn without replacement.

    Returns
    -------
    sizes : npt.NDArray
        Size of each subsample.
    successes : npt.NDArray
        Number of queries singling out in each subsample.

    """
    total_rows = len(df)
    smallest = min(1000, total_rows)
    sample_sizes = np.linspace(smallest, total_rows, n_meas).astype(int)

    sizes: list = []
    successes: list = []

    for n_rows in sample_sizes:
        for _ in range(n_repeat):
            subsample = df.sample(n_rows, replace=False)
            successes.append(len(_evaluate_queries(df=subsample, queries=queries)))
            sizes.append(n_rows)

    return np.array(sizes), np.array(successes)
179 |
180 |
def _model(x, w_eff, norm):
    """Singling out probability integrated from 0 to ``w_eff`` for size ``x``, scaled by ``norm``."""
    integral = singling_out_probability_integral(n=x, w_min=0, w_max=w_eff)
    return norm * integral
183 |
184 |
def _fit_model(sizes: npt.NDArray, successes: npt.NDArray) -> Callable:
    """Fit ``_model`` to the measured successes and return the fitted function of the size."""
    largest = np.max(sizes)

    # initial guesses
    w_eff_guess = 1 / largest
    norm_guess = 1 / singling_out_probability_integral(n=largest, w_min=0, w_max=w_eff_guess)

    fitted_params, _ = curve_fit(
        _model,
        xdata=sizes,
        ydata=successes,
        bounds=(0, (1, np.inf)),
        p0=(w_eff_guess, norm_guess),
    )

    def fitted(x):
        return _model(x, *fitted_params)

    return fitted
193 |
194 |
def fit_correction_term(df: pd.DataFrame, queries: List[str]) -> Callable:
    """Fit the correction for a different size of the control dataset.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe on which the queries need to be evaluated.
    queries : list of strings
        Singling out queries to evaluate on the data.

    Returns
    -------
    callable
        Model of how the number of queries that single out
        depends on the size of the dataset.

    """
    # 10 sample sizes, 5 random subsamples for each of them
    measured = _measure_queries_success(df=df, queries=queries, n_repeat=5, n_meas=10)
    sizes, successes = measured
    return _fit_model(sizes=sizes, successes=successes)
214 |
215 |
class UniqueSinglingOutQueries:
    """Collection of unique queries that single out in a DataFrame."""

    def __init__(self):
        # set for fast membership tests, list to remember the insertion order
        self._set: Set[str] = set()
        self._list: List[str] = []

    def check_and_append(self, query: str, df: pd.DataFrame):
        """Add a query to the collection if it singles out.

        The query is stored only if it is not already in the collection
        and it matches exactly one record of ``df``. Queries that fail to
        evaluate are not stored.

        Parameters
        ----------
        query : str
            Query expression to be added.
        df : pd.DataFrame
            Dataframe on which the query needs to single out.

        """
        if query in self._set:
            return

        if safe_query_counts(query=query, df=df) == 1:
            self._set.add(query)
            self._list.append(query)

    def __len__(self):
        """Number of singling out queries stored."""
        return len(self._list)

    @property
    def queries(self) -> List[str]:
        """Queries that are present in the collection, in insertion order."""
        return self._list
257 |
258 |
def univariate_singling_out_queries(df: pd.DataFrame, n_queries: int) -> List[str]:
    """Generate singling out queries from rare attributes.

    Candidate queries are built, for every column, from: a missing value
    occurring exactly once, the smallest and largest value of numerical
    columns, and the values occurring exactly once. The candidates are
    shuffled and tested on ``df`` until ``n_queries`` of them single out.

    Parameters
    ----------
    df: pd.DataFrame
        Input dataframe from which queries will be generated.
    n_queries: int
        Number of queries to generate.

    Returns
    -------
    List[str]
        The singling out queries. At most ``n_queries`` are returned, fewer
        if not enough candidates single out.

    """
    queries = []

    for col in sorted(df.columns):
        if df[col].isna().sum() == 1:
            queries.append(f"{col}.isna()")

        if pd.api.types.is_numeric_dtype(df.dtypes[col]):
            values = df[col].dropna()

            if len(values) > 0:
                # only the extremes are needed: no need to sort the column
                queries.extend([f"{col} <= {values.min()}", f"{col} >= {values.max()}"])

        counts = df[col].value_counts()
        rare_values = counts[counts == 1]

        if len(rare_values) > 0:
            queries.extend([_query_expression(col=col, val=val, dtype=df.dtypes[col]) for val in rare_values.index])

    rng.shuffle(queries)

    so_queries = UniqueSinglingOutQueries()

    for query in queries:
        # Checked before adding, so that no more than ``n_queries`` are
        # ever collected, also when ``n_queries`` is zero.
        if len(so_queries) >= n_queries:
            break

        so_queries.check_and_append(query, df=df)

    return so_queries.queries
304 |
305 |
def multivariate_singling_out_queries(
    df: pd.DataFrame, n_queries: int, n_cols: int, max_attempts: Optional[int]
) -> List[str]:
    """Generate singling out queries from a combination of attributes.

    At each attempt a random record and ``n_cols`` random columns are drawn
    from ``df``; the query built from them is kept if it singles out in ``df``
    and it has not been found already.

    Parameters
    ----------
    df: pd.DataFrame
        Input dataframe from which queries will be generated.
    n_queries: int
        Number of queries to generate.
    n_cols: int
        Number of columns that the attacker uses to create the
        singling out queries.
    max_attempts: int, optional.
        Maximum number of attempts that the attacker can make to generate
        the requested ``n_queries`` singling out queries. This is useful to
        avoid excessively long running calculations. There can be combinations
        of hyperparameters (`n_cols`) and datasets that make the task of
        generating enough singling out queries too hard. This parameter
        caps the total number of query generation attempts, both those that
        are successful and those that are not. If ``max_attempts`` is None,
        no limit will be imposed.

    Returns
    -------
    List[str]
        The singling out queries. Fewer than ``n_queries`` are returned if
        ``max_attempts`` is reached first.

    """
    so_queries = UniqueSinglingOutQueries()
    # reference values deciding the direction (>= or <=) of numerical conditions
    medians = df.median(numeric_only=True)

    n_attempts = 0

    while len(so_queries) < n_queries:
        if max_attempts is not None and n_attempts >= max_attempts:
            logger.warning(
                f"Reached maximum number of attempts {max_attempts} when generating singling out queries. "
                f"Returning {len(so_queries.queries)} instead of the requested {n_queries}. "
                "To avoid this, increase the number of attempts or set it to ``None`` to disable "
                "the limitation entirely."
            )
            return so_queries.queries

        record = df.iloc[rng.integers(df.shape[0])]
        columns = rng.choice(df.columns, size=n_cols, replace=False).tolist()

        query = _query_from_record(record=record, dtypes=df.dtypes, columns=columns, medians=medians)

        so_queries.check_and_append(query=query, df=df)

        n_attempts += 1

    return so_queries.queries
362 |
363 |
364 | def _evaluate_queries(df: pd.DataFrame, queries: List[str]) -> List[str]:
365 | counts = np.array([safe_query_counts(query=q, df=df) for q in queries], dtype=float)
366 |
367 | if np.any(np.isnan(counts)) > 0:
368 | logger.warning(
369 | f"Found {np.sum(np.isnan(counts))} failed queries "
370 | f"out of {len(queries)}. Check DEBUG messages for more details."
371 | )
372 |
373 | success = counts == 1
374 | return [q for iq, q in enumerate(queries) if success[iq]]
375 |
376 |
377 | def _generate_singling_out_queries(
378 | df: pd.DataFrame, mode: str, n_attacks: int, n_cols: int, max_attempts: Optional[int]
379 | ) -> List[str]:
380 | if mode == "univariate":
381 | queries = univariate_singling_out_queries(df=df, n_queries=n_attacks)
382 |
383 | elif mode == "multivariate":
384 | queries = multivariate_singling_out_queries(
385 | df=df,
386 | n_queries=n_attacks,
387 | n_cols=n_cols,
388 | max_attempts=max_attempts,
389 | )
390 |
391 | else:
392 | raise RuntimeError(f"Parameter `mode` can be either `univariate` or `multivariate`. Got {mode} instead.")
393 |
394 | if len(queries) < n_attacks:
395 | logger.warning(
396 | f"Attack `{mode}` could generate only {len(queries)} "
397 | f"singling out queries out of the requested {n_attacks}. "
398 | "This can probably lead to an underestimate of the "
399 | "singling out risk."
400 | )
401 | return queries
402 |
403 |
class SinglingOutEvaluator:
    """Privacy evaluator that measures the singling out risk.

    Singling out happens when the attacker can determine that
    there is a single individual in the dataset that has certain
    attributes (for example "zip_code == XXX and first_name == YYY")
    with high enough confidence. According to the Article 29 WGP [2],
    singling out is one of the three risks (together with
    linkability and inference) that a successful anonymization technique
    must protect from.

    See [1] for the definition of some of the concepts used here.

    - [1]: https://arxiv.org/abs/1904.06009
    - [2]: https://ec.europa.eu/justice/article-29/documentation/opinion-recommendation/files/2014/wp216_en.pdf

    Parameters
    ----------
    ori : pd.DataFrame
        Original dataframe on which the success of the singling out
        attacker will be evaluated.
    syn : pd.DataFrame
        Synthetic dataframe used to generate the singling out queries.
    n_attacks : int, default is 500
        Number of singling out attacks to attempt.
    n_cols : int, default is 3
        Number of columns that the attacker uses to create the singling
        out queries.
    control : pd.DataFrame (optional)
        Independent sample of original records **not** used to create the
        synthetic dataset. This is used to evaluate the excess privacy risk.
    max_attempts : int or None, default is 10,000,000
        Maximum number of attempts that the attacker can make to generate
        the requested ``n_attacks`` singling out queries. This is useful to
        avoid excessively long running calculations. There can be combinations
        of hyperparameters (`n_cols`) and datasets that make the task of
        generating enough singling out queries too hard. This parameter
        caps the total number of query generation attempts, both those that
        are successful and those that are not. If ``max_attempts`` is None,
        no limit will be imposed.

    """

    def __init__(
        self,
        ori: pd.DataFrame,
        syn: pd.DataFrame,
        n_attacks: int = 500,
        n_cols: int = 3,
        control: Optional[pd.DataFrame] = None,
        max_attempts: Optional[int] = 10000000,
    ):
        # duplicated records are dropped from all the datasets
        self._ori = ori.drop_duplicates()
        self._syn = syn.drop_duplicates()
        self._n_attacks = n_attacks
        self._n_cols = n_cols
        self._control = None if control is None else control.drop_duplicates()
        self._max_attempts = max_attempts
        self._queries: List[str] = []
        self._random_queries: List[str] = []
        self._evaluated = False

    def queries(self, baseline: bool = False) -> List[str]:
        """Successful singling out queries.

        Parameters
        ----------
        baseline: bool, default is False.
            If True, return the queries used by the baseline attack (i.e.
            created at random). If False (default) return the queries used
            by the "real" attack.

        Returns
        -------
        List[str]:
            Successful singling out queries. The list is empty until
            ``evaluate()`` has been run.

        """
        return self._random_queries if baseline else self._queries

    def evaluate(self, mode: str = "multivariate") -> "SinglingOutEvaluator":
        """Run the attack and evaluate the guesses on the original dataset.

        Parameters
        ----------
        mode : str, default is "multivariate"
            Name of the algorithm used to generate the singling out queries.
            Could be either `multivariate` or `univariate`.

        Returns
        -------
        self
            The evaluated singling out evaluator.

        Raises
        ------
        ValueError
            If ``mode`` is neither `multivariate` nor `univariate`.

        """
        if mode == "multivariate":
            n_cols = self._n_cols
        elif mode == "univariate":
            n_cols = 1
        else:
            raise ValueError(f"mode must be either 'multivariate' or 'univariate', got {mode} instead.")

        # Baseline attack: random queries evaluated on the original data.
        # The successful ones are stored where ``queries(baseline=True)``
        # reads them from.
        baseline_queries = _random_queries(df=self._syn, n_queries=self._n_attacks, n_cols=n_cols)
        self._random_queries = _evaluate_queries(df=self._ori, queries=baseline_queries)
        # alias retained for code that reads this attribute name
        self._baseline_queries = self._random_queries
        self._n_baseline = len(self._random_queries)

        # Main attack: queries singling out in the synthetic data are
        # evaluated on the original data.
        queries = _generate_singling_out_queries(
            df=self._syn,
            n_attacks=self._n_attacks,
            n_cols=self._n_cols,
            mode=mode,
            max_attempts=self._max_attempts,
        )
        self._queries = _evaluate_queries(df=self._ori, queries=queries)
        self._n_success = len(self._queries)

        if self._control is None:
            self._n_control = None
        else:
            self._n_control = len(_evaluate_queries(df=self._control, queries=queries))

            # correct the number of success against the control set
            # to account for different dataset sizes.
            if len(self._control) != len(self._ori):
                # fit the model to the data:
                fitted_model = fit_correction_term(df=self._control, queries=queries)

                correction = fitted_model(len(self._ori)) / fitted_model(len(self._control))
                self._n_control *= correction

        self._evaluated = True
        return self

    def results(self, confidence_level: float = 0.95) -> EvaluationResults:
        """Raw evaluation results.

        Parameters
        ----------
        confidence_level : float, default is 0.95
            Confidence level for the error bound calculation.

        Returns
        -------
        EvaluationResults
            Object containing the success rates for the various attacks.

        Raises
        ------
        RuntimeError
            If ``evaluate()`` has not been run yet.

        """
        if not self._evaluated:
            raise RuntimeError("The singling out evaluator wasn't evaluated yet. Please, run `evaluate()` first.")

        return EvaluationResults(
            n_attacks=self._n_attacks,
            n_success=self._n_success,
            n_baseline=self._n_baseline,
            n_control=self._n_control,
            confidence_level=confidence_level,
        )

    def risk(self, confidence_level: float = 0.95, baseline: bool = False) -> PrivacyRisk:
        """Estimate the singling out risk.

        The risk is estimated comparing the number of successful singling out
        queries to the desired number of attacks (``n_attacks``).

        Parameters
        ----------
        confidence_level : float
            Confidence level for the reported error on the singling out risk.
        baseline : bool, default is False
            If True, return the baseline risk computed from a random guessing
            attack. If False (default) return the risk from the real attack.

        Returns
        -------
        PrivacyRisk
            Estimate of the singling out risk and its confidence interval.

        """
        results = self.results(confidence_level=confidence_level)
        return results.risk(baseline=baseline)
585 |
--------------------------------------------------------------------------------
/src/anonymeter/neighbors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/src/anonymeter/neighbors/__init__.py
--------------------------------------------------------------------------------
/src/anonymeter/neighbors/mixed_types_kneighbors.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Nearest neighbor search for mixed type data."""
5 | import logging
6 | from math import fabs, isnan
7 | from typing import Dict, List, Optional, Tuple, Union
8 |
9 | import numpy as np
10 | import numpy.typing as npt
11 | import pandas as pd
12 | from joblib import Parallel, delayed
13 | from numba import jit
14 |
15 | from anonymeter.preprocessing.transformations import mixed_types_transform
16 | from anonymeter.preprocessing.type_detection import detect_consistent_col_types
17 |
# Logger named after this module.
logger = logging.getLogger(__name__)
19 |
20 |
@jit(nopython=True, nogil=True)
def gower_distance(r0: npt.NDArray, r1: npt.NDArray, cat_cols_index: int) -> float:
    r"""Distance between two records inspired by the Gower distance [1].

    To handle mixed type data, the distance is specialized for numerical (continuous)
    and categorical data. For numerical entries the absolute difference is used,
    which, once the columns have been normalized, gives :math:`d(a_i, b_i)\leq 1`
    for every :math:`a_i`, :math:`b_i`. For categorical entries, :math:`d(a_i, b_i)`
    is 1 if :math:`a_i` and :math:`b_i` differ, else it is 0. An entry that is
    NaN in both records contributes 1 to the distance.

    Notes
    -----
    To keep the balance between numerical and categorical values, the input records
    have to be properly normalized. Their numerical part needs to be scaled so that
    the difference between any two values of a column (from both datasets) is *at most* 1.

    References
    ----------
    [1]. Gower, J. C. (1971) "A general coefficient of similarity and some of
    its properties", Biometrics 27(4), 857-871.

    Parameters
    ----------
    r0 : npt.NDArray
        Input array of shape (D,).
    r1 : npt.NDArray
        Input array of shape (D,).
    cat_cols_index : int
        Index delimiting the categorical columns in r0/r1 if present. For example,
        ``r0[:cat_cols_index]`` are the numerical columns, and ``r0[cat_cols_index:]`` are
        the categorical ones. For a fully numerical dataset, use ``cat_cols_index =
        len(r0)``. For a fully categorical one, set ``cat_cols_index`` to 0.

    Returns
    -------
    float
        Distance between the records.

    """
    total = 0.0

    for pos in range(len(r0)):
        a = r0[pos]
        b = r1[pos]

        if isnan(a) and isnan(b):
            total += 1
        elif pos < cat_cols_index:
            # numerical entry
            total += fabs(a - b)
        elif a != b:
            # categorical entry
            total += 1

    return total
74 |
75 |
@jit(nopython=True, nogil=True)
def _nearest_neighbors(
    queries: npt.NDArray, candidates: npt.NDArray, cat_cols_index: int, n_neighbors: int
) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.float64]]:
    r"""For every element of ``queries``, find its nearest neighbors in ``candidates``.

    Parameters
    ----------
    queries : npt.NDArray
        Input array of shape (Nx, D).
    candidates : npt.NDArray
        Input array of shape (Ny, D).
    cat_cols_index : int
        Index delimiting the categorical columns in ``queries`` and
        ``candidates``, if present.
    n_neighbors : int
        Determines the number of closest neighbors per entry to be returned.

    Returns
    -------
    idx : npt.NDArray[int64]
        Array of shape (Nx, n_neighbors). For each element in ``queries``,
        this array contains the indices of the closest neighbors in
        ``candidates``. That is, ``candidates[idx[i]]`` are the elements of
        ``candidates`` that are closer to ``queries[i]``.
    dists : npt.NDArray[float64]
        Array of shape (Nx, n_neighbors). This array contains the distances
        between the record pairs identified by idx.

    """
    n_queries = queries.shape[0]
    n_candidates = candidates.shape[0]

    idx = np.zeros((n_queries, n_neighbors), dtype=np.int64)
    dists = np.zeros((n_queries, n_neighbors), dtype=np.float64)

    for iq in range(n_queries):
        # distances from this query to every candidate
        row_dists = np.zeros(n_candidates, dtype=np.float64)

        for ic in range(n_candidates):
            row_dists[ic] = gower_distance(r0=queries[iq], r1=candidates[ic], cat_cols_index=cat_cols_index)

        nearest = row_dists.argsort()[:n_neighbors]
        idx[iq] = nearest
        dists[iq] = row_dists[nearest]

    return idx, dists
119 |
120 |
class MixedTypeKNeighbors:
    """Nearest neighbor algorithm for mixed type data.

    To handle mixed type data, we use a distance function inspired by the Gower similarity.
    The distance is specialized for numerical (continuous) and categorical data. For
    numerical records, we use the L1 norm, computed after the columns have been
    normalized so that :math:`d(a_i, b_i) <= 1` for every :math:`a_i`, :math:`b_i`.
    For categorical, :math:`d(a_i, b_i)` is 1, if the entries :math:`a_i`, :math:`b_i`
    differ, else, it is 0.

    References
    ----------
    [1]. Gower, J. C. (1971) "A general coefficient of similarity and some of
    its properties", Biometrics 27(4), 857-871.

    Parameters
    ----------
    n_neighbors : int, default is 5
        Determines the number of closest neighbors per entry to be returned.
    n_jobs : int, default is -2
        Number of jobs to use. It follows joblib convention, so that ``n_jobs = -1``
        means all available cores.

    """

    def __init__(self, n_neighbors: int = 5, n_jobs: int = -2):
        self._n_neighbors = n_neighbors
        self._n_jobs = n_jobs

    def fit(self, candidates: pd.DataFrame, ctypes: Optional[Dict[str, List[str]]] = None):
        """Prepare for nearest neighbor search.

        Parameters
        ----------
        candidates : pd.DataFrame
            Dataset containing the records one would find the neighbors in.
        ctypes : dict, optional.
            Dictionary specifying which columns of ``candidates`` should be
            treated as continuous and which should be treated as categorical.
            For example, ``ctypes = {'num': ['distance'], 'cat': ['color']}``
            specifies the types of a two column dataset.

        Returns
        -------
        self
            This object, storing ``candidates`` and ``ctypes``.

        """
        self._candidates = candidates
        self._ctypes = ctypes
        return self

    def kneighbors(
        self, queries: pd.DataFrame, n_neighbors: Optional[int] = None, return_distance: bool = False
    ) -> Union[Tuple[npt.NDArray, npt.NDArray], npt.NDArray]:
        """Find the nearest neighbors for a set of query points.

        Note
        ----
        The search is performed in a brute-force fashion. For large datasets
        or large number of query points, the search for nearest neighbor will
        become very slow.

        Parameters
        ----------
        queries : pd.DataFrame
            Query points for the nearest neighbor searches.
        n_neighbors : int, default is None
            Number of neighbors required for each sample.
            The default is the value passed to the constructor. Values larger
            than the number of fitted records are reduced to that number,
            and a warning is logged.
        return_distance : bool, default is False
            Whether or not to return the distances of the neighbors or
            just the indexes.

        Returns
        -------
        np.ndarray of shape (queries.shape[0], n_neighbors)
            Array with the indexes of the elements of the fit dataset closer to
            each element in the query dataset. This is the only value returned
            when ``return_distance`` is ``False``.
        tuple of two np.ndarray of shape (queries.shape[0], n_neighbors)
            When ``return_distance`` is ``True`` the tuple ``(distances, indexes)``
            is returned instead, with the distances of the neighbor pairs first.

        """
        k = self._n_neighbors if n_neighbors is None else n_neighbors

        n_candidates = self._candidates.shape[0]
        if k > n_candidates:
            logger.warning(
                f"Parameter ``n_neighbors``={k} cannot be "
                f"larger than the size of the training data {n_candidates}."
            )
            k = n_candidates

        if self._ctypes is None:
            # the detected column types are stored on the instance
            self._ctypes = detect_consistent_col_types(df1=self._candidates, df2=queries)

        num_cols = self._ctypes["num"]
        cat_cols = self._ctypes["cat"]

        candidates, queries = mixed_types_transform(
            df1=self._candidates, df2=queries, num_cols=num_cols, cat_cols=cat_cols
        )

        # numerical columns first, categorical columns after them
        ordered_cols = num_cols + cat_cols
        query_values = queries[ordered_cols].values
        candidate_values = candidates[ordered_cols].values
        first_cat_index = len(num_cols)

        # one task per query record
        with Parallel(n_jobs=self._n_jobs, backend="threading") as executor:
            res = executor(
                delayed(_nearest_neighbors)(
                    queries=query_values[row : row + 1],
                    candidates=candidate_values,
                    cat_cols_index=first_cat_index,
                    n_neighbors=k,
                )
                for row in range(query_values.shape[0])
            )

        indexes_array, distances_array = zip(*res)
        indexes = np.vstack(indexes_array)
        distances = np.vstack(distances_array)

        if not return_distance:
            return indexes

        return distances, indexes
238 |
--------------------------------------------------------------------------------
/src/anonymeter/preprocessing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/src/anonymeter/preprocessing/__init__.py
--------------------------------------------------------------------------------
/src/anonymeter/preprocessing/transformations.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Data pre-processing and transformations for the privacy evaluators."""
5 | import logging
6 | from typing import List, Tuple
7 |
8 | import pandas as pd
9 | from sklearn.preprocessing import LabelEncoder
10 |
# Logger named after this module.
logger = logging.getLogger(__name__)
12 |
13 |
def _encode_categorical(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Label-encode two dataframes of categorical values, keeping the labels consistent.

    The two dataframes are stacked, so that each column is encoded with
    labels fitted on the values of both, and then split again.
    """
    first_key, second_key = "df1", "df2"
    stacked = pd.concat((df1, df2), keys=[first_key, second_key])

    for name in stacked.columns:
        encoder = LabelEncoder()
        stacked[name] = encoder.fit_transform(stacked[name])

    return stacked.loc[first_key], stacked.loc[second_key]
25 |
26 |
def _scale_numerical(df1: pd.DataFrame, df2: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Scale dataframes with *only* numerical values.

    Every column is divided by the range it spans across both dataframes
    (largest maximum minus smallest minimum). Columns whose range is zero
    are divided by one, i.e. they are returned unscaled.

    Parameters
    ----------
    df1 : pd.DataFrame
        First dataframe.
    df2 : pd.DataFrame
        Second dataframe.

    Returns
    -------
    df1_scaled : pd.DataFrame
        Scaled version of ``df1``.
    df2_scaled : pd.DataFrame
        Scaled version of ``df2``.

    Raises
    ------
    RuntimeError
        If applying the scaling yields a Series instead of a DataFrame.

    """
    df1_min, df1_max = df1.min(), df1.max()
    df2_min, df2_max = df2.min(), df2.max()

    # Column-wise extrema across the two dataframes.
    mins = df1_min.where(df1_min < df2_min, df2_min)
    maxs = df1_max.where(df1_max > df2_max, df2_max)
    ranges = maxs - mins

    null_range = ranges == 0
    if null_range.any():
        # Column labels are not necessarily strings (e.g. integer labels),
        # so convert them explicitly before joining.
        cnames = ", ".join(str(label) for label in ranges[null_range].index.values)
        logger.debug(
            "Numerical column(s) %s have a null-range: all elements "
            "have the same value. These column(s) won't be scaled.",
            cnames,
        )
        # Dividing by one leaves these columns untouched and avoids 0-division.
        ranges[null_range] = 1

    df1_scaled = df1.apply(lambda x: x / ranges[x.name])
    df2_scaled = df2.apply(lambda x: x / ranges[x.name])
    if isinstance(df1_scaled, pd.Series) or isinstance(df2_scaled, pd.Series):
        raise RuntimeError("Unexpected error: scaling resulted in a Series.")

    return df1_scaled, df2_scaled
50 |
51 |
def mixed_types_transform(
    df1: pd.DataFrame, df2: pd.DataFrame, num_cols: List[str], cat_cols: List[str]
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Scale numerical columns and label-encode categorical ones.

    Numerical columns are divided by their range across both datasets, so
    that the difference between any two values within a column is smaller
    than or equal to one:
        x -> x' = x / (max{max(x), max(x_other)} - min{min(x), min(x_other)})

    Categorical columns are label encoded. The encoding of each column is
    fitted on the values of both dataframes together, so the same value
    gets the same label in both outputs.

    Parameters
    ----------
    df1: pd.DataFrame.
        First input DataFrame.
    df2: pd.DataFrame.
        Second input DataFrame. Must have the same columns as ``df1``.
    num_cols: list[str].
        Names of the numerical columns to be scaled.
    cat_cols: list[str].
        Names of the categorical columns to be encoded.

    Returns
    -------
    trans_df1: pd.DataFrame.
        Transformed df1, with columns in the order of ``df1.columns``.
    trans_df2: pd.DataFrame.
        Transformed df2, with columns in the order of ``df2.columns``.

    Raises
    ------
    ValueError
        If the two dataframes have different columns, or if ``num_cols``
        and ``cat_cols`` together do not match the dataframe columns.

    """
    if set(df1.columns) != set(df2.columns):
        raise ValueError(f"Input dataframes have different columns. df1: {df1.columns}, df2: {df2.columns}.")

    if set(num_cols + cat_cols) != set(df1.columns):
        raise ValueError(
            f"Dataframes columns {df1.columns} do not match "
            "with `num_cols` and `cat_cols`.\n"
            f"num_cols: {num_cols}\n"
            f"cat_cols: {cat_cols}"
        )

    # An empty frame stands in for a column kind that is not present.
    if len(num_cols) > 0:
        scaled = _scale_numerical(df1[num_cols], df2[num_cols])
    else:
        scaled = (pd.DataFrame(), pd.DataFrame())

    if len(cat_cols) > 0:
        encoded = _encode_categorical(df1[cat_cols], df2[cat_cols])
    else:
        encoded = (pd.DataFrame(), pd.DataFrame())

    # Re-assemble the pieces and restore each input's own column order.
    trans_df1 = pd.concat([scaled[0], encoded[0]], axis=1)[df1.columns]
    trans_df2 = pd.concat([scaled[1], encoded[1]], axis=1)[df2.columns]
    return trans_df1, trans_df2
108 |
--------------------------------------------------------------------------------
/src/anonymeter/preprocessing/type_detection.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | from typing import Dict, List
5 |
6 | import pandas as pd
7 |
8 |
def detect_col_types(df: pd.DataFrame) -> Dict[str, List[str]]:
    """Split the dataframe columns into numerical and non-numerical ones.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    Dict[str: List[str]]
        Dictionary with two keys: 'num' lists the columns selected by the
        "number" dtype, 'cat' lists all the remaining columns (treated as
        categorical). Both lists are sorted.

    """
    numeric: List[str] = list(df.select_dtypes("number").columns.values)

    remaining: List[str] = []
    for name in df.columns.values:
        if name in numeric:
            continue
        remaining.append(name)

    return {"num": sorted(numeric), "cat": sorted(remaining)}
28 |
29 |
def detect_consistent_col_types(df1: pd.DataFrame, df2: pd.DataFrame) -> Dict[str, List[str]]:
    """Detect the column types of two dataframes and check that they agree.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input dataframe
    df2 : pandas.DataFrame
        Input dataframe

    Returns
    -------
    Dict[str: List[str]]
        Dictionary with column names separated by types, as detected on
        ``df1``. Keys of the dictionary are 'num' or 'cat' (numerical and
        non-numerical, that is categorical, resp.). Values are lists of
        column names.

    Raises
    ------
    RuntimeError
        If the column types detected on the two dataframes differ.

    """
    types_first = detect_col_types(df1)
    types_second = detect_col_types(df2)

    if types_first == types_second:
        return types_first

    raise RuntimeError("Input dataframes have different column names/types.")
54 |
--------------------------------------------------------------------------------
/src/anonymeter/stats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/src/anonymeter/stats/__init__.py
--------------------------------------------------------------------------------
/src/anonymeter/stats/confidence.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | """Functions for estimating rates and errors in privacy attacks."""
5 |
6 | import warnings
7 | from math import sqrt
8 | from typing import NamedTuple, Optional, Tuple
9 |
10 | from scipy.stats import norm
11 |
12 |
class PrivacyRisk(NamedTuple):
    """Privacy risk estimate together with its confidence interval.

    Parameters
    ----------
    value : float
        Best estimate of the privacy risk.
    ci : (float, float)
        Lower and upper end of the confidence interval on the best estimate.

    """

    # Point estimate of the risk.
    value: float
    # (lower, upper) confidence interval around ``value``.
    ci: Tuple[float, float]
27 |
28 |
class SuccessRate(NamedTuple):
    """Estimated success rate of a privacy attack and its error.

    Parameters
    ----------
    value : float
        Best estimate of the success rate of the attacker.
    error : float
        Error on the best estimate.

    """

    # Point estimate of the attacker's success rate.
    value: float
    # Error on ``value``.
    error: float

    def to_risk(self) -> PrivacyRisk:
        """Return this success rate expressed as a `PrivacyRisk`."""
        estimate, spread = self.value, self.error
        return bind_value(point_estimate=estimate, error_bound=spread)
47 |
48 |
def probit(confidence_level: float) -> float:
    """Return the standard-normal quantile for the given confidence level.

    The quantile is evaluated at ``0.5 * (1 + confidence_level)``.

    Raises
    ------
    RuntimeError
        If the computed quantile is not a float.

    """
    quantile_at = 0.5 * (1.0 + confidence_level)
    z_value = norm.ppf(quantile_at)

    if isinstance(z_value, float):
        return z_value

    raise RuntimeError("Unexpected error: probit resulted in a non-float value.")
55 |
56 |
def success_rate(n_total: int, n_success: int, confidence_level: float) -> SuccessRate:
    """Estimate the success rate in a Bernoulli-distributed sample.

    Attack scores follow a Bernoulli distribution (success/failure with rates
    p/1-p). The rate and its error are estimated with the Wilson score
    interval, a frequentist-type estimator which is robust in problematic
    cases (e.g., when p goes extreme or the sample size is small). The
    estimated rate is a weighted average between the MLE result and 0.5
    which, for the sample sizes used in privacy attacks, does not differ
    visibly from the MLE outcome.

    Parameters
    ----------
    n_total : int
        Size of the sample.
    n_success : int
        Number of successful trials in the sample.
    confidence_level : float
        Confidence level for the error estimation.

    Returns
    -------
    SuccessRate
        Point estimate of the success rate, together with the error bound
        of the estimate for the requested confidence level.

    Raises
    ------
    ValueError
        If ``confidence_level`` is smaller than 0 or larger than 1.

    Notes
    -----
    E.B. WILSON
    Probable inference, the law of succession, and statistical inference
    Journal of the American Statistical Association 22, 209-212 (1927)
    DOI 10.1080/01621459.1927.10502953

    """
    if confidence_level < 0 or confidence_level > 1:
        raise ValueError(f"Parameter `confidence_level` must be between 0 and 1. Got {confidence_level} instead.")

    z = probit(confidence_level)
    z_sq = z * z

    # Centre of the Wilson interval.
    norm_factor = n_total + z_sq
    centre = (n_success + 0.5 * z_sq) / norm_factor

    # Half-width of the Wilson interval.
    binomial_var = n_success * (n_total - n_success) / n_total
    half_width = (z / norm_factor) * sqrt(binomial_var + 0.25 * z_sq)

    return SuccessRate(value=centre, error=half_width)
103 |
104 |
def residual_success(
    attack_rate: SuccessRate,
    control_rate: SuccessRate,
) -> SuccessRate:
    """Compute the residual success of a privacy attack.

    The residual success is the excess of the attack success on the training
    data over the attack success on the control data, normalized by the
    margin of improvement (the fraction of unsuccessful attacks on control).

    Parameters
    ----------
    attack_rate : SuccessRate
        Success rate on training data.
    control_rate : SuccessRate
        Success rate on control data.

    Returns
    -------
    SuccessRate
        Residual success score without sign correction (i.e., the outcome is
        negative if control is more attack-able than training). The
        correction would yield ``0 ≤ score ≤ 1`` (zero for negative
        uncorrected score). The error is the propagated error bound of the
        residual success rate.

    """
    attack, control = attack_rate.value, control_rate.value

    score = (attack - control) / (1.0 - control)

    # Gaussian error propagation:
    # dF = sqrt[ (dF/dx)^2 dx^2 + (dF/dy)^2 dy^2 + ... ]
    slope_attack = 1 / abs(1 - control)
    slope_control = (attack - 1) / (1 - control) ** 2

    attack_term = (attack_rate.error * slope_attack) ** 2
    control_term = (control_rate.error * slope_control) ** 2
    propagated = sqrt(attack_term + control_term)

    return SuccessRate(value=score, error=propagated)
142 |
143 |
def bind_value(point_estimate: float, error_bound: float) -> PrivacyRisk:
    """Clip a point estimate and its error interval to the range [0, 1].

    Parameters
    ----------
    point_estimate : float
        Point estimate of a rate or risk value.
    error_bound : float
        Symmetric error around the point estimate.


    Returns
    -------
    PrivacyRisk
        The point estimate clipped to [0, 1], together with the (possibly
        asymmetric) confidence interval obtained by clipping
        ``point_estimate - error_bound`` and ``point_estimate + error_bound``
        to [0, 1].

    """

    def _clip(number: float) -> float:
        # Restrict ``number`` to the closed unit interval.
        return min(max(number, 0.0), 1.0)

    lower = _clip(point_estimate - error_bound)
    upper = _clip(point_estimate + error_bound)
    return PrivacyRisk(value=_clip(point_estimate), ci=(lower, upper))
167 |
168 |
class EvaluationResults:
    """Results of a privacy evaluator.

    Computes the attacker's success rates from the attack counts and
    provides an estimate of the corresponding privacy risk.

    Parameters
    ----------
    n_attacks : int
        Total number of attacks performed.
    n_success : int
        Number of successful attacks.
    n_baseline : int
        Number of successful attacks for the
        baseline (i.e. random-guessing) attacker.
    n_control : int, default is None
        Number of successful attacks against the
        control dataset. If this parameter is not None
        the privacy risk will be measured relative to
        the attacker success on the control set.
    confidence_level : float, default is 0.95
        Desired confidence level for the confidence
        intervals on the risk.

    """

    def __init__(
        self,
        n_attacks: int,
        n_success: int,
        n_baseline: int,
        n_control: Optional[int] = None,
        confidence_level: float = 0.95,
    ):
        def _rate(n_hits: int):
            # All the rates share the sample size and the confidence level.
            return success_rate(n_total=n_attacks, n_success=n_hits, confidence_level=confidence_level)

        self.attack_rate = _rate(n_success)
        self.baseline_rate = _rate(n_baseline)

        if n_control is None:
            self.control_rate = None
        else:
            self.control_rate = _rate(n_control)

        self.n_attacks = n_attacks
        self.n_success = n_success
        self.n_baseline = n_baseline
        self.n_control = n_control

        self._sanity_check()

    def _sanity_check(self):
        """Warn when the estimated rates make the results unreliable."""
        attack = self.attack_rate.value
        baseline = self.baseline_rate.value

        if attack <= baseline:
            warnings.warn(
                "Attack is as good or worse as baseline model. "
                f"Estimated rates: attack = {attack}, "
                f"baseline = {baseline}. "
                "Analysis results cannot be trusted.",
                stacklevel=2,
            )

        control = self.control_rate
        if control is not None and control.value == 1:
            warnings.warn("Success of control attack is 100%. Cannot measure residual privacy risk.", stacklevel=2)

    def risk(self, baseline: bool = False) -> PrivacyRisk:
        """Estimate the privacy risk.

        With ``baseline=True`` the risk of the baseline attack is returned.
        Otherwise the risk is derived from the main attack: directly from its
        success rate when no control rate is available, or from the residual
        success with respect to the control attack when it is.
        """
        if baseline:
            return self.baseline_rate.to_risk()

        if self.control_rate is not None:
            residual = residual_success(attack_rate=self.attack_rate, control_rate=self.control_rate)
            return residual.to_risk()

        return self.attack_rate.to_risk()
242 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statice/anonymeter/0188bdf5615601e2f31503ae91a2b310af8d917c/tests/__init__.py
--------------------------------------------------------------------------------
/tests/datasets/adults_ori.csv:
--------------------------------------------------------------------------------
1 | age,type_employer,education,education_num,marital,occupation,relationship,race,sex,capital_gain,capital_loss,hr_per_week,country,income
2 | 35,Private,Some-college,10,Never-married,Sales,Not-in-family,White,Male,0,0,50,United-States,<=50K
3 | 27,Private,Assoc-voc,11,Divorced,Other-service,Unmarried,Amer-Indian-Eskimo,Female,0,0,40,United-States,<=50K
4 | 28,Private,Doctorate,16,Never-married,Prof-specialty,Not-in-family,White,Female,0,0,60,Germany,>50K
5 | 30,Local-gov,HS-grad,9,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
6 | 20,Private,Some-college,10,Never-married,Sales,Own-child,White,Female,0,0,35,United-States,<=50K
7 | 47,Private,Bachelors,13,Married-civ-spouse,Sales,Husband,White,Male,0,0,60,United-States,<=50K
8 | 23,Private,10th,6,Never-married,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
9 | 45,Private,Some-college,10,Separated,Adm-clerical,Unmarried,White,Female,0,0,27,United-States,<=50K
10 | 30,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,40,United-States,<=50K
11 | 27,Private,HS-grad,9,Separated,Handlers-cleaners,Own-child,White,Female,0,1594,25,United-States,<=50K
12 | 21,Self-emp-not-inc,Some-college,10,Never-married,Farming-fishing,Own-child,White,Male,0,0,40,United-States,<=50K
13 | 25,Local-gov,Bachelors,13,Never-married,Prof-specialty,Own-child,White,Female,0,0,40,United-States,<=50K
14 | 29,Private,Masters,14,Never-married,Sales,Not-in-family,White,Male,0,0,50,United-States,>50K
15 | 36,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,64,United-States,>50K
16 | 52,Private,5th-6th,3,Widowed,Other-service,Unmarried,White,Female,0,0,40,Mexico,<=50K
17 | 37,Self-emp-inc,Bachelors,13,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,60,United-States,<=50K
18 | 52,Private,7th-8th,4,Divorced,Machine-op-inspct,Not-in-family,White,Female,0,0,64,United-States,<=50K
19 | 57,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,>50K
20 | 63,Private,HS-grad,9,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,Scotland,<=50K
21 | 40,Private,Some-college,10,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,40,United-States,<=50K
22 | 34,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,60,United-States,<=50K
23 | 41,Self-emp-inc,Masters,14,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,>50K
24 | 32,Private,Bachelors,13,Divorced,Exec-managerial,Not-in-family,White,Female,0,0,40,United-States,<=50K
25 | 58,Self-emp-not-inc,Some-college,10,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,45,United-States,<=50K
26 | 31,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,91,United-States,<=50K
27 | 46,Private,Some-college,10,Married-civ-spouse,Craft-repair,Husband,White,Male,3103,0,60,United-States,>50K
28 | 26,Private,Some-college,10,Separated,Sales,Unmarried,White,Female,0,0,35,United-States,<=50K
29 | 53,Private,12th,8,Divorced,Transport-moving,Not-in-family,White,Female,0,0,40,United-States,<=50K
30 | 42,Local-gov,HS-grad,9,Widowed,Transport-moving,Unmarried,White,Female,0,0,40,United-States,<=50K
31 | 44,Private,HS-grad,9,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,<=50K
32 | 33,Private,Some-college,10,Never-married,Prof-specialty,Not-in-family,White,Female,3674,0,16,United-States,<=50K
33 | 27,Private,HS-grad,9,Divorced,Adm-clerical,Other-relative,White,Female,0,0,40,United-States,<=50K
34 | 55,?,HS-grad,9,Married-civ-spouse,?,Wife,White,Female,0,0,6,United-States,>50K
35 | 29,State-gov,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,40,United-States,<=50K
36 | 19,Private,HS-grad,9,Never-married,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
37 | 52,Private,HS-grad,9,Widowed,Other-service,Unmarried,Asian-Pac-Islander,Female,0,0,40,India,>50K
38 | 44,Private,Bachelors,13,Divorced,Prof-specialty,Not-in-family,White,Female,0,0,36,United-States,<=50K
39 | 17,Private,11th,7,Never-married,Sales,Own-child,White,Female,0,0,20,United-States,<=50K
40 | 53,Private,Bachelors,13,Never-married,Sales,Unmarried,White,Male,0,1669,50,United-States,<=50K
41 | 38,Private,Some-college,10,Divorced,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
42 | 33,Private,Assoc-voc,11,Married-civ-spouse,Tech-support,Husband,Asian-Pac-Islander,Male,0,0,10,United-States,<=50K
43 | 19,Local-gov,Some-college,10,Never-married,Protective-serv,Own-child,White,Male,0,1721,35,United-States,<=50K
44 | 31,Private,5th-6th,3,Married-civ-spouse,Handlers-cleaners,Husband,White,Male,0,0,40,Mexico,<=50K
45 | 53,Private,9th,5,Married-civ-spouse,Other-service,Husband,White,Male,0,0,40,Dominican-Republic,<=50K
46 | 22,Private,Some-college,10,Never-married,Adm-clerical,Own-child,Black,Female,0,0,35,United-States,<=50K
47 | 32,Federal-gov,HS-grad,9,Never-married,Exec-managerial,Unmarried,White,Female,0,1380,40,United-States,<=50K
48 | 50,Self-emp-inc,HS-grad,9,Married-civ-spouse,Sales,Wife,White,Female,0,0,30,United-States,<=50K
49 | 39,Private,Assoc-acdm,12,Separated,Prof-specialty,Not-in-family,White,Female,0,0,30,United-States,<=50K
50 | 41,Local-gov,Some-college,10,Married-civ-spouse,Adm-clerical,Wife,Black,Female,0,0,40,United-States,<=50K
51 | 47,Private,5th-6th,3,Separated,Other-service,Unmarried,White,Female,0,0,40,United-States,<=50K
52 | 31,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,5013,0,32,United-States,<=50K
53 | 37,Private,HS-grad,9,Divorced,Craft-repair,Unmarried,White,Female,0,0,48,United-States,<=50K
54 | 68,?,HS-grad,9,Married-civ-spouse,?,Husband,White,Male,0,0,45,United-States,<=50K
55 | 36,State-gov,Doctorate,16,Married-civ-spouse,Prof-specialty,Husband,White,Male,7298,0,40,United-States,>50K
56 | 36,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Wife,Asian-Pac-Islander,Female,0,0,40,Philippines,>50K
57 | 19,Private,HS-grad,9,Never-married,Farming-fishing,Not-in-family,White,Male,0,0,40,United-States,<=50K
58 | 52,Private,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,<=50K
59 | 60,Private,HS-grad,9,Never-married,Adm-clerical,Not-in-family,Black,Female,0,0,38,United-States,<=50K
60 | 51,Private,Bachelors,13,Married-civ-spouse,Sales,Husband,White,Male,0,0,47,United-States,>50K
61 | 45,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,40,United-States,<=50K
62 | 37,Private,HS-grad,9,Married-civ-spouse,Handlers-cleaners,Husband,White,Male,0,0,24,United-States,<=50K
63 | 53,Federal-gov,HS-grad,9,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States,<=50K
64 | 30,Private,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Male,0,0,40,United-States,<=50K
65 | 45,Private,Doctorate,16,Separated,Exec-managerial,Unmarried,White,Male,0,0,40,United-States,>50K
66 | 24,Private,HS-grad,9,Never-married,Other-service,Not-in-family,White,Female,0,0,30,United-States,<=50K
67 | 37,?,Assoc-acdm,12,Married-civ-spouse,?,Husband,White,Male,0,0,32,United-States,<=50K
68 | 58,Private,10th,6,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,50,United-States,>50K
69 | 24,Private,Bachelors,13,Never-married,Sales,Not-in-family,White,Female,0,0,50,United-States,<=50K
70 | 23,Private,HS-grad,9,Never-married,Handlers-cleaners,Not-in-family,White,Male,0,0,30,United-States,<=50K
71 | 21,Private,Some-college,10,Never-married,Other-service,Own-child,White,Male,0,0,40,United-States,<=50K
72 | 39,Private,HS-grad,9,Divorced,Machine-op-inspct,Unmarried,White,Male,0,1726,40,United-States,<=50K
73 | 32,Private,Some-college,10,Never-married,Machine-op-inspct,Other-relative,White,Female,0,2205,40,Holand-Netherlands,<=50K
74 | 40,Private,7th-8th,4,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,60,United-States,<=50K
75 | 26,Private,Bachelors,13,Never-married,Adm-clerical,Own-child,White,Male,0,0,30,United-States,<=50K
76 | 58,?,Some-college,10,Never-married,?,Not-in-family,White,Female,0,0,40,United-States,<=50K
77 | 35,Self-emp-not-inc,Masters,14,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,United-States,>50K
78 | 22,Private,Assoc-voc,11,Never-married,Other-service,Not-in-family,White,Female,0,1762,40,United-States,<=50K
79 | 18,Private,Some-college,10,Never-married,Sales,Own-child,White,Female,0,0,35,United-States,<=50K
80 | 57,Private,HS-grad,9,Divorced,Adm-clerical,Not-in-family,White,Female,0,0,40,United-States,<=50K
81 | 55,Federal-gov,Bachelors,13,Married-spouse-absent,Exec-managerial,Not-in-family,Black,Male,0,0,40,United-States,>50K
82 | 50,Self-emp-not-inc,Some-college,10,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,55,United-States,<=50K
83 | 25,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,<=50K
84 | 43,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,2377,50,United-States,<=50K
85 | 40,Private,11th,7,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,55,United-States,<=50K
86 | 32,Private,HS-grad,9,Married-civ-spouse,Sales,Wife,White,Female,0,0,40,United-States,<=50K
87 | 23,Private,Assoc-voc,11,Never-married,Adm-clerical,Not-in-family,White,Female,0,0,40,United-States,<=50K
88 | 36,?,HS-grad,9,Married-civ-spouse,?,Husband,White,Male,0,0,15,United-States,<=50K
89 | 53,Private,Bachelors,13,Never-married,Other-service,Not-in-family,Asian-Pac-Islander,Female,0,0,21,Japan,>50K
90 | 20,Private,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,<=50K
91 | 64,Private,11th,7,Married-civ-spouse,Craft-repair,Husband,White,Male,0,2179,40,United-States,<=50K
92 | 37,Private,Masters,14,Married-civ-spouse,Exec-managerial,Husband,White,Male,15024,0,60,United-States,>50K
93 | 40,Private,Bachelors,13,Married-spouse-absent,Sales,Not-in-family,White,Male,0,0,40,United-States,>50K
94 | 23,Local-gov,Some-college,10,Never-married,Adm-clerical,Own-child,White,Female,0,0,20,United-States,<=50K
95 | 43,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,Black,Male,7688,0,40,United-States,>50K
96 | 46,Local-gov,Assoc-acdm,12,Divorced,Protective-serv,Not-in-family,White,Male,0,0,40,United-States,<=50K
97 | 36,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,3103,0,45,United-States,>50K
98 | 52,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,1887,40,United-States,>50K
99 | 22,Private,10th,6,Never-married,Craft-repair,Own-child,White,Male,0,0,15,United-States,<=50K
100 | 29,Private,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,40,United-States,<=50K
101 | 28,Self-emp-not-inc,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,60,United-States,<=50K
102 |
--------------------------------------------------------------------------------
/tests/datasets/adults_syn.csv:
--------------------------------------------------------------------------------
1 | age,type_employer,education,education_num,marital,occupation,relationship,race,sex,capital_gain,capital_loss,hr_per_week,country,income
2 | 49,Private,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,48,United-States,<=50K
3 | 57,Private,Bachelors,13,Married-civ-spouse,Sales,Wife,White,Female,0,0,40,United-States,<=50K
4 | 22,Private,10th,6,Never-married,Machine-op-inspct,Not-in-family,White,Male,0,0,30,United-States,<=50K
5 | 60,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,48,United-States,<=50K
6 | 90,Private,11th,7,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,40,United-States,<=50K
7 | 72,Self-emp-not-inc,Prof-school,15,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,10,United-States,<=50K
8 | 68,Self-emp-inc,11th,7,Married-civ-spouse,Sales,Husband,White,Male,0,1258,40,United-States,<=50K
9 | 27,?,Some-college,10,Married-civ-spouse,?,Wife,White,Female,0,0,40,United-States,>50K
10 | 18,Private,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Female,0,0,20,United-States,<=50K
11 | 30,Local-gov,9th,5,Divorced,Farming-fishing,Unmarried,White,Female,0,0,40,Mexico,<=50K
12 | 34,Self-emp-not-inc,11th,7,Married-civ-spouse,Sales,Wife,White,Female,0,0,30,United-States,<=50K
13 | 18,?,11th,7,Never-married,?,Own-child,White,Female,0,0,25,United-States,<=50K
14 | 34,Local-gov,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,35,United-States,<=50K
15 | 50,Local-gov,Bachelors,13,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,44,United-States,>50K
16 | 45,Self-emp-not-inc,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,38,United-States,>50K
17 | 35,Self-emp-inc,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,0,0,50,United-States,>50K
18 | 21,Private,Some-college,10,Never-married,Sales,Not-in-family,White,Female,0,0,50,United-States,<=50K
19 | 53,Private,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,1977,40,United-States,>50K
20 | 35,Private,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,40,United-States,<=50K
21 | 20,Private,Some-college,10,Never-married,Sales,Own-child,White,Female,0,0,15,United-States,<=50K
22 | 18,Private,11th,7,Never-married,Handlers-cleaners,Own-child,White,Male,0,0,20,United-States,<=50K
23 | 45,Self-emp-inc,Assoc-voc,11,Divorced,Sales,Unmarried,White,Female,0,0,30,United-States,<=50K
24 | 29,Private,HS-grad,9,Never-married,Other-service,Unmarried,Black,Female,0,0,40,Japan,<=50K
25 | 57,Private,10th,6,Married-civ-spouse,Other-service,Husband,White,Male,0,0,40,United-States,<=50K
26 | 40,Private,Some-college,10,Never-married,Exec-managerial,Not-in-family,White,Male,0,0,50,United-States,>50K
27 | 52,Private,Some-college,10,Divorced,Adm-clerical,Unmarried,White,Female,0,0,45,United-States,<=50K
28 | 50,Private,Some-college,10,Divorced,Craft-repair,Not-in-family,Black,Female,0,0,45,United-States,<=50K
29 | 53,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,Japan,<=50K
30 | 37,Local-gov,Some-college,10,Divorced,Adm-clerical,Not-in-family,White,Female,0,0,44,United-States,<=50K
31 | 56,Private,Bachelors,13,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,<=50K
32 | 62,Private,HS-grad,9,Widowed,Craft-repair,Unmarried,Black,Female,0,0,40,United-States,<=50K
33 | 32,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,2829,0,40,?,<=50K
34 | 57,Private,HS-grad,9,Widowed,Adm-clerical,Not-in-family,White,Female,0,0,40,United-States,<=50K
35 | 45,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,50,United-States,>50K
36 | 42,Private,Masters,14,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,55,United-States,>50K
37 | 53,Private,Masters,14,Divorced,Sales,Not-in-family,White,Female,0,0,40,United-States,<=50K
38 | 28,Private,5th-6th,3,Never-married,Craft-repair,Other-relative,White,Male,0,0,40,Mexico,<=50K
39 | 28,Private,Some-college,10,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States,<=50K
40 | 57,?,Bachelors,13,Married-civ-spouse,?,Husband,White,Male,0,0,40,United-States,>50K
41 | 83,Self-emp-inc,HS-grad,9,Divorced,Sales,Not-in-family,White,Male,0,0,20,United-States,<=50K
42 | 33,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,55,United-States,>50K
43 | 25,Private,Assoc-acdm,12,Divorced,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
44 | 29,Private,HS-grad,9,Never-married,Other-service,Not-in-family,White,Male,0,0,40,United-States,<=50K
45 | 43,Private,Bachelors,13,Married-civ-spouse,Sales,Husband,White,Male,15024,0,50,United-States,>50K
46 | 27,Private,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Female,0,0,40,United-States,<=50K
47 | 40,Private,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
48 | 24,Private,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,0,0,40,United-States,<=50K
49 | 27,Private,Bachelors,13,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,45,United-States,>50K
50 | 39,Private,HS-grad,9,Married-civ-spouse,Other-service,Husband,Black,Male,0,0,40,United-States,<=50K
51 | 57,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,55,United-States,>50K
52 | 28,Private,Bachelors,13,Never-married,Sales,Not-in-family,White,Male,0,0,40,United-States,<=50K
53 | 17,Private,11th,7,Never-married,Sales,Own-child,White,Male,0,0,30,United-States,<=50K
54 | 27,Private,Assoc-voc,11,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,United-States,<=50K
55 | 49,Local-gov,Masters,14,Separated,Prof-specialty,Unmarried,White,Female,0,0,50,United-States,<=50K
56 | 64,Private,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,2635,0,40,United-States,<=50K
57 | 21,Private,Preschool,1,Never-married,Farming-fishing,Not-in-family,White,Male,0,0,50,Mexico,<=50K
58 | 34,Self-emp-inc,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Male,0,0,50,United-States,<=50K
59 | 49,Self-emp-not-inc,HS-grad,9,Divorced,Transport-moving,Not-in-family,White,Male,0,0,70,United-States,<=50K
60 | 49,Private,HS-grad,9,Married-civ-spouse,Sales,Husband,White,Male,0,0,50,United-States,>50K
61 | 42,Private,HS-grad,9,Divorced,Other-service,Not-in-family,White,Female,0,0,40,United-States,<=50K
62 | 47,Private,Some-college,10,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,40,United-States,<=50K
63 | 27,Private,10th,6,Married-civ-spouse,Handlers-cleaners,Husband,White,Male,0,0,40,United-States,<=50K
64 | 44,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Female,0,0,48,United-States,<=50K
65 | 29,Private,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Male,0,1590,50,United-States,<=50K
66 | 25,Federal-gov,HS-grad,9,Never-married,Handlers-cleaners,Not-in-family,Amer-Indian-Eskimo,Male,0,0,40,United-States,<=50K
67 | 23,Self-emp-inc,HS-grad,9,Never-married,Adm-clerical,Not-in-family,White,Female,0,0,40,United-States,<=50K
68 | 67,?,9th,5,Married-civ-spouse,?,Husband,White,Male,0,0,15,United-States,<=50K
69 | 39,Self-emp-not-inc,Bachelors,13,Divorced,Craft-repair,Not-in-family,Black,Male,0,1669,60,?,<=50K
70 | 41,Private,HS-grad,9,Divorced,Adm-clerical,Unmarried,White,Female,0,0,36,United-States,<=50K
71 | 18,Private,12th,8,Never-married,Sales,Own-child,White,Female,0,0,15,United-States,<=50K
72 | 19,Private,HS-grad,9,Never-married,Handlers-cleaners,Own-child,White,Female,0,0,25,United-States,<=50K
73 | 45,Private,HS-grad,9,Married-civ-spouse,Transport-moving,Husband,Other,Male,4064,0,40,United-States,<=50K
74 | 74,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,1825,12,United-States,>50K
75 | 30,State-gov,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,37,United-States,>50K
76 | 27,State-gov,Bachelors,13,Never-married,Prof-specialty,Not-in-family,Black,Male,0,0,40,United-States,<=50K
77 | 19,Private,Some-college,10,Never-married,Adm-clerical,Own-child,White,Female,0,0,20,United-States,<=50K
78 | 25,Private,Some-college,10,Never-married,Handlers-cleaners,Own-child,White,Male,0,0,30,United-States,<=50K
79 | 25,Private,Assoc-acdm,12,Married-civ-spouse,Adm-clerical,Wife,Asian-Pac-Islander,Female,0,0,37,India,>50K
80 | 31,Self-emp-not-inc,Assoc-voc,11,Married-civ-spouse,Sales,Husband,White,Male,0,0,48,United-States,<=50K
81 | 53,Private,HS-grad,9,Divorced,Sales,Not-in-family,White,Female,0,0,35,United-States,<=50K
82 | 20,Private,Some-college,10,Never-married,Other-service,Not-in-family,White,Female,0,0,40,United-States,<=50K
83 | 26,Private,Some-college,10,Never-married,Sales,Not-in-family,White,Female,0,0,15,United-States,<=50K
84 | 33,Private,Bachelors,13,Never-married,Sales,Not-in-family,White,Female,0,0,40,United-States,<=50K
85 | 21,Private,Some-college,10,Never-married,Other-service,Own-child,White,Female,0,0,25,United-States,<=50K
86 | 34,Private,Bachelors,13,Never-married,Craft-repair,Unmarried,White,Male,0,0,40,United-States,<=50K
87 | 24,Private,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,<=50K
88 | 22,Private,Bachelors,13,Never-married,Exec-managerial,Not-in-family,White,Female,0,0,40,United-States,<=50K
89 | 20,Private,9th,5,Never-married,Other-service,Unmarried,White,Male,0,0,30,Mexico,<=50K
90 | 67,Self-emp-not-inc,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,United-States,<=50K
91 | 30,Private,Some-college,10,Never-married,Sales,Not-in-family,White,Female,0,0,40,United-States,<=50K
92 | 43,Private,Assoc-acdm,12,Never-married,Adm-clerical,Not-in-family,Black,Female,0,0,45,United-States,<=50K
93 | 44,Private,Masters,14,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,55,United-States,>50K
94 | 35,Private,Some-college,10,Divorced,Adm-clerical,Unmarried,White,Female,0,0,39,United-States,<=50K
95 | 42,Private,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
96 | 34,Private,HS-grad,9,Never-married,Machine-op-inspct,Not-in-family,White,Male,4416,0,30,United-States,<=50K
97 | 44,Private,HS-grad,9,Married-civ-spouse,Transport-moving,Husband,Amer-Indian-Eskimo,Male,0,0,40,United-States,<=50K
98 | 52,Private,HS-grad,9,Separated,Priv-house-serv,Not-in-family,White,Female,0,0,50,United-States,<=50K
99 | 38,Private,HS-grad,9,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,60,United-States,>50K
100 | 34,Private,Bachelors,13,Married-civ-spouse,Tech-support,Husband,White,Male,0,0,47,United-States,>50K
101 | 47,Self-emp-not-inc,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,United-States,<=50K
102 |
--------------------------------------------------------------------------------
/tests/fixtures.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 |
5 |
6 | import os
7 | from typing import Optional
8 |
9 | import pandas as pd
10 |
11 | TEST_DIR_PATH = os.path.dirname(os.path.realpath(__file__))
12 |
13 |
def get_adult(which: str, n_samples: Optional[int] = None) -> pd.DataFrame:
    """Load one of the adult CSV datasets stored next to the tests.

    Dataset description:
    https://archive.ics.uci.edu/ml/datasets/adult

    Parameters
    ----------
    which : str, in ['ori', 'syn']
        Selects the "original" ('ori') or the "synthetic" ('syn') file.
    n_samples : int, optional
        Number of rows to read. With `None` the whole file is read.

    Returns
    -------
    df : pd.DataFrame
        The requested adult dataframe.

    Raises
    ------
    ValueError
        If `which` is neither 'ori' nor 'syn'.
    """
    # Reject unknown selectors before touching the filesystem.
    if which != "ori" and which != "syn":
        raise ValueError(f"Invalid value {which} for parameter `which`. Available are: 'ori' or 'syn'.")

    fname = "adults_ori.csv" if which == "ori" else "adults_syn.csv"
    csv_path = os.path.join(TEST_DIR_PATH, "datasets", fname)
    return pd.read_csv(csv_path, nrows=n_samples)
41 |
--------------------------------------------------------------------------------
/tests/test_confidence.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pytest
6 |
7 | from anonymeter.stats.confidence import (
8 | EvaluationResults,
9 | SuccessRate,
10 | bind_value,
11 | probit,
12 | residual_success,
13 | success_rate,
14 | )
15 |
16 |
def test_probit():
    """`probit(0.95)` equals 1.96 once rounded to two decimals."""
    z_score = probit(0.95)
    assert np.round(z_score, decimals=2) == 1.96
19 |
20 |
@pytest.mark.parametrize(
    "n_success, expected_risk, expected_error",
    [
        (850, 0.849, 0.022),
        (0, 0.002, 0.002),
        (1000, 0.998, 0.002),
    ],
)
def test_success_rate(n_success, expected_risk, expected_error):
    """Compare `success_rate` (1000 trials, 0.95 confidence) against reference values.

    Both the rate and its error are rounded to three decimals before comparing.
    """
    rate, error = success_rate(n_total=1000, n_success=n_success, confidence_level=0.95)
    rounded_rate = np.round(rate, decimals=3)
    rounded_error = np.round(error, decimals=3)
    assert rounded_rate == expected_risk
    assert rounded_error == expected_error
33 |
34 |
@pytest.mark.parametrize(
    "attack_rate, control_rate, expected",
    [
        (SuccessRate(0.9, 0.0), SuccessRate(0.8, 0.0), SuccessRate(0.5, 0.0)),
        (SuccessRate(0.9, 0.02), SuccessRate(0.85, 0.02), SuccessRate(0.333, 0.16)),
    ],
)
def test_residual_success(attack_rate, control_rate, expected):
    """`residual_success`, rounded to three decimals, matches the reference rate."""
    rounded = np.round(
        residual_success(attack_rate=attack_rate, control_rate=control_rate),
        decimals=3,
    )
    np.testing.assert_equal(rounded, expected)
45 |
46 |
@pytest.mark.parametrize(
    "point_estimate, error_bound, expected",
    [
        (0.1, 0.3, (0.1, 0.0, 0.4)),
        (1.1, 0.5, (1.0, 0.6, 1.0)),
        (-0.1, 0.2, (0.0, 0.0, 0.1)),
    ],
)
def test_bind_value(point_estimate, error_bound, expected):
    """`bind_value` yields the expected (value, lower bound, upper bound) triple."""
    bound = bind_value(point_estimate, error_bound)
    lower, upper = bound.ci[0], bound.ci[1]
    observed = np.array([bound.value, lower, upper])
    np.testing.assert_almost_equal(observed, expected)
58 |
59 |
@pytest.mark.parametrize(
    "n_attacks, n_success, n_baseline",
    [(100, 100, 0), (100, 23, 11), (111, 84, 42), (100, 0, 100)],
)
def test_evaluation_results_simple(n_attacks, n_success, n_baseline):
    """With `confidence_level=0` and no control, rates and risks are plain ratios.

    The confidence intervals are expected to collapse onto the point estimate.
    """
    results = EvaluationResults(
        n_attacks=n_attacks,
        n_success=n_success,
        n_baseline=n_baseline,
        n_control=None,
        confidence_level=0,
    )

    attack_risk = results.risk()
    random_risk = results.risk(baseline=True)

    expected_attack = n_success / n_attacks
    expected_baseline = n_baseline / n_attacks

    assert results.control_rate is None
    assert results.attack_rate.value == expected_attack
    assert results.baseline_rate.value == expected_baseline

    assert attack_risk.value == expected_attack
    assert random_risk.value == expected_baseline
    assert attack_risk.ci == (attack_risk.value, attack_risk.value)
    assert random_risk.ci == (random_risk.value, random_risk.value)
84 |
85 |
@pytest.mark.parametrize(
    "n_attacks, n_success, n_baseline, n_control, confidence_level, expected_rate, expected_baseline",
    [
        (
            100,
            100,
            0,
            None,
            0.95,
            SuccessRate(value=0.9815032508965071, error=0.01849674910349284),
            SuccessRate(value=0.01849674910349284, error=0.01849674910349284),
        ),
        (
            100,
            100,
            0,
            None,
            0.68,
            SuccessRate(value=0.9951036894831882, error=0.004896310516811869),
            SuccessRate(value=0.0048963105168118685, error=0.004896310516811869),
        ),
        (
            100,
            23,
            11,
            None,
            0.95,
            SuccessRate(value=0.23998824451588613, error=0.08155558571285167),
            SuccessRate(value=0.1244274643007244, error=0.06188550073007873),
        ),
    ],
)
def test_evaluation_results_confidence(
    n_attacks,
    n_success,
    n_baseline,
    n_control,
    confidence_level,
    expected_rate,
    expected_baseline,
):
    """Attack/baseline rates and the derived risks match reference values.

    The risks are compared against `SuccessRate.to_risk()` of the expected rates.
    """
    results = EvaluationResults(
        n_attacks=n_attacks,
        n_success=n_success,
        n_baseline=n_baseline,
        n_control=n_control,
        confidence_level=confidence_level,
    )

    observed_attack = results.attack_rate
    np.testing.assert_equal(observed_attack, expected_rate)

    observed_baseline = results.baseline_rate
    np.testing.assert_equal(observed_baseline, expected_baseline)

    attack_risk = results.risk(baseline=False)
    np.testing.assert_equal(attack_risk, expected_rate.to_risk())

    baseline_risk = results.risk(baseline=True)
    np.testing.assert_equal(baseline_risk, expected_baseline.to_risk())
138 |
139 |
def test_evaluation_results_warns_baseline():
    """A `UserWarning` is expected when the baseline count (50) exceeds the attack successes (49)."""
    counts = {
        "n_attacks": 100,
        "n_success": 49,
        "n_baseline": 50,
        "n_control": None,
        "confidence_level": 0.95,
    }
    with pytest.warns(UserWarning):
        EvaluationResults(**counts)
149 |
150 |
def test_evaluation_results_warns_control():
    """A `UserWarning` is expected when the control count (100) exceeds the attack successes (49)."""
    with pytest.warns(UserWarning):
        EvaluationResults(
            n_attacks=100,
            n_success=49,
            n_baseline=0,
            n_control=100,
            confidence_level=0,
        )
154 |
155 |
@pytest.mark.parametrize("confidence_level", [-0.1, 1.2])
def test_confidence_exception(confidence_level):
    """Confidence levels below 0 or above 1 make `EvaluationResults` raise `ValueError`."""
    counts = {"n_attacks": 100, "n_success": 49, "n_baseline": 0, "n_control": None}
    with pytest.raises(ValueError):
        EvaluationResults(confidence_level=confidence_level, **counts)
166 |
--------------------------------------------------------------------------------
/tests/test_inference_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | from typing import Iterable
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import pytest
9 |
10 | from anonymeter.evaluators.inference_evaluator import InferenceEvaluator, evaluate_inference_guesses
11 |
12 | from tests.fixtures import get_adult
13 |
14 |
@pytest.mark.parametrize(
    "guesses, secrets, expected",
    [
        (("a", "b"), ("a", "b"), (True, True)),
        ((np.nan, "b"), (np.nan, "b"), (True, True)),
        ((np.nan, np.nan), (np.nan, np.nan), (True, True)),
        ((np.nan, "b"), ("a", np.nan), (False, False)),
        (("a", "b"), ("a", "c"), (True, False)),
        (("b", "b"), ("a", "c"), (False, False)),
        ((1, 0), (2, 0), (False, True)),
    ],
)
def test_evaluate_inference_guesses_classification(guesses, secrets, expected):
    """Element-wise guess evaluation in classification mode (`regression=False`).

    The cases expect a NaN guess to count as correct only against a NaN secret.
    """
    guess_series = pd.Series(guesses)
    secret_series = pd.Series(secrets)
    outcome = evaluate_inference_guesses(guesses=guess_series, secrets=secret_series, regression=False)
    np.testing.assert_equal(outcome, expected)
30 |
31 |
@pytest.mark.parametrize(
    "guesses, secrets, expected",
    [
        ((1.0, 1.0), (1.0, 1.0), (True, True)),
        ((1.01, 1.0), (1.0, 1.01), (True, True)),
        ((1.0, 1.0), (2.0, 1.01), (False, True)),
        ((1.0, 2.0), (2.0, 1.01), (False, False)),
    ],
)
def test_evaluate_inference_guesses_regression(guesses, secrets, expected):
    """Element-wise guess evaluation in regression mode with the default tolerance."""
    guess_series = pd.Series(guesses)
    secret_series = pd.Series(secrets)
    outcome = evaluate_inference_guesses(guesses=guess_series, secrets=secret_series, regression=True)
    np.testing.assert_equal(outcome, expected)
44 |
45 |
@pytest.mark.parametrize(
    "guesses, secrets, tolerance, expected",
    [
        ((1.0, 1.0), (1.05, 1.06), 0.05, (True, False)),
        ((1.0, 1.0), (1.05, 1.06), 0.06, (True, True)),
        ((1.0, np.nan), (1.05, np.nan), 0.06, (True, True)),
        ((np.nan, np.nan), (np.nan, np.nan), 0.06, (True, True)),
        ((1, np.nan), (np.nan, 1.06), 0.06, (False, False)),
        ((1.0, 1.0), (1.05, 1.06), 0.04, (False, False)),
        ((1.0, 1.0), (1.25, 1.26), 0.2, (False, False)),
        ((1.0, 1.0), (1.26, 1.25), 0.25, (False, True)),
    ],
)
def test_evaluate_inference_guesses_regression_tolerance(guesses, secrets, tolerance, expected):
    """Element-wise guess evaluation in regression mode with an explicit `tolerance`."""
    call_kwargs = {
        "guesses": pd.Series(guesses),
        "secrets": pd.Series(secrets),
        "tolerance": tolerance,
        "regression": True,
    }
    outcome = evaluate_inference_guesses(**call_kwargs)
    np.testing.assert_equal(outcome, expected)
64 |
65 |
@pytest.mark.parametrize(
    "ori, syn, expected",
    [
        ([["a", "b"], ["c", "d"]], [["a", "b"], ["c", "d"]], 1.0),
        ([["a", "b"], ["c", "d"]], [["a", "b"], ["c", "e"]], 0.5),
        ([["a", "b"], ["c", "d"]], [["a", "h"], ["c", "g"]], 0.0),
    ],
)
def test_inference_evaluator_rates(
    ori: Iterable,
    syn: Iterable,
    expected: float,
):
    """Attack and control rates on tiny two-column frames match the expected fraction.

    `ori` doubles as the control set, so both rates are compared to `expected`.
    """
    # Turn the raw rows into dataframes with columns "c0" (auxiliary) and "c1" (secret).
    column_names = pd.Index(["c0", "c1"])
    ori = pd.DataFrame(ori, columns=column_names)
    syn = pd.DataFrame(syn, columns=pd.Index(["c0", "c1"]))

    attack = InferenceEvaluator(
        ori=ori,
        syn=syn,
        control=ori,
        aux_cols=["c0"],
        secret="c1",
        n_attacks=2,
    )
    evaluated = attack.evaluate(n_jobs=1)
    results = evaluated.results(confidence_level=0)

    np.testing.assert_equal(results.attack_rate, (expected, 0))
    np.testing.assert_equal(results.control_rate, (expected, 0))
94 |
95 |
@pytest.mark.parametrize(
    "aux_cols",
    [
        ["type_employer", "capital_loss", "hr_per_week", "age"],
        ["education_num", "marital", "capital_loss"],
        ["age", "type_employer", "race"],
    ],
)
@pytest.mark.parametrize("secret", ["education", "marital", "capital_gain"])
def test_inference_evaluator_leaks(aux_cols, secret):
    """Using the same frame as ori, syn and control yields attack and control rates of 1."""
    data = get_adult("ori", n_samples=10)
    evaluator = InferenceEvaluator(
        ori=data,
        syn=data,
        control=data,
        aux_cols=aux_cols,
        secret=secret,
        n_attacks=10,
    )
    evaluator.evaluate(n_jobs=1)
    results = evaluator.results(confidence_level=0)

    perfect = (1, 0)
    np.testing.assert_equal(results.attack_rate, perfect)
    np.testing.assert_equal(results.control_rate, perfect)
113 |
114 |
def test_evaluator_not_evaluated():
    """Asking for the risk before calling `evaluate` raises `RuntimeError`."""
    data = get_adult("ori", n_samples=10)
    aux_cols = ["education_num", "marital", "capital_loss"]
    evaluator = InferenceEvaluator(ori=data, syn=data, control=data, aux_cols=aux_cols, secret="age")

    with pytest.raises(RuntimeError):
        evaluator.risk()
126 |
--------------------------------------------------------------------------------
/tests/test_linkability_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 |
8 | from anonymeter.evaluators.linkability_evaluator import LinkabilityEvaluator, LinkabilityIndexes
9 |
10 | from tests.fixtures import get_adult
11 |
12 | rng = np.random.default_rng(seed=42)
13 |
14 |
@pytest.mark.parametrize("n_attacks", [4, None])
@pytest.mark.parametrize(
    "n_neighbors, confidence_level, expected_risk, expected_ci",
    [
        (1, 0, 0.25, (0.25, 0.25)),
        (2, 0, 1, (1.0, 1.0)),
        (3, 0, 1, (1.0, 1.0)),
        (4, 0, 1, (1.0, 1.0)),
        (1, 0.95, 0.3725, (0.045587, 0.699358)),
        (2, 0.95, 0.7551, (0.5102, 1.0)),
    ],
)
def test_linkability_evaluator(n_neighbors, confidence_level, expected_risk, expected_ci, n_attacks):
    """Linkability risk on a hand-made 4x2 example, with `n_neighbors` set at construction."""
    # Ground truth for the confidence_level=0 cases, worked out by hand:
    # with a single neighbor only the first ori record has the same closest
    # syn row on both col0 and col1 (1 link out of 4 -> 0.25). With two or
    # more neighbors every ori record shares at least one syn row between
    # the two neighbor sets (4 links out of 4 -> 1).
    original = pd.DataFrame({"col0": [0, 0, 4, 0], "col1": [0, 1, 9, 4]})
    synthetic = pd.DataFrame({"col0": [0, 1, 4, 9], "col1": [0, 1, 4, 9]})

    evaluator = LinkabilityEvaluator(
        ori=original,
        syn=synthetic,
        n_attacks=n_attacks,
        n_neighbors=n_neighbors,
        aux_cols=(["col0"], ["col1"]),
    )
    evaluator.evaluate(n_jobs=1)

    risk, interval = evaluator.risk(confidence_level=confidence_level)
    np.testing.assert_allclose(risk, expected_risk, atol=1e-4)
    np.testing.assert_allclose(interval, expected_ci, atol=1e-4)
38 |
39 |
@pytest.mark.parametrize("n_attacks", [4, None])
@pytest.mark.parametrize(
    "n_neighbors, confidence_level, expected_risk, expected_ci",
    [
        (1, 0, 0.25, (0.25, 0.25)),
        (2, 0, 1, (1.0, 1.0)),
        (3, 0, 1, (1.0, 1.0)),
        (4, 0, 1, (1.0, 1.0)),
        (1, 0.95, 0.3725, (0.045587, 0.699358)),
        (2, 0.95, 0.7551, (0.5102, 1.0)),
    ],
)
def test_linkability_evaluator_neighbors(n_neighbors, confidence_level, expected_risk, expected_ci, n_attacks):
    """Linkability risk when `n_neighbors` is chosen at `risk()` time.

    The evaluator is always built with 4 neighbors; the parametrized
    `n_neighbors` is only passed to `risk`.
    """
    # Same data and expectations as the construction-time variant of this test:
    # with one neighbor only the first ori record links (1 of 4 -> 0.25), with
    # two or more neighbors all four ori records link (-> 1).
    original = pd.DataFrame({"col0": [0, 0, 4, 0], "col1": [0, 1, 9, 4]})
    synthetic = pd.DataFrame({"col0": [0, 1, 4, 9], "col1": [0, 1, 4, 9]})

    evaluator = LinkabilityEvaluator(
        ori=original,
        syn=synthetic,
        n_attacks=n_attacks,
        n_neighbors=4,
        aux_cols=(["col0"], ["col1"]),
    )
    evaluator.evaluate(n_jobs=1)

    risk, interval = evaluator.risk(confidence_level=confidence_level, n_neighbors=n_neighbors)
    np.testing.assert_allclose(risk, expected_risk, atol=1e-4)
    np.testing.assert_allclose(interval, expected_ci, atol=1e-4)
65 |
66 |
@pytest.mark.parametrize("n_neighbors, fails", [(1, False), (2, False), (3, False), (4, False), (5, True), (45, True)])
def test_linkability_evaluator_neighbors_fails(n_neighbors, fails):
    """`risk` raises `ValueError` when asked for more neighbors than the 4 used at construction."""
    original = pd.DataFrame({"col0": [0, 0, 4, 0], "col1": [0, 1, 9, 4]})
    synthetic = pd.DataFrame({"col0": [0, 1, 4, 9], "col1": [0, 1, 4, 9]})

    evaluator = LinkabilityEvaluator(
        ori=original,
        syn=synthetic,
        n_attacks=4,
        n_neighbors=4,
        aux_cols=(["col0"], ["col1"]),
    )
    evaluator.evaluate(n_jobs=1)

    if not fails:
        # Must simply run without raising.
        evaluator.risk(n_neighbors=n_neighbors)
        return

    with pytest.raises(ValueError):
        evaluator.risk(n_neighbors=n_neighbors)
80 |
81 |
@pytest.mark.parametrize("n_neighbors, expected_risk", [(1, 0.25), (2, 5 / 6), (3, 1), (4, 1)])
def test_baseline(n_neighbors, expected_risk):
    """Baseline linkability risk on a 4-row synthetic frame matches the expected value within 5e-2."""
    # For the baseline attack the actual content of the synthetic
    # and original dataframes does not really matter.
    original = pd.DataFrame(rng.choice(["a", "b"], size=(400, 2)), columns=["c0", "c1"])
    synthetic_rows = [["a", "a"], ["b", "b"], ["a", "a"], ["a", "a"]]
    synthetic = pd.DataFrame(synthetic_rows, columns=["c0", "c1"])

    evaluator = LinkabilityEvaluator(
        ori=original,
        syn=synthetic,
        n_attacks=None,
        n_neighbors=n_neighbors,
        aux_cols=(["c0"], ["c1"]),
    )
    evaluator.evaluate(n_jobs=1)

    baseline_risk, _ = evaluator.risk(confidence_level=0.95, baseline=True)
    np.testing.assert_allclose(baseline_risk, expected_risk, atol=5e-2)
101 |
102 |
@pytest.mark.parametrize(
    "n_neighbors, idx_0, idx_1, expected, n_expected",
    [
        (1, [[0], [1], [2], [3]], [[4], [5], [6], [7]], {}, 0),
        (1, [[0], [1], [2], [3]], [[4], [1], [6], [7]], {1: {1}}, 1),
        (1, [[0], [1], [2], [3]], [[4], [1], [6], [7]], {1: {1}}, 1),
        (1, [[0], [1], [6], [3]], [[4], [1], [6], [7]], {1: {1}, 2: {6}}, 2),
        (1, [[0, 1], [2, 3]], [[1, 0], [3, 2]], {}, 0),
        (2, [[0, 1], [2, 3]], [[1, 0], [3, 2]], {0: {0, 1}, 1: {2, 3}}, 2),
    ],
)
def test_find_links(n_neighbors, idx_0, idx_1, expected, n_expected):
    """`find_links` and `count_links` agree with hand-written neighbor index arrays."""
    neighbor_indexes = LinkabilityIndexes(idx_0=np.array(idx_0), idx_1=np.array(idx_1))

    found = neighbor_indexes.find_links(n_neighbors=n_neighbors)
    counted = neighbor_indexes.count_links(n_neighbors=n_neighbors)

    assert found == expected
    assert counted == n_expected
120 |
121 |
@pytest.mark.parametrize("confidence_level", [0.5, 0.68, 0.95, 0.99])
def test_linkability_risk(confidence_level):
    """Attacking the original data with itself gives an upper confidence bound of 1."""
    data = get_adult("ori", n_samples=10)
    # Draw four distinct columns and split them into the two auxiliary sets.
    picked = rng.choice(data.columns, size=4, replace=False)
    first_set, second_set = picked[:2].tolist(), picked[2:].tolist()

    evaluator = LinkabilityEvaluator(
        ori=data,
        syn=data,
        n_attacks=10,
        n_neighbors=5,
        aux_cols=(first_set, second_set),
    )
    evaluator.evaluate(n_jobs=1)

    _, interval = evaluator.risk(confidence_level=confidence_level)
    np.testing.assert_allclose(interval[1], 1.0)
140 |
141 |
def test_evaluator_not_evaluated():
    """Asking for the risk before calling `evaluate` raises `RuntimeError`."""
    empty = pd.DataFrame()
    evaluator = LinkabilityEvaluator(ori=empty, syn=pd.DataFrame(), aux_cols=([], []))

    with pytest.raises(RuntimeError):
        evaluator.risk()
150 |
--------------------------------------------------------------------------------
/tests/test_mixed_types_kneigbors.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 |
8 | from anonymeter.neighbors.mixed_types_kneighbors import MixedTypeKNeighbors, gower_distance
9 |
10 | from tests.fixtures import get_adult
11 |
12 | rng = np.random.default_rng()
13 |
14 |
def test_mixed_type_kNN():
    """Records queried against the frame they came from find themselves at distance 0."""
    data = get_adult("ori", n_samples=10)
    model = MixedTypeKNeighbors().fit(data)

    # Random row positions (drawn with replacement) used as queries.
    query_idx = rng.integers(10, size=10)
    queries = data.iloc[query_idx]

    distances, neighbor_ids = model.kneighbors(queries, n_neighbors=1, return_distance=True)
    np.testing.assert_equal(neighbor_ids.flatten(), query_idx)
    np.testing.assert_equal(distances, 0)
22 |
23 |
def test_mixed_type_kNN_numerical():
    """Neighbors are ordered by closeness of the numerical column when the categorical one is constant."""
    fitted_rows = [[0.0, "a"], [0.2, "a"], [0.15, "a"], [0.1, "a"]]
    query_rows = [[0.01, "a"]]

    model = MixedTypeKNeighbors().fit(pd.DataFrame(fitted_rows))
    neighbor_ids = model.kneighbors(pd.DataFrame(query_rows), n_neighbors=4, return_distance=False)

    np.testing.assert_equal(neighbor_ids, [[0, 3, 2, 1]])
30 |
31 |
def test_mixed_type_kNN_numerical_scaling():
    """The distance between the minimum and maximum fitted numerical values is 1."""
    fitted = pd.DataFrame([[0.0, "a"], [0.2, "a"], [0.15, "a"], [0.1, "a"]])

    # The query equals the smallest value in the fitted dataframe, so the
    # distance to the 2nd fitted record (the largest value) is maximal.
    query = pd.DataFrame([[0.0, "a"]])

    model = MixedTypeKNeighbors().fit(fitted)
    distances, neighbor_ids = model.kneighbors(query, n_neighbors=4, return_distance=True)

    np.testing.assert_equal(neighbor_ids, [[0, 3, 2, 1]])
    farthest = distances[neighbor_ids == 1]
    np.testing.assert_equal(farthest, 1)
42 |
43 |
@pytest.mark.parametrize("n_neighbors, n_queries", [(1, 10), (3, 5)])
def test_mixed_type_kNN_shape(n_neighbors, n_queries):
    """`kneighbors` returns an `(n_queries, n_neighbors)` array of indexes.

    Checked with `n_neighbors` passed to the constructor and to `kneighbors`.
    """

    def _check(neighbor_ids):
        assert isinstance(neighbor_ids, np.ndarray)
        assert neighbor_ids.shape == (n_queries, n_neighbors)

    data = get_adult("ori", n_samples=10)

    # n_neighbors fixed at construction time.
    model = MixedTypeKNeighbors(n_neighbors=n_neighbors).fit(data)
    _check(model.kneighbors(data.head(n_queries)))

    # n_neighbors supplied at query time.
    model = MixedTypeKNeighbors().fit(data)
    _check(model.kneighbors(data.head(n_queries), n_neighbors=n_neighbors))
56 |
57 |
@pytest.mark.parametrize(
    "r0, r1, expected",
    [
        ([0, 1, 0, 0], [0, 1, 0, 0], 0),
        ([1, 1, 0, 0], [0, 1, 0, 0], 1),
        ([1, 1, 1, 0], [0, 1, 0, 0], 2),
        ([1, 0, 1, 0], [1, 1, 0, 1], 3),
        ([1, 0, 1, 0], [0, 1, 0, 1], 4),
    ],
)
def test_gower_distance(r0, r1, expected):
    """Gower distance on 0/1 vectors equals the number of differing positions."""
    first, second = np.array(r0), np.array(r1)

    # cat_cols_index=0 and cat_cols_index=4 are the two extremes for a
    # 4-element record; on 0/1 data numerical and categorical columns
    # should behave the same.
    for cat_cols_index in (0, 4):
        distance = gower_distance(r0=first, r1=second, cat_cols_index=cat_cols_index)
        np.testing.assert_equal(distance, expected)
76 |
77 |
def test_gower_distance_numerical():
    """With `cat_cols_index=10` on 10-element vectors the distance is the sum of absolute differences."""
    r0 = rng.random(size=10)
    r1 = rng.random(size=10)
    manhattan = np.sum(np.abs(r0 - r1))
    distance = gower_distance(r0=r0, r1=r1, cat_cols_index=10)
    np.testing.assert_almost_equal(distance, manhattan)
82 |
--------------------------------------------------------------------------------
/tests/test_singling_out_evaluator.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 | from scipy import integrate
8 |
9 | from anonymeter.evaluators.singling_out_evaluator import (
10 | SinglingOutEvaluator,
11 | UniqueSinglingOutQueries,
12 | multivariate_singling_out_queries,
13 | safe_query_counts,
14 | singling_out_probability_integral,
15 | univariate_singling_out_queries,
16 | )
17 |
18 | from tests.fixtures import get_adult
19 |
20 |
@pytest.mark.parametrize("mode", ["univariate", "multivariate"])
def test_so_general(mode):
    """Every query reported by the evaluator isolates exactly one record.

    Each query returned by ``soe.queries()`` is run against both the
    synthetic and the original dataframe and must match a single row.
    """
    ori = get_adult("ori", n_samples=10)
    syn = get_adult("syn", n_samples=10)
    soe = SinglingOutEvaluator(ori=ori, syn=syn, n_attacks=5).evaluate(mode=mode)

    for q in soe.queries():
        # `len` has to wrap the query result itself. The previous form,
        # `len(df.query(q) == 1)`, measured a boolean frame and therefore
        # passed for any non-empty match instead of checking for one row.
        assert len(syn.query(q)) == 1
        # NOTE(review): assumes `queries()` only returns queries that were
        # successful on `ori` - confirm against SinglingOutEvaluator.
        assert len(ori.query(q)) == 1
30 |
31 |
def test_singling_out_queries_unique():
    """Appending the same query twice stores it only once; a new query is appended after it."""
    frame = pd.DataFrame({"c1": [1], "c2": [2]})
    first, second = "c1 == 1", "c2 == 2"
    collected = UniqueSinglingOutQueries()

    for _ in range(2):
        collected.check_and_append(first, df=frame)
    assert collected.queries == [first]

    collected.check_and_append(second, df=frame)
    assert collected.queries == [first, second]
44 |
45 |
def test_singling_out_queries_same_characters():
    """Queries made of the same characters in a different order are kept as distinct entries."""
    frame = pd.DataFrame([{"c": 1.2}, {"c": 2.1}])
    first, second = "c == 1.2", "c == 2.1"
    collected = UniqueSinglingOutQueries()

    for _ in range(2):
        collected.check_and_append(first, df=frame)
    assert collected.queries == [first]

    collected.check_and_append(second, df=frame)
    assert collected.queries == [first, second]
58 |
59 |
def test_singling_out_queries():
    """Only queries matching exactly one row of the frame are stored."""
    frame = pd.DataFrame({"c1": [1, 1], "c2": [2, 3]})
    collected = UniqueSinglingOutQueries()

    # Matches both rows, so it does not single out and must be dropped.
    collected.check_and_append("c1 == 1", df=frame)
    assert len(collected) == 0

    # Matches only the second row, so it singles out and must be kept.
    collected.check_and_append("c1 == 1 and c2 == 3", df=frame)
    assert len(collected) == 1
69 |
70 |
@pytest.mark.parametrize(
    "query, result", [("c1 == 0 and c2 == 'a'", 2), ("c3 == 'fuffa'", None), ("c1 == 2 and c2 == 'c'", 1)]
)
def test_safe_query_counts(query, result):
    """`safe_query_counts` returns the match count, or `None` for a query on a missing column."""
    frame = pd.DataFrame({"c1": [0, 0, 2], "c2": ["a", "a", "c"]})
    observed = safe_query_counts(query=query, df=frame)
    assert observed == result
77 |
78 |
def test_univariate_singling_out_queries():
    """A single column of four unique values yields one equality query per value."""
    frame = pd.DataFrame({"col1": ["a", "b", "c", "d"]})
    expected_queries = ["col1 == 'a'", "col1 == 'b'", "col1 == 'c'", "col1 == 'd'"]

    generated = univariate_singling_out_queries(df=frame, n_queries=10)

    # The order of generation is not part of the check.
    assert sorted(generated) == sorted(expected_queries)
84 |
85 |
def test_singling_out_query_generator():
    """Every multivariate query generated on a 2x2 frame belongs to the known set of candidates."""
    frame = pd.DataFrame({"c0": ["a", "b"], "c1": [1.23, 9.87]})
    possible_queries = [
        "c1<= 1.23 & c1>= 9.87",
        "c1>= 9.87 & c1<= 1.23",
        "c0== 'b' & c1<= 1.23",
        "c0== 'b' & c1>= 9.87",
        "c0== 'b' & c0== 'a'",
        "c0== 'a' & c1<= 1.23",
        "c0== 'a' & c1>= 9.87",
        "c0== 'a' & c0== 'b'",
    ]

    generated = multivariate_singling_out_queries(df=frame, n_queries=2, n_cols=2, max_attempts=None)

    unexpected = [query for query in generated if query not in possible_queries]
    assert not unexpected
101 |
102 |
@pytest.mark.parametrize("confidence_level", [0.5, 0.68, 0.95, 0.99])
@pytest.mark.parametrize("mode", ["univariate", "multivariate"])
def test_singling_out_risk_estimate(confidence_level, mode):
    """Attacking the original data with itself gives an upper confidence bound of 1."""
    data = get_adult("ori", 10)
    evaluator = SinglingOutEvaluator(ori=data, syn=data, n_attacks=5)
    evaluator.evaluate(mode=mode)

    _, interval = evaluator.risk(confidence_level=confidence_level)
    upper_bound = interval[1]
    np.testing.assert_allclose(upper_bound, 1.0)
111 |
112 |
def test_evaluator_not_evaluated():
    """Asking for the risk before calling `evaluate` raises `RuntimeError`."""
    evaluator = SinglingOutEvaluator(ori=pd.DataFrame(), syn=pd.DataFrame())

    with pytest.raises(RuntimeError):
        evaluator.risk()
117 |
118 |
@pytest.mark.parametrize("n", [100, 4242, 11235])
@pytest.mark.parametrize("w_min, w_max", [(0, 1), (1 / 10000, 1 / 1000), (0.0013414, 0.2314)])
def test_probability_integral(n, w_min, w_max):
    """`singling_out_probability_integral` agrees with numerical quadrature.

    The reference integrand is ``n * w * (1 - w) ** (n - 1)``, integrated
    over ``[w_min, w_max]`` with `scipy.integrate.quad`.
    """
    reference, _ = integrate.quad(
        lambda w: n * w * ((1 - w) ** (n - 1)),
        a=w_min,
        b=w_max,
    )
    computed = singling_out_probability_integral(n=n, w_min=w_min, w_max=w_max)
    np.testing.assert_almost_equal(reference, computed)
128 |
129 |
@pytest.mark.parametrize("max_attempts", [1, 2, 3])
def test_so_evaluator_max_attempts(max_attempts):
    """The multivariate attack never reports more queries than `max_attempts`."""
    data = get_adult("ori", 10)
    evaluator = SinglingOutEvaluator(ori=data, syn=data, n_attacks=10, max_attempts=max_attempts)
    evaluator.evaluate(mode="multivariate")

    n_queries = len(evaluator.queries())
    assert n_queries <= max_attempts
137 |
--------------------------------------------------------------------------------
/tests/test_transformations.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 | from scipy.spatial.distance import pdist, squareform
8 |
9 | from anonymeter.preprocessing.transformations import mixed_types_transform
10 |
11 | rng = np.random.default_rng()
12 |
13 |
def test_scaling_numerical():
    """After the transform the numerical values of both frames span a range of exactly 1."""
    df_ori = pd.DataFrame({"c": rng.random(5)})
    df_syn = pd.DataFrame({"c": rng.random(5)})
    scaled_ori, scaled_syn = mixed_types_transform(df_ori, df_syn, num_cols=["c"], cat_cols=[])

    # Scaling puts every absolute difference between 0 and 1. The pairwise
    # distance matrix is square and symmetric, so the one extreme pair shows
    # up as two entries equal to 1.
    stacked = pd.concat([scaled_ori, scaled_syn])["c"].values
    column_vector = stacked[:, np.newaxis]
    distances = squareform(pdist(column_vector, "cityblock"))

    assert np.sum(np.isclose(distances, 1)) == 2
    assert np.amin(distances) == 0
24 |
25 |
@pytest.mark.parametrize(
    "df1, df2, exp1, exp2",
    [
        (
            pd.DataFrame({"c": ["a", "b", "c", "d"]}),
            pd.DataFrame({"c": ["a", "b", "c", "c"]}),
            pd.DataFrame({"c": [0, 1, 2, 3]}),
            pd.DataFrame({"c": [0, 1, 2, 2]}),
        ),
        (
            pd.DataFrame({"c": ["a", "b", "c", None]}),
            pd.DataFrame({"c": ["a", "b", "c", "c"]}),
            pd.DataFrame({"c": [0, 1, 2, 3]}),
            pd.DataFrame({"c": [0, 1, 2, 2]}),
        ),
        (
            pd.DataFrame({"c": ["a", "b", "c", "d"]}),
            pd.DataFrame({"c": ["a", "b", None, "c"]}),
            pd.DataFrame({"c": [0, 1, 2, 3]}),
            pd.DataFrame({"c": [0, 1, 4, 2]}),
        ),
    ],
)
def test_encoding_categorical(df1, df2, exp1, exp2):
    """Categorical values (including `None`) are mapped to the expected integer codes in both frames."""
    encoded = mixed_types_transform(df1=df1, df2=df2, cat_cols=["c"], num_cols=[])
    encoded_first, encoded_second = encoded
    pd.testing.assert_frame_equal(encoded_first, exp1)
    pd.testing.assert_frame_equal(encoded_second, exp2)
53 |
54 |
@pytest.mark.parametrize(
    "df1, df2, exp1, exp2",
    [
        (
            pd.DataFrame({"c": ["a", "b", "c"]}),
            pd.DataFrame({"c": ["a", "b", "d"]}),
            pd.DataFrame({"c": [0, 1, 2]}),
            pd.DataFrame({"c": [0, 1, 3]}),
        ),
        (
            pd.DataFrame({"c": ["a", "b", "c"]}),
            pd.DataFrame({"c": ["a", "b", None]}),
            pd.DataFrame({"c": [0, 1, 2]}),
            pd.DataFrame({"c": [0, 1, 3]}),
        ),
        (
            pd.DataFrame({"c": [None, "b", "c"]}),
            pd.DataFrame({"c": ["a", "b", None]}),
            pd.DataFrame({"c": [3, 1, 2]}),
            pd.DataFrame({"c": [0, 1, 3]}),
        ),
    ],
)
def test_encoding_categorical_new_values(df1, df2, exp1, exp2):
    """Values present in only one of the two frames still receive the expected integer codes."""
    encoded = mixed_types_transform(df1=df1, df2=df2, cat_cols=["c"], num_cols=[])
    encoded_first, encoded_second = encoded
    pd.testing.assert_frame_equal(encoded_first, exp1)
    pd.testing.assert_frame_equal(encoded_second, exp2)
82 |
--------------------------------------------------------------------------------
/tests/test_type_detection.py:
--------------------------------------------------------------------------------
1 | # This file is part of Anonymeter and is released under BSD 3-Clause Clear License.
2 | # Copyright (c) 2022 Anonos IP LLC.
3 | # See https://github.com/statice/anonymeter/blob/main/LICENSE.md for details.
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 |
8 | from anonymeter.preprocessing.type_detection import detect_col_types, detect_consistent_col_types
9 |
# Shared random generator for the tests in this module. No seed is passed, so
# the randomly drawn inputs differ from one test run to the next.
rng = np.random.default_rng()
11 |
12 |
@pytest.mark.parametrize(
    ("df", "expected"),
    [
        # One float column and one string column.
        (pd.DataFrame({"num": rng.random(5), "cat": list("abcde")}), {"cat": ["cat"], "num": ["num"]}),
        # A float column and an integer column are both expected to be numerical.
        (pd.DataFrame({"num1": rng.random(5), "num2": [1, 2, 3, 4, 5]}), {"cat": [], "num": ["num1", "num2"]}),
        # The same numbers cast to ``object`` dtype are expected to be categorical.
        (
            pd.DataFrame({"num1": rng.random(5), "num2": [1, 2, 3, 4, 5]}).astype("object"),
            {"cat": ["num1", "num2"], "num": []},
        ),
        # String columns, including strings of digits, are expected to be categorical.
        (
            pd.DataFrame({"cat1": list("abcde"), "cat2": ["1", "2", "3", "4", "5"]}),
            {"cat": ["cat1", "cat2"], "num": []},
        ),
    ],
)
def test_detect_col_types(df, expected):
    """Compare the output of ``detect_col_types`` with the expected column split."""
    assert detect_col_types(df=df) == expected
31 |
32 |
def test_detect_col_types_consistent():
    """Two frames with matching column dtypes yield one common column-type mapping."""
    first = pd.DataFrame({"num": rng.random(5), "cat": list("abcde")})
    second = pd.DataFrame({"num": rng.random(5), "cat": list("fghil")})
    detected = detect_consistent_col_types(first, second)
    expected = {"cat": ["cat"], "num": ["num"]}
    assert detected == expected
37 |
38 |
def test_detect_col_types_consistent_raises():
    """A ``RuntimeError`` is expected when "num" holds floats in one frame and strings in the other."""
    numeric_frame = pd.DataFrame({"num": rng.random(5), "cat": list("abcde")})
    # Same column names, but the "num" values are stored as strings here.
    stringified_frame = pd.DataFrame({"num": list(map(str, rng.random(5))), "cat": list("fghil")})
    with pytest.raises(RuntimeError):
        detect_consistent_col_types(numeric_frame, stringified_frame)
44 |
--------------------------------------------------------------------------------