├── .github
    └── workflows
    │   └── python-app.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── clean.sh
├── config
    ├── config.yaml
    ├── deepsets
    │   └── base.yaml
    ├── experiments
    │   ├── all.yaml
    │   ├── all_regression.yaml
    │   ├── simple.yaml
    │   ├── test.yaml
    │   ├── test_multiclass.yaml
    │   └── test_regression.yaml
    ├── generators
    │   └── base.yaml
    └── models
    │   └── base.yaml
├── docs
    ├── experiment.html
    ├── generator.html
    ├── index.html
    └── model.html
├── env.yml
├── logging.ini
├── propinfer
    ├── __init__.py
    ├── deepsets.py
    ├── experiment.py
    ├── generator.py
    ├── model.py
    └── model_utils.py
├── pyproject.toml
├── run.py
├── setup.cfg
└── tests
    ├── __init__.py
    ├── test_deepsets.py
    ├── test_experiment.py
    ├── test_generator.py
    ├── test_model.py
    └── test_model_utils.py


/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
 1 | name: Continuous Integration
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build-linux:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     steps:
10 |     - uses: actions/checkout@v2
11 |     - name: Set up Python 3.9
12 |       uses: actions/setup-python@v2
13 |       with:
14 |         python-version: 3.9
15 |     - name: Install dependencies
16 |       run: |
17 |         # $CONDA is an environment variable pointing to the root of the miniconda directory
18 |         $CONDA/bin/conda env update --file env.yml --name base
19 |         conda install pytorch torchvision torchaudio cpuonly -c pytorch
20 |     - name: Unittests
21 |       run: |
22 |         conda install pytest
23 |         $CONDA/bin/pytest
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea
 2 | results
 3 | logs
 4 | outputs
 5 | dist
 6 | propinfer.egg-info
 7 | local
 8 | 
 9 | *.pyc
10 | .DS_Store
11 | *.ipynb


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 EPFL dlab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Property Inference Attacks
 2 | 
 3 | In this repository, we propose a modular framework to run Property Inference Attacks on Machine Learning models.
 4 | 
 5 | [![Continuous Integration](https://github.com/epfl-dlab/property-inference-framework/actions/workflows/python-app.yml/badge.svg)](https://github.com/epfl-dlab/property-inference-framework/actions/workflows/python-app.yml)
 6 | [![PyPI](https://img.shields.io/pypi/v/propinfer)](https://pypi.org/project/propinfer/)
 7 | [![Documentation](https://img.shields.io/badge/Documentation-v1.3.0-informational)](https://epfl-dlab.github.io/property-inference-attacks/)
 8 | 
 9 | 
10 | ## Installation
11 | 
12 | You can get this package directly from pip:
13 | 
14 | `python -m pip install propinfer`
15 | 
16 | Please note that PyTorch is required to run this framework. You can find the installation instructions for your platform [here](https://pytorch.org/).
17 | 
18 | ## Usage
19 | 
20 | This framework is made modular for any of your experiments: you simply need to define subclasses of `Generator` and `Model`
21 | to represent your data source and your evaluated model respectively.
22 | 
23 | From these, you can create a specific experiment configuration file. We suggest using [hydra](https://hydra.cc/docs/intro/) for your configurations, but parameters can also be passed in a standard `dict`.
24 | 
25 | Alternatively, you can extend the Experiment class.
26 | 
27 | ## Threat models and attacks
28 | 
29 | ### White-Box 
30 | In this threat model, we have access to the model's parameters directly. In this case, [1] defines three different attacks:
31 |  * Simple meta-classifier attack
32 |  * Simple meta-classifier attack, with layer weights' sorting
33 |  * DeepSets attack
34 |  
35 | They are respectively designated by the keywords `Naive`, `Sort` and `DeepSets`.
36 | 
37 | ### Grey- and Black-Box
38 |  
39 | In this threat model, we have only query access to the model (we do not know its parameters). In the scope of the Grey-Box threat model, we know the model's architecture and hyperparameters - in the scope of Black-Box we do not.
40 | 
41 | For the Grey-Box case, [2] describes two simple attacks:
42 |  * The Loss Test (represented by the `LossTest` keyword)
43 |  * The Threshold Test (represented by the `ThresholdTest` keyword)
44 |  
45 | [3] also proposes a meta-classifier-based attack, which we use for both the Grey-Box and Black-Box cases: these are respectively represented by the `GreyBox` and `BlackBox` keywords. For the latter case, we simply default to a pre-defined model architecture.
46 | 
47 | ## Unit tests
48 | 
49 | The framework is provided with a few simple unit tests. Run them with:
50 | 
51 | `python -m unittest discover`
52 | 
53 | to check the correctness of your installation.
54 | 
55 | ## Running an experiment
56 | 
57 | To run a simple experiment, please simply use the provided `run.py`. You can change any experiment parameter with the help of the yaml config files, inside the `config` folder.
58 | 
59 | To run an experiment using a specific `my_experiments.yaml` config file, you should place its yaml config file in `/config/experiments`, and then run:
60 | 
61 | `python run.py experiments=my_experiments`
62 | 
63 | Alternatively, you can instantiate an `Experiment` object using a specific `Generator` and `Model`, and then run both targets and shadows before performing an attack.
64 | 
65 | It is possible to provide a list as a model hyperparameter: in that case, the framework will automatically optimise between the given options.
66 | 
67 | ## Citation
68 | 
69 | If you use this library for your work, please cite [our paper](https://doi.org/10.1109/SaTML54575.2023.00018) as follows:
70 | 
71 | ```
72 | V. Hartmann, L. Meynent, M. Peyrard, D. Dimitriadis, S. Tople and R. West, "Distribution Inference Risks: Identifying and Mitigating Sources of Leakage," 2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML), Raleigh, NC, USA, 2023, pp. 136-149, doi: 10.1109/SaTML54575.2023.00018.
73 | ```
74 | 
75 | ```
76 | @INPROCEEDINGS{10136150,
77 |   author={Hartmann, Valentin and Meynent, Léo and Peyrard, Maxime and Dimitriadis, Dimitrios and Tople, Shruti and West, Robert},
78 |   booktitle={2023 IEEE Conference on Secure and Trustworthy Machine Learning (SaTML)}, 
79 |   title={Distribution Inference Risks: Identifying and Mitigating Sources of Leakage}, 
80 |   year={2023},
81 |   volume={},
82 |   number={},
83 |   pages={136-149},
84 |   doi={10.1109/SaTML54575.2023.00018}
85 | }
86 | ```
87 | 
88 | ## References
89 | 
90 | [1] Karan Ganju, Qi Wang, Wei Yang, Carl A. Gunter, and Nikita Borisov. 2018. Property Inference Attacks on Fully Connected Neural Networks using Permutation Invariant Representations. In Proceedings of the 2018 ACM SIGSAC Conference on Computer and Communications Security (CCS '18). Association for Computing Machinery, New York, NY, USA, 619–633. DOI:https://doi.org/10.1145/3243734.3243834
91 | 
92 | [2] Anshuman Suri, David Evans. 2021. Formalizing Distribution Inference Risks. 2021 Workshop on Theory and Practice of Differential Privacy, ICML. https://arxiv.org/abs/2106.03699
93 | 
94 | [3] Wanrong Zhang, Shruti Tople, Olga Ohrimenko. 2021. Leakage of Dataset Properties in Multi-Party Machine Learning. https://arxiv.org/abs/2006.07267
95 | 


--------------------------------------------------------------------------------
/clean.sh:
--------------------------------------------------------------------------------
1 | read -p "You are going to erase all saved logs and results. Do you want to proceed? (y/n):  " -n 1 -r
2 | if [[ $REPLY =~ ^[Yy]$ ]]
3 | then
4 |   rm logs/*
5 |   rm results/*
6 | fi


--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |    - experiments: test
3 |    - generators: base
4 |    - models: base
5 |    - deepsets: base
6 |    - _self_
7 | 
8 | outdir: results


--------------------------------------------------------------------------------
/config/deepsets/base.yaml:
--------------------------------------------------------------------------------
1 | latent_dim: 5
2 | epochs: 20
3 | learning_rate: 1e-2
4 | weight_decay: 1e-3


--------------------------------------------------------------------------------
/config/experiments/all.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 256
 2 | n_shadows: 2048
 3 | n_queries: 1024
 4 | models:
 5 |   - LogReg
 6 |   - MLP
 7 | generators:
 8 |   - GaussianGenerator
 9 |   - IndependentPropertyGenerator
10 |   - NonlinearGenerator
11 |   - ProbitGenerator
12 | runs:
13 |   - LossTest
14 |   - ThresholdTest
15 |   - Naive
16 |   - Sort
17 |   - DeepSets
18 |   - GreyBox
19 |   - BlackBox
20 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/experiments/all_regression.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 512
 2 | n_shadows: 4096
 3 | n_queries: 1024
 4 | n_classes: 1
 5 | range: [0., 1.]
 6 | models:
 7 |   - LogReg
 8 |   - MLP
 9 | generators:
10 |   - NonlinearGenerator
11 |   - ProbitGenerator
12 | runs:
13 |   - Naive
14 |   - Sort
15 |   - DeepSets
16 |   - GreyBox
17 |   - BlackBox
18 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/experiments/simple.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 16
 2 | n_shadows: 64
 3 | n_queries: 1024
 4 | models:
 5 |   - LogReg
 6 |   - MLP
 7 | generators:
 8 |   - GaussianGenerator
 9 |   - IndependentPropertyGenerator
10 | runs:
11 |   - LossTest
12 |   - ThresholdTest
13 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/experiments/test.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 16
 2 | n_shadows: 64
 3 | n_queries: 1024
 4 | models:
 5 |   - LogReg
 6 |   - MLP
 7 | generators:
 8 |   - GaussianGenerator
 9 | runs:
10 |   - LossTest
11 |   - ThresholdTest
12 |   - Naive
13 |   - Sort
14 |   - DeepSets
15 |   - GreyBox
16 |   - BlackBox
17 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/experiments/test_multiclass.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 16
 2 | n_shadows: 64
 3 | n_queries: 1024
 4 | n_classes: 4
 5 | models:
 6 |   - LogReg
 7 |   - MLP
 8 | generators:
 9 |   - GaussianGenerator
10 | runs:
11 |   - Naive
12 |   - Sort
13 |   - DeepSets
14 |   - GreyBox
15 |   - BlackBox
16 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/experiments/test_regression.yaml:
--------------------------------------------------------------------------------
 1 | n_targets: 16
 2 | n_shadows: 64
 3 | n_queries: 1024
 4 | n_classes: 1
 5 | range: [0., 1.]
 6 | models:
 7 |   - LogReg
 8 |   - MLP
 9 | generators:
10 |   - NonlinearGenerator
11 | runs:
12 |   - Naive
13 |   - Sort
14 |   - DeepSets
15 |   - GreyBox
16 |   - BlackBox
17 | blackbox_model: LogReg


--------------------------------------------------------------------------------
/config/generators/base.yaml:
--------------------------------------------------------------------------------
1 | num_samples: 1024
2 | label_col: label


--------------------------------------------------------------------------------
/config/models/base.yaml:
--------------------------------------------------------------------------------
 1 | LogReg:
 2 |     max_iter: 100
 3 | 
 4 | MLP:
 5 |     input_size: 4
 6 |     num_classes: 2
 7 |     epochs: 20
 8 |     learning_rate: 1e-2
 9 |     weight_decay: 1e-2
10 |     batch_size: 32
11 |     layers: [16, 4]


--------------------------------------------------------------------------------
/docs/experiment.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html lang="en">
  3 | <head>
  4 | <meta charset="utf-8">
  5 | <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
  6 | <meta name="generator" content="pdoc 0.10.0" />
  7 | <title>propinfer.experiment API documentation</title>
  8 | <meta name="description" content="" />
  9 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
 10 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
 11 | <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
 12 | <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
 13 | <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
 14 | <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
 15 | <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
 16 | <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
 17 | </head>
 18 | <body>
 19 | <main>
 20 | <article id="content">
 21 | <header>
 22 | <h1 class="title">Module <code>propinfer.experiment</code></h1>
 23 | </header>
 24 | <section id="section-intro">
 25 | </section>
 26 | <section>
 27 | </section>
 28 | <section>
 29 | </section>
 30 | <section>
 31 | </section>
 32 | <section>
 33 | <h2 class="section-title" id="header-classes">Classes</h2>
 34 | <dl>
 35 | <dt id="propinfer.experiment.Experiment"><code class="flex name class">
 36 | <span>class <span class="ident">Experiment</span></span>
 37 | <span>(</span><span>generator, label_col, model, n_targets, n_shadows, hyperparams, n_queries=1024, n_classes=2, range=None)</span>
 38 | </code></dt>
 39 | <dd>
 40 | <div class="desc"><p>Object representing an experiment, based on its data generator and model pair</p>
 41 | <h2 id="args">Args</h2>
 42 | <dl>
 43 | <dt><strong><code>generator</code></strong> :&ensp;<code>Generator</code></dt>
 44 | <dd>data abstraction used for this experiment</dd>
 45 | <dt><strong><code>model</code></strong> :&ensp;<code>Model.__class__</code></dt>
 46 | <dd>a Model class that represents the model to be used</dd>
 47 | <dt><strong><code>n_targets</code></strong> :&ensp;<code>int</code></dt>
 48 | <dd>the total number of target models</dd>
 49 | <dt><strong><code>n_shadows</code></strong> :&ensp;<code>int</code></dt>
 50 | <dd>the total number of shadow models</dd>
 51 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
 52 | <dd>dictionary containing every useful hyper-parameter for the Model;
 53 | if a list is provided for some hyperparameter(s), we grid optimise between all given options (except for keyword <code>layers</code>)</dd>
 54 | <dt><strong><code>n_queries</code></strong> :&ensp;<code>int</code></dt>
 55 | <dd>the number of queries used in the scope of grey- and black-box attacks. Must be strictly greater than <code>n_targets</code></dd>
 56 | <dt><strong><code>n_classes</code></strong> :&ensp;<code>int</code></dt>
 57 | <dd>the number of classes considered for property inference; if 1 then a regression is performed</dd>
 58 | <dt><strong><code>range</code></strong> :&ensp;<code>tuple</code></dt>
 59 | <dd>the range of values accepted for regression tasks (needed for regression, ignored for classification).
 60 | It is possible to pass an iterable of multiple ranges in order to perform multi-variable property inference regression, in which case the values of the variables are passed to the Generator as a list</dd>
 61 | </dl></div>
 62 | <h3>Methods</h3>
 63 | <dl>
 64 | <dt id="propinfer.experiment.Experiment.run_blackbox"><code class="name flex">
 65 | <span>def <span class="ident">run_blackbox</span></span>(<span>self, n_outputs=1)</span>
 66 | </code></dt>
 67 | <dd>
 68 | <div class="desc"><p>Runs a blackbox attack on the target models, by using the result of random queries as features for a meta-classifier</p>
 69 | <h2 id="args">Args</h2>
 70 | <dl>
 71 | <dt><strong><code>n_outputs</code></strong> :&ensp;<code>int</code></dt>
 72 | <dd>number of attack results to output, using multiple random subsets of the shadow models</dd>
 73 | </dl>
 74 | <p>Returns: Attack accuracy on target models for the classification task, or mean absolute error for the regression task</p></div>
 75 | </dd>
 76 | <dt id="propinfer.experiment.Experiment.run_loss_test"><code class="name flex">
 77 | <span>def <span class="ident">run_loss_test</span></span>(<span>self)</span>
 78 | </code></dt>
 79 | <dd>
 80 | <div class="desc"><p>Runs a loss test attack on target models. Works only for the binary classification attack on a classifier.</p>
 81 | <p>Returns: Attack accuracy on target models</p></div>
 82 | </dd>
 83 | <dt id="propinfer.experiment.Experiment.run_shadows"><code class="name flex">
 84 | <span>def <span class="ident">run_shadows</span></span>(<span>self, model=None, hyperparams=None)</span>
 85 | </code></dt>
 86 | <dd>
 87 | <div class="desc"><p>Create and fit shadow models</p>
 88 | <h2 id="args">Args</h2>
 89 | <dl>
 90 | <dt><strong><code>model</code></strong> :&ensp;<code>Model.__class__</code></dt>
 91 | <dd>a Model class that represents the model to be used. If None, will be the same as target models</dd>
 92 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
 93 | <dd>dictionary containing every useful hyper-parameter for the Model;
 94 | Hyperparameters of shadow models will NOT be optimised. If None, will be the same as target models.</dd>
 95 | </dl></div>
 96 | </dd>
 97 | <dt id="propinfer.experiment.Experiment.run_targets"><code class="name flex">
 98 | <span>def <span class="ident">run_targets</span></span>(<span>self)</span>
 99 | </code></dt>
100 | <dd>
101 | <div class="desc"><p>Create and fit target models</p></div>
102 | </dd>
103 | <dt id="propinfer.experiment.Experiment.run_threshold_test"><code class="name flex">
104 | <span>def <span class="ident">run_threshold_test</span></span>(<span>self, n_outputs=1)</span>
105 | </code></dt>
106 | <dd>
107 | <div class="desc"><p>Runs a threshold test attack on target models. Works only for the binary classification attack on a classifier.</p>
108 | <h2 id="args">Args</h2>
109 | <dl>
110 | <dt><strong><code>n_outputs</code></strong> :&ensp;<code>int</code></dt>
111 | <dd>number of attack results to output, using multiple random subsets of the shadow models</dd>
112 | </dl>
113 | <p>Returns: Attack accuracy on target models</p></div>
114 | </dd>
115 | <dt id="propinfer.experiment.Experiment.run_whitebox_deepsets"><code class="name flex">
116 | <span>def <span class="ident">run_whitebox_deepsets</span></span>(<span>self, hyperparams, n_outputs=1)</span>
117 | </code></dt>
118 | <dd>
119 | <div class="desc"><p>Runs a whitebox attack on the target models using a DeepSets meta-classifier</p>
120 | <h2 id="args">Args</h2>
121 | <dl>
122 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
123 | <dd>Hyperparameters for the DeepSets meta-classifier.
124 | Accepted keywords are: latent_dim (default=5); epochs (default=20); learning_rate (default=1e-4); weight_decay (default=1e-4)</dd>
125 | <dt><strong><code>n_outputs</code></strong> :&ensp;<code>int</code></dt>
126 | <dd>number of attack results to output, using multiple random subsets of the shadow models</dd>
127 | </dl>
128 | <p>Returns: Attack accuracy on target models</p></div>
129 | </dd>
130 | <dt id="propinfer.experiment.Experiment.run_whitebox_sort"><code class="name flex">
131 | <span>def <span class="ident">run_whitebox_sort</span></span>(<span>self, sort=True, n_outputs=1)</span>
132 | </code></dt>
133 | <dd>
134 | <div class="desc"><p>Runs a whitebox attack on the target models, by using the model parameters as features for a meta-classifier</p>
135 | <h2 id="args">Args</h2>
136 | <dl>
137 | <dt><strong><code>sort</code></strong> :&ensp;<code>bool</code></dt>
138 | <dd>whether to perform node sorting (to be used for permutation-invariant DNN)</dd>
139 | <dt><strong><code>n_outputs</code></strong> :&ensp;<code>int</code></dt>
140 | <dd>number of attack results to output, using multiple random subsets of the shadow models</dd>
141 | </dl>
142 | <p>Returns: Attack accuracy on target models for the classification task, or mean absolute error for the regression task</p></div>
143 | </dd>
144 | </dl>
145 | </dd>
146 | </dl>
147 | </section>
148 | </article>
149 | <nav id="sidebar">
150 | <header>
151 | <a class="homelink" rel="home" title="Home" href="index.html">
152 | <img src="https://dlab.epfl.ch/assets/img/dlab.svg" alt="Home" width="200">
153 | </a>
154 | </header>
155 | <h1>Index</h1>
156 | <div class="toc">
157 | <ul></ul>
158 | </div>
159 | <ul id="index">
160 | <li><h3>Super-module</h3>
161 | <ul>
162 | <li><code><a title="propinfer" href="index.html">propinfer</a></code></li>
163 | </ul>
164 | </li>
165 | <li><h3><a href="#header-classes">Classes</a></h3>
166 | <ul>
167 | <li>
168 | <h4><code><a title="propinfer.experiment.Experiment" href="#propinfer.experiment.Experiment">Experiment</a></code></h4>
169 | <ul class="">
170 | <li><code><a title="propinfer.experiment.Experiment.run_blackbox" href="#propinfer.experiment.Experiment.run_blackbox">run_blackbox</a></code></li>
171 | <li><code><a title="propinfer.experiment.Experiment.run_loss_test" href="#propinfer.experiment.Experiment.run_loss_test">run_loss_test</a></code></li>
172 | <li><code><a title="propinfer.experiment.Experiment.run_shadows" href="#propinfer.experiment.Experiment.run_shadows">run_shadows</a></code></li>
173 | <li><code><a title="propinfer.experiment.Experiment.run_targets" href="#propinfer.experiment.Experiment.run_targets">run_targets</a></code></li>
174 | <li><code><a title="propinfer.experiment.Experiment.run_threshold_test" href="#propinfer.experiment.Experiment.run_threshold_test">run_threshold_test</a></code></li>
175 | <li><code><a title="propinfer.experiment.Experiment.run_whitebox_deepsets" href="#propinfer.experiment.Experiment.run_whitebox_deepsets">run_whitebox_deepsets</a></code></li>
176 | <li><code><a title="propinfer.experiment.Experiment.run_whitebox_sort" href="#propinfer.experiment.Experiment.run_whitebox_sort">run_whitebox_sort</a></code></li>
177 | </ul>
178 | </li>
179 | </ul>
180 | </li>
181 | </ul>
182 | </nav>
183 | </main>
184 | <footer id="footer">
185 | <p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
186 | </footer>
187 | </body>
188 | </html>


--------------------------------------------------------------------------------
/docs/generator.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html lang="en">
  3 | <head>
  4 | <meta charset="utf-8">
  5 | <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
  6 | <meta name="generator" content="pdoc 0.10.0" />
  7 | <title>propinfer.generator API documentation</title>
  8 | <meta name="description" content="" />
  9 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
 10 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
 11 | <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
 12 | <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
 13 | <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
 14 | <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
 15 | <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
 16 | <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
 17 | </head>
 18 | <body>
 19 | <main>
 20 | <article id="content">
 21 | <header>
 22 | <h1 class="title">Module <code>propinfer.generator</code></h1>
 23 | </header>
 24 | <section id="section-intro">
 25 | </section>
 26 | <section>
 27 | </section>
 28 | <section>
 29 | </section>
 30 | <section>
 31 | </section>
 32 | <section>
 33 | <h2 class="section-title" id="header-classes">Classes</h2>
 34 | <dl>
 35 | <dt id="propinfer.generator.GaussianGenerator"><code class="flex name class">
 36 | <span>class <span class="ident">GaussianGenerator</span></span>
 37 | <span>(</span><span>n_samples=1024)</span>
 38 | </code></dt>
 39 | <dd>
 40 | <div class="desc"><p>Generator sampling from a multivariate Gaussian Distribution in which features are correlated.
 41 | Label is made categorical by checking whether it is positive or negative.
 42 | Sensitive attribute is the mean of the fourth feature vector</p></div>
 43 | <h3>Ancestors</h3>
 44 | <ul class="hlist">
 45 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
 46 | </ul>
 47 | <h3>Inherited members</h3>
 48 | <ul class="hlist">
 49 | <li><code><b><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></b></code>:
 50 | <ul class="hlist">
 51 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
 52 | </ul>
 53 | </li>
 54 | </ul>
 55 | </dd>
 56 | <dt id="propinfer.generator.Generator"><code class="flex name class">
 57 | <span>class <span class="ident">Generator</span></span>
 58 | <span>(</span><span>n_samples=1024)</span>
 59 | </code></dt>
 60 | <dd>
 61 | <div class="desc"><p>An abstraction class used to query for data</p></div>
 62 | <h3>Subclasses</h3>
 63 | <ul class="hlist">
 64 | <li><a title="propinfer.generator.GaussianGenerator" href="#propinfer.generator.GaussianGenerator">GaussianGenerator</a></li>
 65 | <li><a title="propinfer.generator.IndependentPropertyGenerator" href="#propinfer.generator.IndependentPropertyGenerator">IndependentPropertyGenerator</a></li>
 66 | <li><a title="propinfer.generator.LinearGenerator" href="#propinfer.generator.LinearGenerator">LinearGenerator</a></li>
 67 | <li><a title="propinfer.generator.MultilabelProbitGenerator" href="#propinfer.generator.MultilabelProbitGenerator">MultilabelProbitGenerator</a></li>
 68 | <li><a title="propinfer.generator.SubsamplingGenerator" href="#propinfer.generator.SubsamplingGenerator">SubsamplingGenerator</a></li>
 69 | </ul>
 70 | <h3>Methods</h3>
 71 | <dl>
 72 | <dt id="propinfer.generator.Generator.sample"><code class="name flex">
 73 | <span>def <span class="ident">sample</span></span>(<span>self, label, adv=False)</span>
 74 | </code></dt>
 75 | <dd>
 76 | <div class="desc"><p>Returns a dataset sampled from the data; the label variable corresponds to the property being attacked</p>
 77 | <h2 id="args">Args</h2>
 78 | <dl>
 79 | <dt><strong><code>label</code></strong> :&ensp;<code>int</code> or <code>float</code> or <code>numpy.array</code></dt>
 80 | <dd>the label corresponding to the dataset being queried - when performing regression, the value of the target variable(s)</dd>
 81 | <dt><strong><code>adv</code></strong> :&ensp;<code>bool</code></dt>
 82 | <dd>a boolean describing whether we are using target or adversary data split</dd>
 83 | </dl>
 84 | <h2 id="returns">Returns</h2>
 85 | <p>a pandas DataFrame representing our dataset for this experiment</p></div>
 86 | </dd>
 87 | </dl>
 88 | </dd>
 89 | <dt id="propinfer.generator.IndependentPropertyGenerator"><code class="flex name class">
 90 | <span>class <span class="ident">IndependentPropertyGenerator</span></span>
 91 | <span>(</span><span>n_samples=1024)</span>
 92 | </code></dt>
 93 | <dd>
 94 | <div class="desc"><p>Generator sampling from a multivariate Gaussian Distribution in which features are not correlated with the label, but are correlated with each other.
 95 | Label is made categorical by checking whether it is positive or negative.
 96 | Sensitive attribute is the mean of the fourth feature vector</p></div>
 97 | <h3>Ancestors</h3>
 98 | <ul class="hlist">
 99 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
100 | </ul>
101 | <h3>Inherited members</h3>
102 | <ul class="hlist">
103 | <li><code><b><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></b></code>:
104 | <ul class="hlist">
105 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
106 | </ul>
107 | </li>
108 | </ul>
109 | </dd>
110 | <dt id="propinfer.generator.LinearGenerator"><code class="flex name class">
111 | <span>class <span class="ident">LinearGenerator</span></span>
112 | <span>(</span><span>n_samples=1024)</span>
113 | </code></dt>
114 | <dd>
115 | <div class="desc"><p>Generator sampling from a linear model with additive white gaussian noise</p>
116 | <p>The sensitive attribute defines the mean of the covariates</p></div>
117 | <h3>Ancestors</h3>
118 | <ul class="hlist">
119 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
120 | </ul>
121 | <h3>Subclasses</h3>
122 | <ul class="hlist">
123 | <li><a title="propinfer.generator.ProbitGenerator" href="#propinfer.generator.ProbitGenerator">ProbitGenerator</a></li>
124 | </ul>
125 | <h3>Inherited members</h3>
126 | <ul class="hlist">
127 | <li><code><b><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></b></code>:
128 | <ul class="hlist">
129 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
130 | </ul>
131 | </li>
132 | </ul>
133 | </dd>
134 | <dt id="propinfer.generator.MultilabelProbitGenerator"><code class="flex name class">
135 | <span>class <span class="ident">MultilabelProbitGenerator</span></span>
136 | <span>(</span><span>n_samples=1024)</span>
137 | </code></dt>
138 | <dd>
139 | <div class="desc"><p>Generator sampling from a probit model whose sensitive attributes are the mean and variance of the covariates</p></div>
140 | <h3>Ancestors</h3>
141 | <ul class="hlist">
142 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
143 | </ul>
144 | <h3>Inherited members</h3>
145 | <ul class="hlist">
146 | <li><code><b><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></b></code>:
147 | <ul class="hlist">
148 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
149 | </ul>
150 | </li>
151 | </ul>
152 | </dd>
153 | <dt id="propinfer.generator.ProbitGenerator"><code class="flex name class">
154 | <span>class <span class="ident">ProbitGenerator</span></span>
155 | <span>(</span><span>n_samples=1024)</span>
156 | </code></dt>
157 | <dd>
158 | <div class="desc"><p>Generator sampling from a probit model with additive white gaussian noise</p>
159 | <p>The sensitive attribute defines the mean of the covariates</p></div>
160 | <h3>Ancestors</h3>
161 | <ul class="hlist">
162 | <li><a title="propinfer.generator.LinearGenerator" href="#propinfer.generator.LinearGenerator">LinearGenerator</a></li>
163 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
164 | </ul>
165 | <h3>Inherited members</h3>
166 | <ul class="hlist">
167 | <li><code><b><a title="propinfer.generator.LinearGenerator" href="#propinfer.generator.LinearGenerator">LinearGenerator</a></b></code>:
168 | <ul class="hlist">
169 | <li><code><a title="propinfer.generator.LinearGenerator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
170 | </ul>
171 | </li>
172 | </ul>
173 | </dd>
174 | <dt id="propinfer.generator.SubsamplingGenerator"><code class="flex name class">
175 | <span>class <span class="ident">SubsamplingGenerator</span></span>
176 | <span>(</span><span>data, label_col, sensitive_attribute, target_category=None, n_samples=1024, proportion=None, split=False, regression=False)</span>
177 | </code></dt>
178 | <dd>
179 | <div class="desc"><p>An abstraction class used to query for data</p>
180 | <p>Generator subsampling records from a larger dataset.</p>
181 | <p>Classification case: samples using a specific proportion for label 1, and a proportion of 0.5 for label 0. Only works with boolean labels.
182 | Regression mode: samples using a specific given proportion between 0 and 1</p>
183 | <h2 id="args">Args</h2>
184 | <dl>
185 | <dt><strong><code>data</code></strong> :&ensp;<code>pandas.DataFrame</code></dt>
186 | <dd>the larger dataset to subsample from</dd>
187 | <dt><strong><code>label_col</code></strong> :&ensp;<code>str</code></dt>
188 | <dd>the label being predicted by the models</dd>
189 | <dt><strong><code>sensitive_attribute</code></strong> :&ensp;<code>str</code></dt>
190 | <dd>the attribute whose distribution is being inferred by the property inference attack; it is always considered as categorical</dd>
191 | <dt><strong><code>target_category</code></strong></dt>
192 | <dd>if sensitive_attribute is not a binary vector, the category considered in the sensitive attribute</dd>
193 | <dt><strong><code>n_samples</code></strong> :&ensp;<code>int</code></dt>
194 | <dd>the number of records to sample</dd>
195 | <dt><strong><code>proportion</code></strong> :&ensp;<code>float</code></dt>
196 | <dd>the proportion of the target_category in the datasets subsampled with label 1; ignored in the regression case</dd>
197 | <dt><strong><code>split</code></strong> :&ensp;<code>bool</code></dt>
198 | <dd>whether to split original dataset between target and adversary</dd>
199 | <dt><strong><code>regression</code></strong> :&ensp;<code>bool</code></dt>
200 | <dd>whether to use the sampler in regression or classification mode</dd>
201 | </dl></div>
202 | <h3>Ancestors</h3>
203 | <ul class="hlist">
204 | <li><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></li>
205 | </ul>
206 | <h3>Methods</h3>
207 | <dl>
208 | <dt id="propinfer.generator.SubsamplingGenerator.set_proportion"><code class="name flex">
209 | <span>def <span class="ident">set_proportion</span></span>(<span>self, proportion)</span>
210 | </code></dt>
211 | <dd>
212 | <div class="desc"></div>
213 | </dd>
214 | </dl>
215 | <h3>Inherited members</h3>
216 | <ul class="hlist">
217 | <li><code><b><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></b></code>:
218 | <ul class="hlist">
219 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
220 | </ul>
221 | </li>
222 | </ul>
223 | </dd>
224 | </dl>
225 | </section>
226 | </article>
227 | <nav id="sidebar">
228 | <header>
229 | <a class="homelink" rel="home" title="Home" href="index.html">
230 | <img src="https://dlab.epfl.ch/assets/img/dlab.svg" alt="Home" width="200">
231 | </a>
232 | </header>
233 | <h1>Index</h1>
234 | <div class="toc">
235 | <ul></ul>
236 | </div>
237 | <ul id="index">
238 | <li><h3>Super-module</h3>
239 | <ul>
240 | <li><code><a title="propinfer" href="index.html">propinfer</a></code></li>
241 | </ul>
242 | </li>
243 | <li><h3><a href="#header-classes">Classes</a></h3>
244 | <ul>
245 | <li>
246 | <h4><code><a title="propinfer.generator.GaussianGenerator" href="#propinfer.generator.GaussianGenerator">GaussianGenerator</a></code></h4>
247 | </li>
248 | <li>
249 | <h4><code><a title="propinfer.generator.Generator" href="#propinfer.generator.Generator">Generator</a></code></h4>
250 | <ul class="">
251 | <li><code><a title="propinfer.generator.Generator.sample" href="#propinfer.generator.Generator.sample">sample</a></code></li>
252 | </ul>
253 | </li>
254 | <li>
255 | <h4><code><a title="propinfer.generator.IndependentPropertyGenerator" href="#propinfer.generator.IndependentPropertyGenerator">IndependentPropertyGenerator</a></code></h4>
256 | </li>
257 | <li>
258 | <h4><code><a title="propinfer.generator.LinearGenerator" href="#propinfer.generator.LinearGenerator">LinearGenerator</a></code></h4>
259 | </li>
260 | <li>
261 | <h4><code><a title="propinfer.generator.MultilabelProbitGenerator" href="#propinfer.generator.MultilabelProbitGenerator">MultilabelProbitGenerator</a></code></h4>
262 | </li>
263 | <li>
264 | <h4><code><a title="propinfer.generator.ProbitGenerator" href="#propinfer.generator.ProbitGenerator">ProbitGenerator</a></code></h4>
265 | </li>
266 | <li>
267 | <h4><code><a title="propinfer.generator.SubsamplingGenerator" href="#propinfer.generator.SubsamplingGenerator">SubsamplingGenerator</a></code></h4>
268 | <ul class="">
269 | <li><code><a title="propinfer.generator.SubsamplingGenerator.set_proportion" href="#propinfer.generator.SubsamplingGenerator.set_proportion">set_proportion</a></code></li>
270 | </ul>
271 | </li>
272 | </ul>
273 | </li>
274 | </ul>
275 | </nav>
276 | </main>
277 | <footer id="footer">
278 | <p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
279 | </footer>
280 | </body>
281 | </html>


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 | <head>
 4 | <meta charset="utf-8">
 5 | <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
 6 | <meta name="generator" content="pdoc 0.10.0" />
 7 | <title>propinfer API documentation</title>
 8 | <meta name="description" content="propinfer is a modular framework to run Property Inference Attacks on Machine Learning models …" />
 9 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
10 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
11 | <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
12 | <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
13 | <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
14 | <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
15 | <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
16 | <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
17 | </head>
18 | <body>
19 | <main>
20 | <article id="content">
21 | <header>
22 | <h1 class="title">Package <code>propinfer</code></h1>
23 | </header>
24 | <section id="section-intro">
25 | <p>propinfer is a modular framework to run Property Inference Attacks on Machine Learning models.</p>
26 | <p>To run an experiment, you simply need to define subclasses of <code>Generator</code> and <code>Model</code>
27 | to represent your data source and your evaluated model respectively.</p>
28 | <p>Logging is available for this framework, using logger <code><a title="propinfer" href="#propinfer">propinfer</a></code>.</p>
29 | <p>Version 1.3.0</p>
30 | <p>(c) <a href="https://epfl.ch/">EPFL</a> <a href="https://dlab.epfl.ch/">Data Science Lab (dlab)</a> 2022</p>
31 | </section>
32 | <section>
33 | <h2 class="section-title" id="header-submodules">Sub-modules</h2>
34 | <dl>
35 | <dt><code class="name"><a title="propinfer.experiment" href="experiment.html">propinfer.experiment</a></code></dt>
36 | <dd>
37 | <div class="desc"></div>
38 | </dd>
39 | <dt><code class="name"><a title="propinfer.generator" href="generator.html">propinfer.generator</a></code></dt>
40 | <dd>
41 | <div class="desc"></div>
42 | </dd>
43 | <dt><code class="name"><a title="propinfer.model" href="model.html">propinfer.model</a></code></dt>
44 | <dd>
45 | <div class="desc"></div>
46 | </dd>
47 | </dl>
48 | </section>
49 | <section>
50 | </section>
51 | <section>
52 | </section>
53 | <section>
54 | </section>
55 | </article>
56 | <nav id="sidebar">
57 | <header>
58 | <a class="homelink" rel="home" title="Home" href="index.html">
59 | <img src="https://dlab.epfl.ch/assets/img/dlab.svg" alt="Home" width="200">
60 | </a>
61 | </header>
62 | <h1>Index</h1>
63 | <div class="toc">
64 | <ul></ul>
65 | </div>
66 | <ul id="index">
67 | <li><h3><a href="#header-submodules">Sub-modules</a></h3>
68 | <ul>
69 | <li><code><a title="propinfer.experiment" href="experiment.html">propinfer.experiment</a></code></li>
70 | <li><code><a title="propinfer.generator" href="generator.html">propinfer.generator</a></code></li>
71 | <li><code><a title="propinfer.model" href="model.html">propinfer.model</a></code></li>
72 | </ul>
73 | </li>
74 | </ul>
75 | </nav>
76 | </main>
77 | <footer id="footer">
78 | <p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
79 | </footer>
80 | </body>
81 | </html>


--------------------------------------------------------------------------------
/docs/model.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html lang="en">
  3 | <head>
  4 | <meta charset="utf-8">
  5 | <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
  6 | <meta name="generator" content="pdoc 0.10.0" />
  7 | <title>propinfer.model API documentation</title>
  8 | <meta name="description" content="" />
  9 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
 10 | <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
 11 | <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
 12 | <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
 13 | <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
 14 | <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
 15 | <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
 16 | <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
 17 | </head>
 18 | <body>
 19 | <main>
 20 | <article id="content">
 21 | <header>
 22 | <h1 class="title">Module <code>propinfer.model</code></h1>
 23 | </header>
 24 | <section id="section-intro">
 25 | </section>
 26 | <section>
 27 | </section>
 28 | <section>
 29 | </section>
 30 | <section>
 31 | </section>
 32 | <section>
 33 | <h2 class="section-title" id="header-classes">Classes</h2>
 34 | <dl>
 35 | <dt id="propinfer.model.LinReg"><code class="flex name class">
 36 | <span>class <span class="ident">LinReg</span></span>
 37 | <span>(</span><span>label_col, hyperparams=None)</span>
 38 | </code></dt>
 39 | <dd>
 40 | <div class="desc"><p>A linear regression based model</p>
 41 | <h2 id="args">Args</h2>
 42 | <dl>
 43 | <dt><strong><code>label_col</code></strong></dt>
 44 | <dd>the index of the column to be used as Label</dd>
 45 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
 46 | <dd>hyperparameters for the Model
 47 | Accepted keywords: max_iter (default = 100), normalise (default=False)</dd>
 48 | </dl></div>
 49 | <h3>Ancestors</h3>
 50 | <ul class="hlist">
 51 | <li><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></li>
 52 | </ul>
 53 | <h3>Subclasses</h3>
 54 | <ul class="hlist">
 55 | <li><a title="propinfer.model.LogReg" href="#propinfer.model.LogReg">LogReg</a></li>
 56 | </ul>
 57 | <h3>Inherited members</h3>
 58 | <ul class="hlist">
 59 | <li><code><b><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></b></code>:
 60 | <ul class="hlist">
 61 | <li><code><a title="propinfer.model.Model.fit" href="#propinfer.model.Model.fit">fit</a></code></li>
 62 | <li><code><a title="propinfer.model.Model.parameters" href="#propinfer.model.Model.parameters">parameters</a></code></li>
 63 | <li><code><a title="propinfer.model.Model.predict" href="#propinfer.model.Model.predict">predict</a></code></li>
 64 | <li><code><a title="propinfer.model.Model.predict_proba" href="#propinfer.model.Model.predict_proba">predict_proba</a></code></li>
 65 | </ul>
 66 | </li>
 67 | </ul>
 68 | </dd>
 69 | <dt id="propinfer.model.LogReg"><code class="flex name class">
 70 | <span>class <span class="ident">LogReg</span></span>
 71 | <span>(</span><span>label_col, hyperparams)</span>
 72 | </code></dt>
 73 | <dd>
 74 | <div class="desc"><p>A logistic regression based model</p>
 75 | <h2 id="args">Args</h2>
 76 | <dl>
 77 | <dt><strong><code>label_col</code></strong></dt>
 78 | <dd>the index of the column to be used as Label</dd>
 79 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
 80 | <dd>hyperparameters for the Model
 81 | Accepted keywords: max_iter (default = 100), normalise (default=False)</dd>
 82 | </dl></div>
 83 | <h3>Ancestors</h3>
 84 | <ul class="hlist">
 85 | <li><a title="propinfer.model.LinReg" href="#propinfer.model.LinReg">LinReg</a></li>
 86 | <li><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></li>
 87 | </ul>
 88 | <h3>Inherited members</h3>
 89 | <ul class="hlist">
 90 | <li><code><b><a title="propinfer.model.LinReg" href="#propinfer.model.LinReg">LinReg</a></b></code>:
 91 | <ul class="hlist">
 92 | <li><code><a title="propinfer.model.LinReg.fit" href="#propinfer.model.Model.fit">fit</a></code></li>
 93 | <li><code><a title="propinfer.model.LinReg.parameters" href="#propinfer.model.Model.parameters">parameters</a></code></li>
 94 | <li><code><a title="propinfer.model.LinReg.predict" href="#propinfer.model.Model.predict">predict</a></code></li>
 95 | <li><code><a title="propinfer.model.LinReg.predict_proba" href="#propinfer.model.Model.predict_proba">predict_proba</a></code></li>
 96 | </ul>
 97 | </li>
 98 | </ul>
 99 | </dd>
100 | <dt id="propinfer.model.MLP"><code class="flex name class">
101 | <span>class <span class="ident">MLP</span></span>
102 | <span>(</span><span>label_col, hyperparams)</span>
103 | </code></dt>
104 | <dd>
105 | <div class="desc"><p>A Multi-Layer Perceptron based model, for either regression or classification</p>
106 | <h2 id="args">Args</h2>
107 | <dl>
108 | <dt><strong><code>label_col</code></strong></dt>
109 | <dd>the index of the column to be used as Label</dd>
110 | <dt><strong><code>hyperparams</code></strong> :&ensp;<code>dict</code> or <code>DictConfig</code></dt>
111 | <dd>hyperparameters for the Model
112 | Accepted keywords: input_size (mandatory), n_classes (mandatory, performs regression if is 1),
113 | layers (default=[64,16]), epochs (default=20), learning_rate (default=1e-1), weight_decay (default=1e-2),
114 | batch_size (default=32), normalise (default=False)</dd>
115 | </dl></div>
116 | <h3>Ancestors</h3>
117 | <ul class="hlist">
118 | <li><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></li>
119 | </ul>
120 | <h3>Inherited members</h3>
121 | <ul class="hlist">
122 | <li><code><b><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></b></code>:
123 | <ul class="hlist">
124 | <li><code><a title="propinfer.model.Model.fit" href="#propinfer.model.Model.fit">fit</a></code></li>
125 | <li><code><a title="propinfer.model.Model.parameters" href="#propinfer.model.Model.parameters">parameters</a></code></li>
126 | <li><code><a title="propinfer.model.Model.predict" href="#propinfer.model.Model.predict">predict</a></code></li>
127 | <li><code><a title="propinfer.model.Model.predict_proba" href="#propinfer.model.Model.predict_proba">predict_proba</a></code></li>
128 | </ul>
129 | </li>
130 | </ul>
131 | </dd>
132 | <dt id="propinfer.model.Model"><code class="flex name class">
133 | <span>class <span class="ident">Model</span></span>
134 | <span>(</span><span>label_col, normalise)</span>
135 | </code></dt>
136 | <dd>
137 | <div class="desc"><p>An abstract class to be extended to represent the models that will be attacked.</p>
138 | <h2 id="args">Args</h2>
139 | <dl>
140 | <dt><strong><code>label_col</code></strong></dt>
141 | <dd>the index of the column to be used as Label</dd>
142 | <dt><strong><code>normalise</code></strong> :&ensp;<code>bool</code></dt>
143 | <dd>whether to normalise data before fit/predict</dd>
144 | </dl></div>
145 | <h3>Subclasses</h3>
146 | <ul class="hlist">
147 | <li><a title="propinfer.model.LinReg" href="#propinfer.model.LinReg">LinReg</a></li>
148 | <li><a title="propinfer.model.MLP" href="#propinfer.model.MLP">MLP</a></li>
149 | </ul>
150 | <h3>Methods</h3>
151 | <dl>
152 | <dt id="propinfer.model.Model.fit"><code class="name flex">
153 | <span>def <span class="ident">fit</span></span>(<span>self, data)</span>
154 | </code></dt>
155 | <dd>
156 | <div class="desc"><p>Fits the model according to the given data</p>
157 | <h2 id="args">Args</h2>
158 | <dl>
159 | <dt><strong><code>data</code></strong></dt>
160 | <dd>DataFrame containing all useful data</dd>
161 | </dl>
162 | <p>Returns: Model, the model itself</p></div>
163 | </dd>
164 | <dt id="propinfer.model.Model.parameters"><code class="name flex">
165 | <span>def <span class="ident">parameters</span></span>(<span>self)</span>
166 | </code></dt>
167 | <dd>
168 | <div class="desc"><p>Returns the model's parameters.</p>
169 | <ul>
170 | <li>If the model has only one layer, or is not a DNN, as a numpy array.</li>
171 | <li>If the model has multiple layers without biases, as a list of numpy arrays representing each layer.</li>
172 | <li>If the model has multiple layers with weights and biases, arrays of the corresponding weights and biases are
173 | grouped in a list, with weights going before biases.</li>
174 | </ul>
175 | <p>Returns: the model's parameters</p></div>
176 | </dd>
177 | <dt id="propinfer.model.Model.predict"><code class="name flex">
178 | <span>def <span class="ident">predict</span></span>(<span>self, data)</span>
179 | </code></dt>
180 | <dd>
181 | <div class="desc"><p>Makes predictions on the given data</p>
182 | <h2 id="args">Args</h2>
183 | <dl>
184 | <dt><strong><code>data</code></strong></dt>
185 | <dd>DataFrame containing all useful data</dd>
186 | </dl>
187 | <p>Returns: np.array containing predictions</p></div>
188 | </dd>
189 | <dt id="propinfer.model.Model.predict_proba"><code class="name flex">
190 | <span>def <span class="ident">predict_proba</span></span>(<span>self, data)</span>
191 | </code></dt>
192 | <dd>
193 | <div class="desc"><p>Outputs prediction probability scores for the given data</p>
194 | <h2 id="args">Args</h2>
195 | <dl>
196 | <dt><strong><code>data</code></strong></dt>
197 | <dd>DataFrame containing all useful data</dd>
198 | </dl>
199 | <p>Returns: np.array containing probability scores</p></div>
200 | </dd>
201 | </dl>
202 | </dd>
203 | </dl>
204 | </section>
205 | </article>
206 | <nav id="sidebar">
207 | <header>
208 | <a class="homelink" rel="home" title="Home" href="index.html">
209 | <img src="https://dlab.epfl.ch/assets/img/dlab.svg" alt="Home" width="200">
210 | </a>
211 | </header>
212 | <h1>Index</h1>
213 | <div class="toc">
214 | <ul></ul>
215 | </div>
216 | <ul id="index">
217 | <li><h3>Super-module</h3>
218 | <ul>
219 | <li><code><a title="propinfer" href="index.html">propinfer</a></code></li>
220 | </ul>
221 | </li>
222 | <li><h3><a href="#header-classes">Classes</a></h3>
223 | <ul>
224 | <li>
225 | <h4><code><a title="propinfer.model.LinReg" href="#propinfer.model.LinReg">LinReg</a></code></h4>
226 | </li>
227 | <li>
228 | <h4><code><a title="propinfer.model.LogReg" href="#propinfer.model.LogReg">LogReg</a></code></h4>
229 | </li>
230 | <li>
231 | <h4><code><a title="propinfer.model.MLP" href="#propinfer.model.MLP">MLP</a></code></h4>
232 | </li>
233 | <li>
234 | <h4><code><a title="propinfer.model.Model" href="#propinfer.model.Model">Model</a></code></h4>
235 | <ul class="">
236 | <li><code><a title="propinfer.model.Model.fit" href="#propinfer.model.Model.fit">fit</a></code></li>
237 | <li><code><a title="propinfer.model.Model.parameters" href="#propinfer.model.Model.parameters">parameters</a></code></li>
238 | <li><code><a title="propinfer.model.Model.predict" href="#propinfer.model.Model.predict">predict</a></code></li>
239 | <li><code><a title="propinfer.model.Model.predict_proba" href="#propinfer.model.Model.predict_proba">predict_proba</a></code></li>
240 | </ul>
241 | </li>
242 | </ul>
243 | </li>
244 | </ul>
245 | </nav>
246 | </main>
247 | <footer id="footer">
248 | <p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
249 | </footer>
250 | </body>
251 | </html>


--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
 1 | name: property-inference
 2 | channels:
 3 |   - defaults
 4 | dependencies:
 5 |   - scikit-learn
 6 |   - python=3.9
 7 |   - pandas
 8 |   - seaborn
 9 |   - numpy
10 |   - matplotlib
11 |   - pip:
12 |       - hydra-core
13 | 


--------------------------------------------------------------------------------
/logging.ini:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root, propinfer
 3 | 
 4 | [handlers]
 5 | keys=consoleHandler, fileHandler
 6 | 
 7 | [formatters]
 8 | keys=simpleFormatter
 9 | 
10 | [logger_root]
11 | level=WARNING
12 | handlers=consoleHandler
13 | 
14 | [logger_propinfer]
15 | level=DEBUG
16 | handlers=consoleHandler, fileHandler
17 | qualname=propinfer
18 | propagate=0
19 | 
20 | [handler_consoleHandler]
21 | class=StreamHandler
22 | level=INFO
23 | formatter=simpleFormatter
24 | args=(sys.stdout,)
25 | 
26 | [handler_fileHandler]
27 | class=FileHandler
28 | level=DEBUG
29 | formatter=simpleFormatter
30 | args=('%(logfilename)s', 'w')
31 | 
32 | [formatter_simpleFormatter]
33 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s


--------------------------------------------------------------------------------
/propinfer/__init__.py:
--------------------------------------------------------------------------------
 1 | """propinfer is a modular framework to run Property Inference Attacks on Machine Learning models.
 2 | 
 3 | To run an experiment, you simply should define subclasses of `Generator` and `Model`
 4 | to represent your data source and your evaluated model respectively.
 5 | 
 6 | Logging is available for this framework, using logger `propinfer`.
 7 | 
 8 | Version 1.3.0
 9 | 
10 | (c) [EPFL](https://epfl.ch/) [Data Science Lab (dlab)](https://dlab.epfl.ch/) 2022"""
11 | 
12 | import logging
13 | 
14 | from propinfer.experiment import Experiment
15 | from propinfer.generator import Generator, GaussianGenerator, IndependentPropertyGenerator, ProbitGenerator, \
16 |                                 LinearGenerator, SubsamplingGenerator, MultilabelProbitGenerator
17 | from propinfer.model import Model, LinReg, LogReg, MLP
18 | 
# Attach a no-op handler to the package logger so that records emitted by propinfer
# always find at least one handler on that logger, even before the application
# configures logging.
logging.getLogger('propinfer').addHandler(logging.NullHandler())

# pdoc configuration: names mapped to False are presumably left out of the generated
# API documentation (the docs/ folder has no page for either module) - confirm against pdoc docs.
__pdoc__ = {
    'deepsets': False,
    'model_utils': False
}


--------------------------------------------------------------------------------
/propinfer/deepsets.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.utils.data import TensorDataset, DataLoader
  4 | import numpy as np
  5 | 
  6 | import logging
# Shared package logger; DeepSets reports its training loss on it at DEBUG level.
logger = logging.getLogger('propinfer')

# pdoc configuration: presumably hides the DeepSets class from generated docs - confirm against pdoc docs.
__pdoc__ = {
    'DeepSets': False
}
 12 | 
 13 | 
 14 | class DeepSets(nn.Module):
 15 |     def __init__(self, param, latent_dim, epochs, lr, wd, dropout=0.5, bs=32, n_classes=2, out_dim=1):
 16 |         super().__init__()
 17 | 
 18 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 19 | 
 20 |         if isinstance(param, np.ndarray):
 21 |             param = list(param)
 22 |         if isinstance(param, list):
 23 |             self.reducer = list()
 24 |             self.dimensions = list()
 25 |             context_size = 0
 26 |             for i, layer in enumerate(param):
 27 | 
 28 |                 if isinstance(layer, list):
 29 |                     self.dimensions.append((layer[0].shape[0], layer[0].shape[1] + 1))
 30 |                     dim = layer[0].shape[1] + 1 + context_size
 31 |                     context_size = layer[0].shape[0]*latent_dim
 32 |                 else:
 33 |                     if len(layer.shape) < 2:
 34 |                         layer = layer.reshape((1, -1))
 35 | 
 36 |                     self.dimensions.append((layer.shape[0], layer.shape[1]))
 37 |                     dim = layer.shape[1] + context_size
 38 |                     context_size = layer.shape[0] * latent_dim
 39 | 
 40 |                 self.reducer.append(
 41 |                     nn.Sequential(nn.Linear(dim, 64), nn.ReLU(),
 42 |                                   nn.Linear(64, latent_dim), nn.Dropout(dropout), nn.ReLU()).to(self.device))
 43 |         else:
 44 |             raise AttributeError('The given param is not a list or ndarray, but is {}'.format(type(param).__name__))
 45 | 
 46 |         dim = len(param) * latent_dim
 47 | 
 48 |         out_dim = n_classes if n_classes > 1 else out_dim
 49 | 
 50 |         self.classifier = nn.Sequential(
 51 |             nn.Linear(dim, out_dim)
 52 |         ).to(self.device)
 53 | 
 54 |         self.epochs = epochs
 55 |         self.lr = lr
 56 |         self.wd = wd
 57 |         self.bs = bs
 58 |         self.n_classes = n_classes
 59 |         self.out_dim = out_dim
 60 | 
 61 |     def forward(self, X):
 62 |         offset = 0
 63 |         context = None
 64 |         l = list()
 65 | 
 66 |         for i, dim in enumerate(self.dimensions):
 67 | 
 68 |             layer = X[:, offset:offset + dim[0] * dim[1]].view(-1, dim[0], dim[1])
 69 |             offset += dim[0] * dim[1]
 70 | 
 71 |             if context is not None:
 72 |                 layer = torch.cat((layer, context.view(layer.size()[0], 1, -1).repeat_interleave(dim[0], dim=1)), dim=2)
 73 | 
 74 |             n = self.reducer[i](layer)
 75 |             context = n.flatten(start_dim=1)
 76 | 
 77 |             l.append(n.sum(axis=1))
 78 | 
 79 |         x = torch.cat(l, dim=1)
 80 |         x = self.classifier(x)
 81 |         return x.view(X.shape[0], -1) if self.out_dim > 1 else x.flatten()
 82 | 
 83 |     def parameters(self, recurse: bool = True):
 84 |         params = list(self.classifier.parameters())
 85 |         for r in self.reducer:
 86 |             params.extend(list(r.parameters()))
 87 |         return params
 88 | 
 89 |     def __transform(self, parameters):
 90 |         tensors = list()
 91 |         for param in parameters:
 92 |             if isinstance(param, np.ndarray):
 93 |                 param = list(param)
 94 | 
 95 |             flat = list()
 96 |             for i, p in enumerate(param):
 97 |                 if isinstance(p, list):
 98 |                     flat.append(np.concatenate(p, axis=1).flatten())
 99 |                 else:
100 |                     flat.append(p.flatten())
101 | 
102 |             tensors.append(torch.tensor(np.concatenate(flat), dtype=torch.float32, device=self.device).view(1, -1))
103 |         return torch.cat(tensors, dim=0)
104 | 
105 |     def fit(self, parameters, labels):
106 |         y_true_dtype = torch.int64 if self.n_classes > 1 else torch.float32
107 |         ds = TensorDataset(self.__transform(parameters),
108 |                            torch.tensor(labels, dtype=y_true_dtype, device=self.device))
109 |         loader = DataLoader(ds, batch_size=self.bs, shuffle=True)
110 |         opt = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.wd)
111 |         criterion = torch.nn.CrossEntropyLoss() if self.n_classes > 1 else torch.nn.MSELoss()
112 |         for e in range(self.epochs):
113 |             tot_loss = 0
114 |             for X, y_true in loader:
115 |                 opt.zero_grad()
116 |                 y_pred = self.forward(X)
117 |                 if self.out_dim > 1 and self.n_classes == 1:
118 |                     y_true = y_true.view(X.shape[0], -1)
119 |                 else:
120 |                     y_true = y_true.view(-1)
121 |                 loss = criterion(y_pred, y_true)
122 |                 tot_loss += loss.item()
123 |                 loss.backward()
124 |                 opt.step()
125 |             if e % 10 == 0 or e == self.epochs-1:
126 |                 logger.debug('Training DeepSets - Epoch {} - Loss={:.4f}'.format(e, tot_loss))
127 | 
128 |     def predict(self, parameters):
129 |         for r in self.reducer:
130 |             r.train(False)
131 |         self.classifier.train(False)
132 | 
133 |         loader = DataLoader(self.__transform(parameters), batch_size=self.bs, shuffle=False)
134 | 
135 |         predictions = list()
136 | 
137 |         if self.n_classes > 1:
138 |             for X in loader:
139 |                 predictions.append(self.forward(X).detach().argmax(dim=1).cpu().numpy())
140 |         else:
141 |             for X in loader:
142 |                 predictions.append(self.forward(X).detach().cpu().numpy())
143 | 
144 |         return np.concatenate(predictions)
145 | 


--------------------------------------------------------------------------------
/propinfer/experiment.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | 
  4 | from sklearn.neural_network import MLPClassifier, MLPRegressor
  5 | from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
  6 | from sklearn.model_selection import StratifiedShuffleSplit
  7 | from omegaconf import DictConfig
  8 | from itertools import product
  9 | from random import sample
 10 | 
 11 | from propinfer.generator import Generator
 12 | from propinfer.model import Model
 13 | from propinfer.deepsets import DeepSets
 14 | from propinfer.model_utils import transform_parameters
 15 | 
 16 | import logging
# Shared package logger; the Experiment class writes its progress and scores to it at DEBUG level.
logger = logging.getLogger('propinfer')
 18 | 
 19 | 
 20 | class Experiment:
 21 |     def __init__(self, generator, label_col,  model, n_targets, n_shadows, hyperparams, n_queries=1024, n_classes=2, range=None):
 22 |         """Object representing an experiment, based on its data generator and model pair
 23 | 
 24 |         Args:
 25 |             generator (Generator): data abstraction used for this experiment
 26 |             model (Model.__class__): a Model class that represents the model to be used
 27 |             n_targets (int): the total number of target models
 28 |             n_shadows (int): the total number of shadow models
 29 |             hyperparams (dict or DictConfig): dictionary containing every useful hyper-parameter for the Model;
 30 |                          if a list is provided for some hyperparameter(s), we grid optimise between all given options (except for keyword `layers`)
 31 |             n_queries (int): the number of queries used in the scope of grey- and black-box attacks. Must be strictly superior to `n_targets`
 32 |             n_classes (int): the number of classes considered for property inference; if 1 then a regression is performed
 33 |             range (tuple): the range of values accepted for regression tasks (needed for regression, ignored for classification)
 34 |                          it is possible to pass an iterable of multiple ranges in order to perform multi-variable property inference regression, in which case the values of the variables are passed to the Generator as a list
 35 |         """
 36 | 
 37 |         assert isinstance(generator, Generator), 'The given generator is not an instance of Generator, but {}'.format(type(generator).__name__)
 38 |         self.generator = generator
 39 | 
 40 |         assert isinstance(label_col, str), 'label_col should be a string, but is {}'.format(type(label_col).__name__)
 41 |         self.label_col = label_col
 42 | 
 43 |         assert issubclass(model, Model), 'The given model is not a subclass of Model'
 44 |         self.model = model
 45 | 
 46 |         assert isinstance(n_targets, int), 'The given n_targets is not an integer, but is {}'.format(type(n_targets).__name__)
 47 |         self.n_targets = n_targets
 48 | 
 49 |         assert isinstance(n_shadows, int), 'The given n_shadows is not an integer, but is {}'.format(type(n_shadows).__name__)
 50 |         self.n_shadows = n_shadows
 51 | 
 52 |         if hyperparams is not None:
 53 |             assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict),\
 54 |                 'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)
 55 |             self.hyperparams = hyperparams
 56 |             if np.any([isinstance(p, list) for p in hyperparams.values()]):
 57 |                 self.__optimise_hyperparams()
 58 |         else:
 59 |             self.hyperparams = dict()
 60 | 
 61 |         assert isinstance(n_queries, int), 'The given n_queries is not an integer, but is {}'.format(type(n_queries).__name__)
 62 |         assert n_queries > n_targets, f'n_queries={n_queries} must be strictly superior to n_targets={n_targets}'
 63 |         self.n_queries = n_queries
 64 | 
 65 |         assert isinstance(n_classes, int), 'The given n_classes is not an integer, but is {}'.format(type(n_classes).__name__)
 66 |         if n_classes == 1:
 67 |             assert range is not None
 68 |             assert hasattr(range, '__getitem__')
 69 | 
 70 |         self.n_classes = n_classes
 71 |         self.range = range
 72 | 
 73 |         self.targets = None
 74 |         self.labels = None
 75 | 
 76 |         self.shadow_models = None
 77 |         self.shadow_labels = None
 78 | 
 79 |         self.shadow_models = None
 80 |         self.shadow_labels = None
 81 | 
 82 |         if n_classes == 1 and hasattr(self.range[0], '__getitem__'):
 83 |             label = [r[0] for r in self.range]
 84 |             data = self.generator.sample(label)
 85 |         elif n_classes == 1:
 86 |             data = self.generator.sample(range[0])
 87 |         else:
 88 |             data = self.generator.sample(0)
 89 |         reg = self.model(self.label_col, self.hyperparams).fit(data).predict_proba(data)
 90 |         self.is_regression = len(reg.shape) < 2 or reg.shape[1] == 1
 91 | 
 92 |     def __optimise_hyperparams(self):
 93 |         """Private method for hyperparamters grid optimisation"""
 94 |         optims = list()
 95 |         keys = list()
 96 | 
 97 |         for k, v in self.hyperparams.items():
 98 |             if isinstance(v, list) and k != 'layers':
 99 |                 optims.append(v)
100 |                 keys.append(k)
101 | 
102 |         logger.debug('Optimising hyperparameters: {}'.format(keys))
103 | 
104 |         optims = list(product(*optims))
105 | 
106 |         best_res = -np.inf
107 |         reg_checked = False
108 |         is_reg = False
109 | 
110 |         for params in optims:
111 |             hyperparams = self.hyperparams.copy()
112 |             for i, p in enumerate(params):
113 |                 hyperparams[keys[i]] = p
114 |             train = [self.generator.sample(b) for b in [False, True]]
115 |             test = [self.generator.sample(b) for b in [False, True]]
116 | 
117 |             res = 0.
118 | 
119 |             for i in range(len(train)):
120 |                 models = [self.model(self.label_col, hyperparams).fit(train[i]) for _ in range(10)]
121 | 
122 |                 if not reg_checked and \
123 |                         (len(models[0].predict_proba(test[0]).shape) < 2 or
124 |                          models[0].predict_proba(test[0]).shape[1] == 1):
125 |                     is_reg = True
126 | 
127 |                 reg_checked = True
128 | 
129 |                 if is_reg:
130 |                     res -= np.mean([mean_squared_error(test[i][self.label_col], m.predict(train[i])) for m in models])
131 |                 else:
132 |                     res += np.mean([accuracy_score(test[i][self.label_col], m.predict(train[i])) for m in models])
133 | 
134 |             res /= len(train)
135 | 
136 |             if res > best_res:
137 |                 best_res = res
138 |                 self.hyperparams = hyperparams
139 | 
140 |         logger.debug('Best hyperparameters defined as: {}'.format(self.hyperparams))
141 |         if is_reg:
142 |             logger.debug('Best MSE: {:.2}'.format(-best_res))
143 |         else:
144 |             logger.debug('Best accuracy: {:.2%}'.format(best_res))
145 | 
146 |     def run_targets(self):
147 |         """Create and fit target models """
148 |         if self.n_classes > 1:
149 |             self.labels = np.concatenate([[i]*(self.n_targets//self.n_classes) for i in range(self.n_classes)],
150 |                                          dtype=np.int8)
151 |             if self.n_targets % self.n_classes > 0:
152 |                 self.labels = np.concatenate((self.labels,
153 |                                              np.random.randint(0, self.n_classes, self.n_targets % self.n_classes)),
154 |                                              dtype=np.int8)
155 |         elif self.n_classes == 1:
156 |             if hasattr(self.range[0], '__getitem__'):
157 |                 bounds = np.array(self.range)
158 |                 self.labels = np.random.uniform(bounds[:, 0], bounds[:, 1], (self.n_targets, len(self.range)))
159 |             else:
160 |                 self.labels = np.arange(self.range[0], self.range[1], (self.range[1] - self.range[0])/self.n_targets)
161 |         else:
162 |             raise AttributeError("Invalid n_classes provided: {}".format(self.n_classes))
163 | 
164 |         self.targets = [self.model(self.label_col, self.hyperparams).fit(data) for data in
165 |                         [self.generator.sample(label) for label in self.labels]]
166 | 
167 |         if self.is_regression:
168 |             scores = [mean_squared_error(data[self.label_col], self.targets[i].predict(data)) for i, data in
169 |                       enumerate([self.generator.sample(label) for label in self.labels])]
170 |             logger.debug('Target models MAE - mean={:.2} - std={:.2} - min={:.2} - max={:.2}'.format(
171 |                 np.mean(scores), np.std(scores), np.min(scores), np.max(scores)))
172 | 
173 |         else:
174 |             scores = [accuracy_score(data[self.label_col], self.targets[i].predict(data)) for i, data in
175 |                         enumerate([self.generator.sample(label) for label in self.labels])]
176 |             logger.debug('Target models accuracy - mean={:.2%} - std={:.2%} - min={:.2%} - max={:.2%}'.format(
177 |                         np.mean(scores), np.std(scores), np.min(scores), np.max(scores)))
178 | 
179 |     def run_shadows(self, model=None, hyperparams=None):
180 |         """Create and fit shadow models
181 | 
182 |         Args:
183 |             model (Model.__class__): a Model class that represents the model to be used. If None, will be the same as target models
184 |             hyperparams (dict or DictConfig): dictionary containing every useful hyper-parameter for the Model;
185 |                 Hyperparameters of shadow models will NOT be optimised. If None, will be the same as target models.
186 |         """
187 |         if model is not None:
188 |             assert issubclass(model, Model), 'The given model is not a subclass of Model'
189 | 
190 |             if hyperparams is not None:
191 |                 assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict),\
192 |                     'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)
193 |             else:
194 |                 self.hyperparams = dict()
195 | 
196 |         else:
197 |             model = self.model
198 |             hyperparams = self.hyperparams
199 | 
200 |         if self.n_classes > 1:
201 |             self.shadow_labels = np.concatenate([[i]*(self.n_shadows//self.n_classes) for i in range(self.n_classes)],
202 |                                                 dtype=np.int8)
203 |             if self.n_shadows % self.n_classes > 0:
204 |                 self.shadow_labels = np.concatenate((self.shadow_labels,
205 |                                                     np.random.randint(0, self.n_classes, self.n_shadows % self.n_classes)),
206 |                                                     dtype=np.int8)
207 |         elif self.n_classes == 1:
208 |             if hasattr(self.range[0], '__getitem__'):
209 |                 bounds = np.array(self.range)
210 |                 self.shadow_labels = np.random.uniform(bounds[:, 0], bounds[:, 1], (self.n_shadows, len(self.range)))
211 |             else:
212 |                 self.shadow_labels = np.arange(self.range[0], self.range[1], (self.range[1] - self.range[0])/self.n_shadows)
213 |         else:
214 |             raise AttributeError("Invalid n_classes provided: {}".format(self.n_classes))
215 | 
216 |         self.shadow_models = [model(self.label_col, hyperparams).fit(data) for data in
217 |                               [self.generator.sample(label, adv=True) for label in self.shadow_labels]]
218 | 
219 |         if self.is_regression:
220 |             scores = [mean_squared_error(data[self.label_col], self.shadow_models[i].predict(data)) for i, data in
221 |                       enumerate([self.generator.sample(label, adv=True) for label in self.shadow_labels])]
222 |             logger.debug('Shadow models MAE - mean={:.2} - std={:.2} - min={:.2} - max={:.2}'.format(
223 |                 np.mean(scores), np.std(scores), np.min(scores), np.max(scores)))
224 | 
225 |         else:
226 |             scores = [accuracy_score(data[self.label_col], self.shadow_models[i].predict(data)) for i, data in
227 |                       enumerate([self.generator.sample(label, adv=True) for label in self.shadow_labels])]
228 |             logger.debug('Shadow models accuracy - mean={:.2%} - std={:.2%} - min={:.2%} - max={:.2%}'.format(
229 |                 np.mean(scores), np.std(scores), np.min(scores), np.max(scores)))
230 | 
231 |     def run_loss_test(self):
232 |         """Runs a loss test attack on target models. Works only for the binary classification attack on a classifier.
233 | 
234 |         Returns: Attack accuracy on target models
235 |         """
236 |         assert self.targets is not None
237 |         assert self.n_classes == 2
238 | 
239 |         y_true = [False, True]
240 |         X_test = [self.generator.sample(b, adv=True) for b in y_true]
241 | 
242 |         accuracy = [[accuracy_score(X[self.label_col], t.predict(X)) for X in X_test] for t in self.targets]
243 |         return accuracy_score(self.labels, [np.argmax(acc) for acc in accuracy])
244 | 
245 |     def __run_multiple(self, n, func, *args):
246 |         """Helper private method to run a same attack multiple times"""
247 | 
248 |         sss = StratifiedShuffleSplit(n_splits=n, train_size=0.5)
249 |         shadow_models = np.array(self.shadow_models)
250 |         shadow_labels = np.array(self.shadow_labels)
251 | 
252 |         accs = []
253 | 
254 |         if self.n_classes > 1:
255 |             for idx, _ in sss.split(shadow_models, shadow_labels):
256 |                 self.shadow_models, self.shadow_labels = shadow_models[idx], shadow_labels[idx]
257 |                 accs.append(func(*args))
258 |         else:
259 |             for _ in range(n):
260 |                 idx = sample(range(self.n_shadows), self.n_shadows//2)
261 |                 self.shadow_models, self.shadow_labels = shadow_models[idx], shadow_labels[idx]
262 |                 accs.append(func(*args))
263 | 
264 |         self.shadow_models, self.shadow_labels = shadow_models, shadow_labels
265 | 
266 |         return accs
267 | 
268 |     def run_threshold_test(self, n_outputs=1):
269 |         """Runs a threshold test attack on target models. Works only for the binary classification attack on a classifier.
270 | 
271 |         Args:
272 |             n_outputs (int): number of attack results to output, using multiple random subsets of the shadow models
273 | 
274 |         Returns: Attack accuracy on target models
275 |         """
276 |         assert self.targets is not None
277 |         assert self.shadow_models is not None
278 |         assert self.n_classes == 2
279 | 
280 |         if n_outputs > 1:
281 |             return self.__run_multiple(n_outputs, self.run_threshold_test)
282 | 
283 |         y_true = [False, True]
284 |         X_test = [self.generator.sample(b, adv=True) for b in y_true]
285 | 
286 |         shadow_labels = np.array(self.shadow_labels, dtype=bool)
287 |         accuracy = np.array([[accuracy_score(X[self.label_col], s.predict(X)) for X in X_test] for s in self.shadow_models])
288 |         k = np.argmax(np.abs(np.sum(accuracy[shadow_labels, :], axis=0) -
289 |                              np.sum(accuracy[~shadow_labels, :], axis=0)))
290 |         higher_acc = np.argmax([np.sum(accuracy[~shadow_labels, k]), np.sum(accuracy[shadow_labels, k])])
291 | 
292 |         thr = 0.0
293 |         best_acc = 0.0
294 |         for z in np.arange(0, 1, 1e-2):
295 |             thr_labels = [higher_acc if acc > z else not higher_acc for acc in accuracy[:, k]]
296 |             acc = accuracy_score(shadow_labels, thr_labels)
297 |             if acc > best_acc:
298 |                 thr = z
299 |                 best_acc = acc
300 | 
301 |         accuracy = np.array([accuracy_score(X_test[k][self.label_col], t.predict(X_test[k])) for t in self.targets])
302 |         y_pred = [higher_acc if acc > thr else not higher_acc for acc in accuracy]
303 |         return accuracy_score(self.labels, y_pred)
304 | 
305 |     def __get_score(self, y_pred):
306 |         if self.n_classes > 1:
307 |             return accuracy_score(self.labels, y_pred)
308 |         else:
309 |             if len(y_pred.shape) == 1:
310 |                 return mean_absolute_error(self.labels, y_pred)
311 |             else:
312 |                 return [mean_absolute_error(self.labels[:, i], y_pred[:, i]) for i in range(y_pred.shape[1])]
313 | 
314 |     def run_whitebox_deepsets(self, hyperparams, n_outputs=1):
315 |         """Runs a whitebox attack on the target models using a DeepSets meta-classifier
316 | 
317 |         Args:
318 |             hyperparams (dict or DictConfig): Hyperparameters for the DeepSets meta-classifier.
319 |                 Accepted keywords are: latent_dim (default=5); epochs (default=20); learning_rate (default=1e-4); weight_decay (default=1e-4)
320 |             n_outputs (int): number of attack results to output, using multiple random subsets of the shadow models
321 | 
322 |         Returns: Attack accuracy on target models
323 |         """
324 |         assert self.targets is not None
325 |         assert self.shadow_models is not None
326 | 
327 |         if n_outputs > 1:
328 |             return self.__run_multiple(n_outputs, self.run_whitebox_deepsets, hyperparams)
329 | 
330 |         if hyperparams is not None:
331 |             assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict),\
332 |                 'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)
333 |         else:
334 |             hyperparams = dict()
335 | 
336 |         latent_dim = hyperparams['latent_dim'] if 'latent_dim' in hyperparams.keys() else 5
337 |         epochs = hyperparams['epochs'] if 'epochs' in hyperparams.keys() else 20
338 |         lr = hyperparams['learning_rate'] if 'learning_rate' in hyperparams.keys() else 1e-4
339 |         wd = hyperparams['weight_decay'] if 'weight_decay' in hyperparams.keys() else 1e-4
340 |         out_dim = 1 if self.n_classes > 1 or not hasattr(self.range[0], '__getitem__') else len(self.range)
341 | 
342 |         meta_classifier = DeepSets(self.shadow_models[0].parameters(), latent_dim=latent_dim,
343 |                                    epochs=epochs, lr=lr, wd=wd, n_classes=self.n_classes, out_dim=out_dim)
344 | 
345 |         train = [s.parameters() for s in self.shadow_models]
346 |         test = [t.parameters() for t in self.targets]
347 | 
348 |         meta_classifier.fit(train, self.shadow_labels)
349 |         y_pred = meta_classifier.predict(test)
350 | 
351 |         del train, test, meta_classifier
352 | 
353 |         return self.__get_score(y_pred)
354 | 
355 |     def run_whitebox_sort(self, sort=True, n_outputs=1):
356 |         """Runs a whitebox attack on the target models, by using the model parameters as features for a meta-classifier
357 | 
358 |         Args:
359 |             sort (bool): whether to perform node sorting (to be used for permutation-invariant DNN)
360 |             n_outputs (int): number of attack results to output, using multiple random subsets of the shadow models
361 | 
362 |         Returns: Attack accuracy on target models for the classification task, or mean absolute error for the regression task
363 |         """
364 |         assert self.targets is not None
365 |         assert self.shadow_models is not None
366 | 
367 |         if n_outputs > 1:
368 |             return self.__run_multiple(n_outputs, self.run_whitebox_sort, sort)
369 | 
370 |         train = pd.DataFrame(data=[transform_parameters(s.parameters(), sort=sort)
371 |                                     for s in self.shadow_models])
372 | 
373 |         test = pd.DataFrame(data=[transform_parameters(t.parameters(), sort=sort)
374 |                                   for t in self.targets])
375 | 
376 |         meta_classifier = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=1024, early_stopping=True) \
377 |             if self.n_classes > 1 else MLPRegressor(hidden_layer_sizes=(128, 64), max_iter=1024, early_stopping=True)
378 | 
379 |         meta_classifier.fit(train, self.shadow_labels)
380 |         y_pred = meta_classifier.predict(test)
381 | 
382 |         del train, test, meta_classifier
383 | 
384 |         return self.__get_score(y_pred)
385 | 
386 |     def run_blackbox(self, n_outputs=1):
387 |         """Runs a blackbox attack on the target models, by using the result of random queries as features for a meta-classifier
388 | 
389 |         Args:
390 |             n_outputs (int): number of attack results to output, using multiple random subsets of the shadow models
391 | 
392 |         Returns: Attack accuracy on target models for the classification task, or mean absolute error for the regression task
393 |         """
394 |         assert self.targets is not None
395 |         assert self.shadow_models is not None
396 | 
397 |         if n_outputs > 1:
398 |             return self.__run_multiple(n_outputs, self.run_blackbox)
399 | 
400 |         meta_classifier = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=1024, early_stopping=True) \
401 |             if self.n_classes > 1 else MLPRegressor(hidden_layer_sizes=(128, 64), max_iter=1024, early_stopping=True)
402 | 
403 |         if self.n_classes > 1:
404 |             queries = pd.concat([self.generator.sample(i, adv=True) for i in range(self.n_classes)])
405 |             labels = np.concatenate([[i]*len(self.generator.sample(i, adv=True)) for i in range(self.n_classes)])
406 |         elif self.n_classes == 1:
407 |             if hasattr(self.range[0], '__getitem__'):
408 |                 bounds = np.array(self.range)
409 |                 labels = np.random.uniform(bounds[:, 0], bounds[:, 1], (10*len(self.range), len(self.range)))
410 |                 sample_len = len(self.generator.sample([0]*len(self.range), adv=True))
411 |             else:
412 |                 labels = np.arange(self.range[0], self.range[1], (self.range[1] - self.range[0])/10)
413 |                 sample_len = len(self.generator.sample(0, adv=True))
414 | 
415 |             queries = pd.concat([self.generator.sample(l, adv=True) for l in labels])
416 |             labels = np.concatenate([[l]*sample_len for l in labels])
417 |         else:
418 |             raise AttributeError("Invalid n_classes provided: {}".format(self.n_classes))
419 | 
420 |         sss = StratifiedShuffleSplit(n_splits=1, train_size=self.n_queries)
421 |         idx, _ = list(sss.split(queries, labels))[0]
422 |         queries = queries.iloc[idx]
423 | 
424 |         train = pd.DataFrame(data=[s.predict(queries).flatten() for s in self.shadow_models])
425 |         test  = pd.DataFrame(data=[s.predict(queries).flatten() for s in self.targets])
426 | 
427 |         meta_classifier.fit(train, self.shadow_labels)
428 |         y_pred = meta_classifier.predict(test)
429 | 
430 |         del train, test, meta_classifier
431 | 
432 |         return self.__get_score(y_pred)
433 | 


--------------------------------------------------------------------------------
/propinfer/generator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy import array, eye, ones, int32, int64, float32
  3 | from numpy.random import normal, multivariate_normal
  4 | from pandas import DataFrame, concat, get_dummies
  5 | from sklearn.model_selection import StratifiedShuffleSplit
  6 | 
# pdoc overrides for names imported from numpy.random above
# NOTE(review): a False value presumably hides the name from the generated docs - confirm against pdoc
__pdoc__ = {
    'multivariate_normal': False,
    'normal': False
}
 11 | 
 12 | 
 13 | class Generator:
 14 |     """An abstraction class used to query for data"""
 15 | 
 16 |     def __init__(self, n_samples=1024):
 17 |         assert isinstance(n_samples, int), 'n_samples should be an int, but {} was provided'.format(type(n_samples).__name__)
 18 |         self.n_samples = n_samples
 19 | 
 20 |     def sample(self, label, adv=False):
 21 |         """Returns a dataset sampled from the data; the label variable corresponds to the property being attacked
 22 | 
 23 |         Args:
 24 |             label (int or float or numpy.array): the label corresponding to the dataset being queried - when performing regression, the value of the target variable(s)
 25 |             adv (bool): a boolean describing whether we are using target or adversary data split
 26 | 
 27 |         Returns:
 28 |             a pandas DataFrame representing our dataset for this experiment
 29 |         """
 30 |         raise NotImplementedError
 31 | 
 32 | 
 33 | class GaussianGenerator(Generator):
 34 |     """Generator sampling from a multivariate Gaussian Distribution in which features are correlated.
 35 |     Label is made categorical by checking whether it is positive or negative.
 36 |     Sensitive attribute is the mean of the fourth feature vector"""
 37 | 
 38 |     def sample(self, label, adv=False):
 39 |         mean = array([0.]*5)
 40 |         mean[4] = label
 41 | 
 42 |         cov = eye(5)
 43 | 
 44 |         for i in range(1, 5):
 45 |             cov[0, i] = cov[i, 0] = 0.5
 46 | 
 47 |         data = DataFrame(data=multivariate_normal(mean, cov, size=self.n_samples),
 48 |                          columns=['label', 'f1', 'f2', 'f3', 'f4'], dtype=float32)
 49 |         data['label'] = (data['label'] > 0).astype('int32')
 50 | 
 51 |         return data
 52 | 
 53 | 
 54 | class IndependentPropertyGenerator(Generator):
 55 |     """Generator sampling from a multivariate Gaussian Distribution in which features are not correlated with the label, but are correlated between each other.
 56 |     Label is made categorical by checking whether it is positive or negative.
 57 |     Sensitive attribute is the mean of the fourth feature vector"""
 58 |     def sample(self, label, adv=False):
 59 |         mean = array([0.] * 5)
 60 |         mean[4] = label
 61 | 
 62 |         cov = eye(5)
 63 |         for i in range(1, 4):
 64 |             cov[0, i] = cov[i, 0] = 0.5
 65 | 
 66 |         data = DataFrame(data=multivariate_normal(mean, cov, size=self.n_samples),
 67 |                          columns=['label', 'f1', 'f2', 'f3', 'f4'], dtype=float32)
 68 |         data['label'] = (data['label'] > 0).astype('int32')
 69 | 
 70 |         return data
 71 | 
 72 | 
 73 | class LinearGenerator(Generator):
 74 |     """Generator sampling from a linear model with additive white gaussian noise
 75 | 
 76 |     The sensitive attribute defines the mean of the covariates"""
 77 | 
 78 |     def __init__(self, n_samples=1024):
 79 |         super().__init__(n_samples)
 80 |         self.beta = ones(4) + normal(0., 1., 4)
 81 | 
 82 |     def sample(self, label, adv=False):
 83 |         x = multivariate_normal(ones(4) * label, eye(4), size=self.n_samples)
 84 |         y = x @ self.beta + normal(0., size=self.n_samples) + 0.5
 85 | 
 86 |         data = DataFrame(data=x,
 87 |                          columns=['f1', 'f2', 'f3', 'f4'], dtype=float32)
 88 |         data['label'] = y.astype('float32')
 89 | 
 90 |         return data
 91 | 
 92 | 
 93 | class ProbitGenerator(LinearGenerator):
 94 |     """Generator sampling from a probit model with additive white gaussian noise
 95 | 
 96 |     The sensitive attribute defines the mean of the covariates"""
 97 | 
 98 |     def sample(self, label, adv=False):
 99 |         data = super().sample(label, adv)
100 |         data['label'] = (data['label'] > 0).astype('int32')
101 |         return data
102 | 
103 | 
class MultilabelProbitGenerator(Generator):
    """Generator sampling from a probit model of which sensitive attribute are the mean and variance of the covariates

    label[0] is the common mean of the four covariates; label[1] scales their variance, which is
    1 + 2 * label[1]. The adv flag is ignored."""

    def sample(self, label, adv=False):
        feature_names = ['f1', 'f2', 'f3', 'f4']

        mean = array([label[0] for _ in range(4)])
        identity = eye(4)
        cov = identity + 2 * identity * label[1]

        x = multivariate_normal(mean, cov, size=self.n_samples)

        # Fixed coefficients and a 0.25 intercept; standard normal noise is added before thresholding
        beta = array([-1., 1., -0.5, 1.5])
        y = x @ beta + 0.25 + normal(0., 1., size=self.n_samples)

        data = DataFrame(data=x, columns=feature_names, dtype=float32)
        data['label'] = (y > 0).astype('int32')

        return data
120 | 
121 | 
class SubsamplingGenerator(Generator):
    def __init__(self, data, label_col, sensitive_attribute, target_category=None,
                 n_samples=1024, proportion=None, split=False, regression=False):
        """Generator subsampling records from a larger dataset.

        Classification case: samples using a specific proportion for label 1, and for proportion of 0.5 for label 0. Only works with boolean labels.
        Regression mode: samples using a specific given proportion between 0 and 1

        The given DataFrame is copied; it is never modified by the generator.

        Args:
            data (pandas.Dataframe): the larger dataset to subsample from
            label_col (str): the label being predicted by the models
            sensitive_attribute (str): the attribute which distribution being inferred by the property inference attack; is always considered as categorical
            target_category: if sensitive_attribute is not a binary vector, the category considered in the sensitive attribute
            n_samples (int): the number of records to sample
            proportion (float): the proportion of the target_category in the datasets subsampled with label 1 ; mandatory in the classification case, ignored in the regression case
            split (bool): whether to split original dataset between target and adversary
            regression (bool): whether to use the sampler in regression or classification mode
        """
        super().__init__(n_samples)

        assert isinstance(data, DataFrame), 'Given data should be a DataFrame, but is {}'.format(type(data).__name__)
        # Private copy: the sensitive column is re-typed and a helper 'attr' column is added below,
        # neither of which should leak into the caller's DataFrame
        self.data = data.copy()

        assert isinstance(label_col, str), 'label_col should be a string, but is {}' .format(type(label_col).__name__)
        assert label_col in data.columns, 'label_col not in data columns'
        self.label_col = label_col

        assert isinstance(sensitive_attribute, str), 'sensitive_attribute should be a string, but is {}'.format(type(sensitive_attribute).__name__)
        assert sensitive_attribute in data.columns, 'sensitive_attribute not in data columns'
        self.attr = sensitive_attribute

        assert isinstance(split, bool), 'Split should be a bool, but is {}'.format(type(split).__name__)
        self.split = split
        if split:
            # Disjoint target / adversary halves, stratified on both label and sensitive attribute
            sss = StratifiedShuffleSplit(train_size=0.5)
            self.tar, self.adv = next(sss.split(self.data, self.data[[self.label_col, self.attr]]))

        self.data[sensitive_attribute] = self.data[sensitive_attribute].astype('category')

        # 'is None' rather than truthiness, so that falsy categories such as 0 or '' can be targeted
        if target_category is None:
            is_zero = self.data[sensitive_attribute] == 0
            is_one = self.data[sensitive_attribute] == 1
            assert is_zero.sum() + is_one.sum() == len(self.data), \
                'target_category not specified but sensitive attribute is not a binary vector'
            self.pos = is_one
            self.data['attr'] = self.data[sensitive_attribute]

        else:
            assert target_category in self.data[sensitive_attribute].cat.categories, \
                'target category {} not in {} column'.format(target_category, sensitive_attribute)
            self.pos = self.data[sensitive_attribute] == target_category
            self.data['attr'] = self.data[sensitive_attribute].cat.codes

        assert isinstance(regression, bool), 'Regression should be a bool, but is {}'.format(type(regression).__name__)
        self.regression = regression

        if not self.regression:
            self.set_proportion(proportion)

    def _subsample(self, pool, n):
        """Draws n records from pool, stratified on label and sensitive attribute when possible.

        Returns None when n is not positive.
        """
        if n <= 0:
            return None

        sss = StratifiedShuffleSplit(train_size=n)
        try:
            idx, _ = next(sss.split(pool, pool[[self.label_col, 'attr']]))
            return pool.iloc[idx]
        except ValueError:
            # Stratification is not feasible for this pool / size: fall back to plain random sampling
            return pool.sample(n)

    def sample(self, label, adv=False):
        """Returns n_samples records in which the target category has the requested proportion.

        Args:
            label: 0 or 1 in classification mode (1 uses the configured proportion, 0 uses 0.5);
                the proportion itself in regression mode
            adv (bool): whether to draw from the adversary half; only used when split is enabled

        Returns:
            a pandas DataFrame, with categorical columns one-hot encoded by get_dummies
        """
        if not self.regression:
            assert np.isclose(label, 0) or np.isclose(label, 1)

        if self.split:
            rows = self.adv if adv else self.tar
            data = self.data.iloc[rows]
            pos = self.pos.iloc[rows]
        else:
            data = self.data
            pos = self.pos

        if self.regression:
            prop = label
        else:
            # Same closeness test as the assertion above, so a label within tolerance of 0 is treated as 0
            prop = self.proportion if np.isclose(label, 1) else 0.5

        n_pos = int(self.n_samples * prop)
        n_neg = self.n_samples - n_pos

        pos_df = self._subsample(data[pos], n_pos)
        neg_df = self._subsample(data[~pos], n_neg)

        if pos_df is not None:
            out = concat((pos_df, neg_df)) if neg_df is not None else pos_df
        else:
            out = neg_df

        # drop() returns a new frame, so the label assignment below never writes into a slice of self.data
        out = out.drop('attr', axis=1)

        if not (out.dtypes[self.label_col] == int32 or out.dtypes[self.label_col] == int64):
            out[self.label_col] = out[self.label_col].astype('category').cat.codes

        return get_dummies(out)

    def set_proportion(self, proportion):
        """Sets the proportion of the target category used for datasets sampled with label 1.

        Args:
            proportion (float): value in [0., 1.]
        """
        assert proportion is not None, 'proportion should be in [0., 1.], but None was provided'
        assert 0. <= proportion <= 1., 'proportion is {:.2f} but should be in [0., 1.]'.format(proportion)
        self.proportion = proportion
234 | 


--------------------------------------------------------------------------------
/propinfer/model.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | import warnings
  5 | 
  6 | from sklearn.linear_model import LinearRegression, LogisticRegression
  7 | from sklearn.exceptions import ConvergenceWarning
  8 | 
  9 | from torch.nn.functional import softmax
 10 | from omegaconf import DictConfig
 11 | 
 12 | 
 13 | class Model:
 14 |     def __init__(self, label_col, normalise):
 15 |         """An abstract class to be extended to represent the models that will be attacked.
 16 | 
 17 |         Args:
 18 |             label_col: the index of the column to be used as Label
 19 |             normalise (bool): whether to normalise data before fit/predict
 20 |         """
 21 |         assert isinstance(label_col, str), 'label_col should be a string'
 22 |         self.label_col = label_col
 23 | 
 24 |         assert isinstance(normalise, bool), 'normalise should be bool'
 25 |         self.normalise = normalise
 26 | 
 27 |         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 28 | 
 29 |         self.train_mean = None
 30 |         self.train_std = None
 31 | 
 32 |     def _prepare_data(self, df, train=True):
 33 |         """Prepares data by separating features from labels and eventually normalising features.
 34 | 
 35 |         Args:
 36 |             df (DataFrame): the data to be prepared
 37 |             train (bool): whether we are preparing a train or test set
 38 | 
 39 |         Returns:
 40 |             X (DataFrame): feature data
 41 |             y (Series): label data
 42 |         """
 43 |         feature_cols = df.columns.to_list()
 44 |         feature_cols.remove(self.label_col)
 45 | 
 46 |         X = df[feature_cols].copy()
 47 |         y = df[self.label_col].copy()
 48 | 
 49 |         if self.normalise:
 50 |             norm = X.select_dtypes(exclude=[np.uint8, np.int8])
 51 | 
 52 |             if train or self.train_mean is None:
 53 |                 self.train_mean = norm.mean()
 54 |                 self.train_std = norm.std()
 55 |                 if self.train_std < 1e-5:
 56 |                     self.train_std = 1.
 57 | 
 58 |             X[norm.columns] = (norm - self.train_mean) / self.train_std
 59 | 
 60 |         return X, y
 61 | 
 62 |     def _prepare_dataloader(self, df, bs=32, train=True, regression=False):
 63 |         """Prepares data, and puts it inside a ready-to-use PyTorch DataLoader.
 64 | 
 65 |         Args:
 66 |             df (DataFrame): the data to be prepared
 67 |             bs (int): batch-size
 68 |             train (bool): whether we are preparing a train or test set
 69 | 
 70 |         Returns: a PyTorch DataLoader
 71 |         """
 72 |         X, y = self._prepare_data(df, train)
 73 | 
 74 |         X = torch.tensor(X.values.astype(np.float32), device=self.device)
 75 |         y = torch.tensor(y.values.astype(np.int64 if not regression else np.float32), device=self.device)
 76 |         data = torch.utils.data.TensorDataset(X, y)
 77 |         loader = torch.utils.data.DataLoader(dataset=data, batch_size=bs, shuffle=train)
 78 | 
 79 |         return loader
 80 | 
 81 |     def fit(self, data):
 82 |         """Fits the model according to the given data
 83 |         Args:
 84 |             data: DataFrame containing all useful data
 85 |         Returns: Model, the model itself
 86 |         """
 87 |         raise NotImplementedError
 88 | 
 89 |     def predict(self, data):
 90 |         """Makes predictions on the given data
 91 |         Args:
 92 |             data: DataFrame containing all useful data
 93 |         Returns: np.array containing predictions
 94 |         """
 95 |         res = self.predict_proba(data)
 96 |         return res.flatten() if len(res.shape) < 2 or res.shape[1] == 1 else res.argmax(axis=1)
 97 | 
 98 |     def predict_proba(self, data):
 99 |         """Outputs prediction probability scores for the given data
100 |         Args:
101 |             data: DataFrame containing all useful data
102 |         Returns:np.array containing probability scores
103 |         """
104 |         raise NotImplementedError
105 | 
106 |     def parameters(self):
107 |         """Returns the model's parameters.
108 | 
109 |          * If the model has only one layer, or is not a DNN, as a numpy array.
110 |          * If the model has multiple layers without biases, as a list of numpy arrays representing each layer.
111 |          * If the model has multiple layers with weights and biases, arrays of the corresponding weights and biases are
112 |         grouped in a list, with weights going before biases.
113 | 
114 |         Returns: the model's parameters
115 |         """
116 | 
117 |         return []
118 | 
119 | 
class LinReg(Model):
    def __init__(self, label_col, hyperparams=None):
        """A linear regression based model

        Args:
            label_col (str): the name of the column to be used as Label
            hyperparams (dict of DictConfig): hyperperameters for the Model
                Accepted keywords: normalise (default=False); the spelling normalize is accepted as well
        """
        if hyperparams is not None:
            assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict),\
                'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)
        else:
            hyperparams = dict()

        if 'normalise' in hyperparams.keys():
            normalise = hyperparams['normalise']
        elif 'normalize' in hyperparams.keys():
            normalise = hyperparams['normalize']
        else:
            normalise = False

        super().__init__(label_col, normalise)
        self.model = LinearRegression()

    def fit(self, data):
        """Fits the underlying estimator on the given data; convergence warnings are silenced.

        Args:
            data: DataFrame containing features and the label column
        Returns: the model itself
        """
        X, y = self._prepare_data(data, train=True)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=ConvergenceWarning)
            self.model.fit(X, y)
        return self

    def predict_proba(self, data):
        """Outputs the regression predictions for the given data

        Args:
            data: DataFrame containing features and the label column
        Returns: np.array containing predictions
        """
        # train=False: reuse the normalisation statistics computed at fit time instead of
        # overwriting them with the statistics of the data being predicted
        X, _ = self._prepare_data(data, train=False)
        return self.model.predict(X)

    def parameters(self):
        """Returns the fitted intercept followed by the flattened coefficients, as one numpy array."""
        intercept = self.model.intercept_
        if not isinstance(intercept, np.ndarray):
            intercept = np.array([intercept])
        return np.concatenate([intercept, self.model.coef_.flatten()])
161 | 
162 | 
class LogReg(LinReg):
    def __init__(self, label_col, hyperparams=None):
        """A logistic regression based model

        Args:
            label_col (str): the name of the column to be used as Label
            hyperparams (dict of DictConfig): hyperperameters for the Model
                Accepted keywords: max_iter (default = 100), normalise (default=False)
        """
        if hyperparams is not None:
            assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict),\
                'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)
        else:
            hyperparams = dict()

        max_iter = hyperparams['max_iter'] if 'max_iter' in hyperparams.keys() else 100

        super().__init__(label_col, hyperparams)
        # Replace the linear regressor created by the parent constructor
        self.model = LogisticRegression(max_iter=max_iter)

    def predict_proba(self, data):
        """Outputs class probability scores for the given data

        Args:
            data: DataFrame containing features and the label column
        Returns: np.array containing probability scores, one column per class
        """
        # train=False: reuse the normalisation statistics computed at fit time instead of
        # overwriting them with the statistics of the data being predicted
        X, _ = self._prepare_data(data, train=False)
        return self.model.predict_proba(X)
186 | 
187 | 
class MLP(Model):
    def __init__(self, label_col, hyperparams):
        """A Multi-Layer Perceptron based model, for either regression or classification

        Args:
            label_col (str): the name of the column to be used as Label
            hyperparams (dict of DictConfig): hyperperameters for the Model; never modified
                Accepted keywords: input_size (mandatory), n_classes (mandatory, performs regression if is 1),
                layers (default=[64,16]), epochs (default=10), learning_rate (default=1e-1), weight_decay (default=1e-2),
                batch_size (default=32), normalise (default=False)
                The legacy keyword num_classes is accepted in place of n_classes and takes precedence over it.
        """
        assert isinstance(hyperparams, DictConfig) or isinstance(hyperparams, dict), \
            'The given hyperparameters are not a dict or a DictConfig, but are {}'.format(type(hyperparams).__name__)

        if 'normalise' in hyperparams.keys():
            normalise = hyperparams['normalise']
        elif 'normalize' in hyperparams.keys():
            normalise = hyperparams['normalize']
        else:
            normalise = False
        super(MLP, self).__init__(label_col, normalise)

        layers = hyperparams['layers'] if 'layers' in hyperparams.keys() else [64, 16]

        input_size = hyperparams['input_size']

        # Legacy version compatibility; read directly so the caller's hyperparameters are not mutated
        if 'num_classes' in hyperparams.keys():
            self.n_classes = hyperparams['num_classes']
        else:
            self.n_classes = hyperparams['n_classes']

        # Hidden layers: Linear + ReLU pairs, followed by a final Linear output layer
        seq = list()
        for width in layers:
            seq.extend([
                nn.Linear(input_size, width),
                nn.ReLU()
            ])
            input_size = width

        seq.append(nn.Linear(input_size, self.n_classes))

        self.model = nn.Sequential(*seq).to(self.device)

        self.epochs = hyperparams['epochs'] if 'epochs' in hyperparams.keys() else 10
        self.lr = hyperparams['learning_rate'] if 'learning_rate' in hyperparams.keys() else 1e-1
        self.wd = hyperparams['weight_decay'] if 'weight_decay' in hyperparams.keys() else 1e-2
        self.bs = hyperparams['batch_size'] if 'batch_size' in hyperparams.keys() else 32

    def fit(self, data):
        """Trains the network with Adam, using cross-entropy for classification and MSE for regression.

        Args:
            data: DataFrame containing features and the label column
        Returns: the model itself
        """
        loader = self._prepare_dataloader(data, bs=self.bs, train=True, regression=self.n_classes == 1)

        opt = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.wd)
        criterion = nn.CrossEntropyLoss() if self.n_classes > 1 else nn.MSELoss()

        for _ in range(self.epochs):
            for X, y_true in loader:
                opt.zero_grad()
                y_pred = self.model(X)

                # Single-output (regression) case: match the 1-D shape of the targets
                if y_pred.shape[1] == 1:
                    y_pred = y_pred.flatten()

                loss = criterion(y_pred, y_true)
                loss.backward()
                opt.step()

        return self

    def predict_proba(self, data):
        """Outputs softmax scores (classification) or raw network outputs (regression) for the given data

        Args:
            data: DataFrame containing features and the label column
        Returns: np.array of outputs, with NaN values replaced by np.nan_to_num
        """
        loader = self._prepare_dataloader(data, bs=self.bs, train=False, regression=self.n_classes == 1)
        preds = list()

        # No gradients are needed for inference
        with torch.no_grad():
            for X, _ in loader:
                out = self.model(X).cpu()
                preds.append(softmax(out, dim=1) if self.n_classes > 1 else out)

        return np.nan_to_num(torch.cat(preds, dim=0).detach().cpu().numpy())

    def parameters(self):
        """Returns the parameters as a list of [weights, biases] pairs, one per Linear layer, in network order.

        Biases are returned as column vectors; NaN values are replaced by np.nan_to_num.
        """
        params = self.model.state_dict()
        # Keys look like '<position>.weight' / '<position>.bias'; sort the positions explicitly
        # because iteration order of a set is not guaranteed
        layer_ids = sorted({int(k.split('.')[0]) for k in params.keys()})

        out = list()
        for i in layer_ids:
            w = np.nan_to_num(params['{}.weight'.format(i)].detach().cpu().numpy())
            b = np.nan_to_num(params['{}.bias'.format(i)].view(-1, 1).detach().cpu().numpy())
            out.append([w, b])
        return out


--------------------------------------------------------------------------------
/propinfer/model_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def transform_parameters(parameters, sort=False):
 5 |     if isinstance(parameters, np.ndarray):
 6 |         if sort:
 7 |             return np.sort(parameters.flatten())
 8 |         else:
 9 |             return parameters.flatten()
10 |     elif isinstance(parameters, list):
11 |         if sort:
12 |             return sort_parameters(parameters)
13 |         else:
14 |             return flatten_parameters(parameters)
15 |     else:
16 |         raise AttributeError(
17 |             'Parameters should be a numpy array or a list, but is {}'.format(type(parameters).__name__))
18 | 
19 | 
def flatten_parameters(parameters):
    """Concatenate all parameter arrays, in order, into one 1-D numpy array.

    Args:
        parameters: iterable whose items are either numpy arrays or lists of numpy arrays

    Returns:
        1-D numpy array made of every flattened array
    """
    flat = []
    for layer in parameters:
        arrays = layer if isinstance(layer, list) else [layer]
        flat += [a.flatten() for a in arrays]
    return np.concatenate(flat)
28 | 
29 | 
def sort_parameters(parameters):
    """Flatten layer parameters after putting the rows of each layer in a canonical order.

    For every layer but the last, the rows of the weight matrix (and of the bias, when
    the layer is a `[weights, bias]` list) are reordered according to the row sums of
    the weights, and the columns of the next layer's weights are permuted accordingly.
    The last layer is only flattened.

    The input is left untouched: permutations are applied to shallow copies, so calling
    this function several times on the same parameters gives the same result.

    Args:
        parameters: list whose items are either 2-D weight arrays or
            `[weights, bias]` lists of 2-D arrays, one item per layer

    Returns:
        1-D numpy array of the reordered, flattened parameters
    """
    # Copy the outer list and every inner list: the permuted weights of the next layer
    # are stored by reassignment, which must never reach the caller's containers
    layers = [list(p) if isinstance(p, list) else p for p in parameters]

    out = []
    for i in range(len(layers) - 1):
        current = layers[i]
        if isinstance(current, list):
            order = np.argsort(current[0].sum(axis=1))
            out.append(current[0][order, :].flatten())
            out.append(current[1][order, :].flatten())
        else:
            # NOTE(review): this branch orders by the absolute value of the row sums,
            # unlike the list branch above - kept as is, confirm which one is intended
            order = np.argsort(np.abs(current.sum(axis=1)))
            out.append(current[order, :].flatten())

        # Keep the next layer consistent with the new row order of this one
        following = layers[i + 1]
        if isinstance(following, list):
            following[0] = following[0][:, order]
        else:
            layers[i + 1] = following[:, order]

    if isinstance(layers[-1], list):
        out.extend([array.flatten() for array in layers[-1]])
    else:
        out.append(layers[-1].flatten())

    return np.concatenate(out)
53 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=42",
4 |     "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging.config
  3 | 
  4 | from os import path
  5 | 
  6 | from omegaconf import DictConfig
  7 | import hydra
  8 | 
  9 | from propinfer import Experiment
 10 | from propinfer import GaussianGenerator, IndependentPropertyGenerator, ProbitGenerator, NonlinearGenerator
 11 | from propinfer import LogReg, MLP
 12 | 
 13 | CWD = path.dirname(__file__)
 14 | 
 15 | MODELS = {
 16 |     'LogReg': LogReg,
 17 |     'MLP': MLP
 18 | }
 19 | 
 20 | GENERATORS = {
 21 |     'GaussianGenerator': GaussianGenerator,
 22 |     'IndependentPropertyGenerator': IndependentPropertyGenerator,
 23 |     'ProbitGenerator': ProbitGenerator,
 24 |     'NonlinearGenerator': NonlinearGenerator
 25 | }
 26 | 
 27 | from os import path, mkdir
 28 | from time import strftime
 29 | 
 30 | TIMESTAMP = strftime('%d%m%y_%H:%M:%S')
 31 | 
 32 | config = path.abspath(path.join(path.dirname(__file__), 'logging.ini'))
 33 | 
 34 | logdir = path.abspath(path.join(path.dirname(__file__),"./logs"))
 35 | if not path.isdir(logdir):
 36 |     mkdir(logdir)
 37 | logfile = logdir + '/logs_property-inference-attacks_' + TIMESTAMP + '.txt'
 38 | 
 39 | logging.config.fileConfig(config, defaults={'logfilename': logfile})
 40 | 
 41 | # create logger
 42 | logger = logging.getLogger('propinfer')
 43 | 
 44 | 
 45 | @hydra.main(config_path="config", config_name="config")
 46 | def main(cfg: DictConfig):
 47 |     experiments = dict()
 48 |     for gen in cfg.experiments.generators:
 49 |         generator = GENERATORS[gen](num_samples=cfg.generators.n_samples)
 50 |         for model in cfg.experiments.models:
 51 |             n_classes = cfg.experiments.n_classes if 'n_classes' in cfg.experiments.keys() else 2
 52 |             exp_range = None if n_classes > 1 else cfg.experiments.range
 53 |             experiment = Experiment(generator, cfg.generators.label_col, MODELS[model], cfg.experiments.n_targets, cfg.experiments.n_shadows,
 54 |                                     cfg.models[model], cfg.experiments.n_queries, n_classes=n_classes, range=exp_range)
 55 | 
 56 |             logger.info('Training target models: {} - {}'.format(gen, model))
 57 |             experiment.run_targets()
 58 |             logger.info('Training shadow models: {} - {}'.format(gen, model))
 59 |             experiment.run_shadows(MODELS[model], cfg.models[model])
 60 | 
 61 |             runs = list(cfg.experiments.runs)
 62 |             if 'BlackBox' in runs:
 63 |                 runs.remove('BlackBox')
 64 |                 runs.append('BlackBox')
 65 | 
 66 |             for run in cfg.experiments.runs:
 67 |                 name = '{} - {} - {}'.format(gen, model, run)
 68 |                 logger.info('Running {}...'.format(name))
 69 |                 if run == 'LossTest':
 70 |                     experiments[name] = experiment.run_loss_test()
 71 |                 elif run == 'ThresholdTest':
 72 |                     experiments[name] = experiment.run_threshold_test()
 73 |                 elif run == 'Naive':
 74 |                     experiments[name] = experiment.run_whitebox_sort(sort=False)
 75 |                 elif run == 'Sort':
 76 |                     experiments[name] = experiment.run_whitebox_sort(sort=True)
 77 |                 elif run == 'DeepSets':
 78 |                     experiments[name] = experiment.run_whitebox_deepsets(cfg.deepsets)
 79 |                 elif run == 'GreyBox':
 80 |                     experiments[name] = experiment.run_blackbox()
 81 |                 elif run == 'BlackBox':
 82 |                     logger.info('Training default shadow models: {} - {}'.format(gen, cfg.experiments.blackbox_model))
 83 |                     experiment.run_shadows(MODELS[cfg.experiments.blackbox_model], cfg.models[cfg.experiments.blackbox_model])
 84 |                     experiments[name] = experiment.run_blackbox()
 85 |                 else:
 86 |                     raise AttributeError('Invalid run provided: should be Naive, Sort, DeepSets, GreyBox or BlackBox'
 87 |                                          ' - instead is {}'.format(run))
 88 | 
 89 |                 if n_classes > 1:
 90 |                     logger.info('Attack accuracy for {}: {:.2%}'.format(name, experiments[name]))
 91 |                 else:
 92 |                     logger.info('Mean absolute error for {}: {:.2f}'.format(name, experiments[name]))
 93 | 
 94 |     # Output results
 95 |     outfile_name = 'results_PIA_' + TIMESTAMP + '.json'
 96 |     outdir = path.join(CWD, cfg.outdir)
 97 |     if not path.isdir(outdir):
 98 |         mkdir(outdir)
 99 |     with open(path.join(outdir, outfile_name), 'w') as f:
100 |         json.dump(experiments, f)
101 | 
102 | 
103 | if __name__ == "__main__":
104 |     main()
105 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = propinfer
 3 | version = 1.3.0
 4 | author = Léo Meynent
 5 | author_email = leo.meynent@epfl.ch
 6 | description = Modular framework to run Property Inference Attacks on Machine Learning models.
 7 | long_description = file: README.md
 8 | long_description_content_type = text/markdown
 9 | url = https://epfl-dlab.github.io/property-inference-attacks/
10 | project_urls =
11 |     Repository = https://github.com/epfl-dlab/property-inference-attacks/
12 |     Tracker = https://github.com/epfl-dlab/property-inference-attacks/issues
13 | classifiers =
14 |     Programming Language :: Python :: 3
15 |     License :: OSI Approved :: MIT License
16 |     Operating System :: OS Independent
17 |     Intended Audience :: Science/Research
18 | license = MIT
19 | license_files = LICENSE.md
20 | 
21 | [options]
22 | packages = propinfer
23 | python_requires = >=3.6
24 | install_requires =
25 |     scikit-learn
26 |     pandas
27 |     numpy
28 |     hydra-core


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
logger = logging.getLogger('propinfer')

# Raise the level of the handler named 'consoleHandler', if any, to WARNING for the tests
for handler in filter(lambda h: h.name == 'consoleHandler', logger.handlers):
    handler.setLevel(logging.WARNING)


--------------------------------------------------------------------------------
/tests/test_deepsets.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from propinfer.deepsets import DeepSets
 4 | from propinfer import MLP
 5 | 
 6 | from numpy import array
 7 | 
 8 | DEFAULT_HYPERPARAMS_MLP = {
 9 |     'input_size': 5,
10 |     'num_classes': 2,
11 |     'epochs': 10,
12 |     'learning_rate': 1e-3,
13 |     'weight_decay': 1e-4,
14 |     'normalise': False,
15 |     'layers': (128, 64)
16 | }
17 | 
18 | 
19 | class Test(TestCase):
20 |     def test_deepsets(self):
21 |         model = MLP('label', DEFAULT_HYPERPARAMS_MLP)
22 |         multi_params = [model.parameters()]*64
23 |         ds = DeepSets(model.parameters(), 8, 2, 1e-3, 1e-4)
24 |         ds.fit(multi_params, array([0]*64))
25 |         assert len(ds.predict(multi_params)) == 64


--------------------------------------------------------------------------------
/tests/test_experiment.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | 
  3 | from propinfer import Experiment
  4 | from propinfer import GaussianGenerator, IndependentPropertyGenerator, MultilabelProbitGenerator, LinearGenerator
  5 | from propinfer import LinReg, LogReg, MLP
  6 | 
  7 | import numpy as np
  8 | 
  9 | 
# MLP hyperparameters; learning_rate and weight_decay are lists of candidate values
# (see test_optimise_classifier, which checks that they end up as single values)
DEFAULT_HYPERPARAMS_MLP = {
    "input_size": 4,
    "layers": (4, 4),
    "num_classes": 2,
    "epochs": 1,
    "learning_rate": [1e-1, 1e-2],
    "weight_decay": [1e-2, 1e-3],
    "batch_size": 32
}

# Hyperparameters passed to Experiment.run_whitebox_deepsets throughout these tests
DEFAULT_HYPERPARAMS_DEEPSETS = {
    'latent_dim': 8,
    'epochs': 1,
    'learning_rate': 1e-3,
    'weight_decay': 1e-4
}


class TestExperiment(TestCase):
    """Tests of the Experiment class: model training, attacks and their output shapes."""

    def setUp(self):
        """Build a default Experiment: GaussianGenerator data attacked through LogReg models."""
        self.num_targets = 128
        self.num_shadows = 512

        self.gen = GaussianGenerator()
        self.model = LogReg
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100})

    def test_prepare_attacks(self):
        """Target models are created, with labels evenly split between both classes."""
        self.exp.run_targets()
        assert self.exp.targets is not None
        assert sum(self.exp.labels) == self.num_targets//2
        assert len(self.exp.labels) == self.num_targets

    def test_run_shadows(self):
        """Shadow models are created, with labels evenly split between both classes."""
        self.exp.run_shadows(LogReg, {'max_iter': 100})
        assert self.exp.shadow_models is not None
        assert sum(self.exp.shadow_labels) == self.num_shadows // 2
        assert len(self.exp.shadow_labels) == self.num_shadows

    def test_attacks(self):
        """Attacks score above 0.25, and better on GaussianGenerator than on IndependentPropertyGenerator."""
        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert self.exp.run_loss_test() > 0.25
        assert self.exp.run_threshold_test() > 0.25
        assert self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS) > 0.25

        res = dict()
        res['whitebox'] = self.exp.run_whitebox_sort()
        res['blackbox'] = self.exp.run_blackbox()

        # Same experiment on IndependentPropertyGenerator data, used as the comparison point
        indep = IndependentPropertyGenerator()
        exp_indep = Experiment(indep, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100})

        exp_indep.run_targets()
        exp_indep.run_shadows(LogReg, {'max_iter': 100})

        res_indep = dict()
        res_indep['whitebox'] = exp_indep.run_whitebox_sort()
        res_indep['blackbox'] = exp_indep.run_blackbox()

        assert res['whitebox'] > res_indep['whitebox']
        assert res['blackbox'] > res_indep['blackbox']

    def test_optimise_classifier(self):
        """List-valued MLP hyperparameters are no longer lists once the Experiment is built."""
        assert isinstance(DEFAULT_HYPERPARAMS_MLP['learning_rate'], list)
        assert isinstance(DEFAULT_HYPERPARAMS_MLP['weight_decay'], list)

        self.model = MLP
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, DEFAULT_HYPERPARAMS_MLP)

        assert not isinstance(self.exp.hyperparams['learning_rate'], list)
        assert not isinstance(self.exp.hyperparams['weight_decay'], list)

    def test_optimise_regressor(self):
        """A list-valued hyperparameter of a LinReg model is no longer a list once the Experiment is built."""
        self.gen = LinearGenerator()
        self.model = LinReg
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'dummy': [0., 1.]})

        assert not isinstance(self.exp.hyperparams['dummy'], list)

    def test_attacks_multiple(self):
        """With n_outputs=2, each attack returns two results."""
        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert len(self.exp.run_threshold_test(n_outputs=2)) == 2
        assert len(self.exp.run_whitebox_sort(n_outputs=2)) == 2
        assert len(self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS, n_outputs=2)) == 2
        # NOTE(review): same check as two lines above - possibly another attack was meant, confirm
        assert len(self.exp.run_whitebox_sort(n_outputs=2)) == 2
        assert len(self.exp.run_blackbox(n_outputs=2)) == 2

    def test_wb_bb_regression(self):
        """With n_classes=1 and range (-1, 1), whitebox and blackbox results stay below 1."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=1, range=(-1., 1.))

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert self.exp.run_whitebox_sort() < 1
        assert self.exp.run_blackbox() < 1

    def test_wb_bb_multiclass(self):
        """With n_classes=3, whitebox and blackbox results exceed 0.5."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=3)

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert self.exp.run_whitebox_sort() > 0.5
        assert self.exp.run_blackbox() > 0.5

    def test_deepsets_regression(self):
        """With n_classes=1 and range (-1, 1), the DeepSets result stays below 1."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=1, range=(-1., 1.))

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS) < 1

    def test_deepsets_multiclass(self):
        """With n_classes=3, the DeepSets result exceeds 0.25."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=3)

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS) > 0.25

    def test_multiple_regression(self):
        """With n_classes=1, run_whitebox_sort(n_outputs=2) returns two results."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=1, range=(-1., 1.))

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert len(self.exp.run_whitebox_sort(n_outputs=2)) == 2

    def test_multiple_multiclass(self):
        """With n_classes=3, run_whitebox_sort(n_outputs=2) returns two results."""
        self.exp = Experiment(self.gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=3)

        self.exp.run_targets()
        self.exp.run_shadows(LogReg, {'max_iter': 100})

        assert len(self.exp.run_whitebox_sort(n_outputs=2)) == 2

    def test_multiclass_nondivisible_number_models(self):
        """Model counts that are not multiples of n_classes (10 and 13 for 3 classes) are honoured exactly."""
        self.exp = Experiment(self.gen, 'label', self.model, 10, 13, {'max_iter': 100},
                              n_classes=3)
        self.exp.run_targets()
        self.exp.run_shadows()

        assert len(self.exp.labels) == 10
        assert len(self.exp.targets) == 10
        assert len(self.exp.shadow_labels) == 13
        assert len(self.exp.shadow_models) == 13

    def test_multivariable_regression(self):
        """With a two-row range and MultilabelProbitGenerator data, each attack returns two results."""
        gen = MultilabelProbitGenerator()
        self.exp = Experiment(gen, 'label', self.model, self.num_targets, self.num_shadows, {'max_iter': 100},
                              n_classes=1, range=np.array(((0., 1.), (0., 1.))))

        self.exp.run_targets()
        self.exp.run_shadows()

        assert len(self.exp.run_whitebox_sort()) == 2
        assert len(self.exp.run_blackbox()) == 2
        assert len(self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS)) == 2

    def test_attack_regressor(self):
        """Attacks against LinReg models trained on LinearGenerator data score above 0.25."""
        gen = LinearGenerator()
        self.exp = Experiment(gen, 'label', LinReg, self.num_targets, self.num_shadows, dict())

        self.exp.run_targets()
        self.exp.run_shadows()

        assert self.exp.run_whitebox_sort() > 0.25
        assert self.exp.run_blackbox() > 0.25
        assert self.exp.run_whitebox_deepsets(DEFAULT_HYPERPARAMS_DEEPSETS) > 0.25


--------------------------------------------------------------------------------
/tests/test_generator.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from propinfer import Generator, GaussianGenerator, ProbitGenerator, IndependentPropertyGenerator, \
 4 |     LinearGenerator, SubsamplingGenerator, MultilabelProbitGenerator
 5 | from numpy import stack, sum, int32
 6 | from numpy.random import randint
 7 | from pandas import DataFrame
 8 | 
# NOTE(review): this class tests the generators; the name TestExperiment looks inherited
# from test_experiment.py - confirm before renaming
class TestExperiment(TestCase):
    """Tests of the data generators exposed by propinfer."""

    def test_subsampling_generator(self):
        """Check the proportions observed in samples drawn from SubsamplingGenerator."""
        # Three random integer attributes with 2, 3 and 4 possible values
        attr1 = randint(0, 2, 32768)
        attr2 = randint(0, 3, 32768)
        attr3 = randint(0, 4, 32768)

        data = DataFrame(data=stack((attr1, attr2, attr3), axis=1), columns=['Bin', 'Tri', 'Quad'], dtype=int32)
        # Copy of Quad stored with the pandas 'category' dtype
        data.loc[:, 'Cat'] = data.Quad.astype('category')

        # Label 'Quad', sensitive attribute 'Bin'
        gen = SubsamplingGenerator(data, 'Quad', 'Bin', proportion=0.1)

        # sample(False): about half of the samples have Bin_1 set
        sample = gen.sample(False)
        assert 0.49 < sum(sample['Bin_1']) / len(sample) < 0.51
        assert 0.2 < sum(sample['Quad'] == 1) / len(sample) < 0.3

        # sample(True): the share of Bin_1 matches `proportion`
        sample = gen.sample(True)
        assert 0.09 < sum(sample['Bin_1']) / len(sample) < 0.11
        assert 0.2 < sum(sample['Quad'] == 1) / len(sample) < 0.3

        # Using 'Quad' as the sensitive attribute without target_category is rejected
        self.assertRaises(AssertionError, SubsamplingGenerator, data, 'Tri', 'Quad', proportion=0.1)

        # Label 'Tri', sensitive attribute 'Quad' with target category 1
        gen = SubsamplingGenerator(data, 'Tri', 'Quad', target_category=1, proportion=0.1)
        sample = gen.sample(False)
        assert 0.25 < sum(sample['Tri'] == 1) / len(sample) < 0.4
        assert 0.49 < sum(sample['Quad_1']) / len(sample) < 0.51

        # The other Quad categories share the remainder about evenly
        sample = gen.sample(True)
        assert 0.25 < sum(sample['Tri'] == 1) / len(sample) < 0.4
        assert 0.09 < sum(sample['Quad_1']) / len(sample) < 0.11
        assert 0.25 < sum(sample['Quad_0']) / len(sample) < 0.35
        assert 0.25 < sum(sample['Quad_2']) / len(sample) < 0.35
        assert 0.25 < sum(sample['Quad_3']) / len(sample) < 0.35

        # Same proportions are expected with split=True
        gen = SubsamplingGenerator(data, 'Tri', 'Quad', target_category=1, proportion=0.1, split=True)
        sample = gen.sample(False)
        assert 0.25 < sum(sample['Tri'] == 1) / len(sample) < 0.4
        assert 0.49 < sum(sample['Quad_1']) / len(sample) < 0.51

        sample = gen.sample(True)
        assert 0.25 < sum(sample['Tri'] == 1) / len(sample) < 0.4
        assert 0.09 < sum(sample['Quad_1']) / len(sample) < 0.11

        # Smoke tests: the categorical column works as sensitive attribute and as label
        gen = SubsamplingGenerator(data, 'Tri', 'Cat', target_category=1, proportion=0.1)
        gen.sample(False)

        gen = SubsamplingGenerator(data, 'Cat', 'Bin', proportion=0.1)
        gen.sample(False)

        # regression=True: the value given to sample() is the share of Quad_1 in the output
        gen = SubsamplingGenerator(data, 'Tri', 'Quad', target_category=1, regression=True)
        sample = gen.sample(0.5)
        assert 0.25 < sum(sample['Tri'] == 1) / len(sample) < 0.4
        assert 0.49 < sum(sample['Quad_1']) / len(sample) < 0.51

        sample = gen.sample(0.25)
        assert 0.24 < sum(sample['Quad_1']) / len(sample) < 0.26

        sample = gen.sample(0.75)
        assert 0.74 < sum(sample['Quad_1']) / len(sample) < 0.76

        sample = gen.sample(0.)
        assert sum(sample['Quad_1']) / len(sample) < 0.01

        sample = gen.sample(1.)
        assert 0.99 < sum(sample['Quad_1']) / len(sample)

    def test_generator(self):
        """The base Generator cannot sample; concrete generators have a small mean at index 1 for sample(0)."""
        gen = Generator()
        self.assertRaises(NotImplementedError, gen.sample, 0)

        gen = GaussianGenerator()
        assert gen.sample(0).mean()[1] < 0.1

        gen = IndependentPropertyGenerator()
        assert gen.sample(0).mean()[1] < 0.1

        gen = ProbitGenerator()
        assert gen.sample(0).mean()[1] < 0.1

        gen = LinearGenerator()
        assert gen.sample(0).mean()[1] < 0.1

    def test_multilabel_probit_generator(self):
        """The two values given to MultilabelProbitGenerator.sample change the mean and variance of 'f1'."""
        gen = MultilabelProbitGenerator()
        assert gen.sample((0., 1.)).mean()['f1'] < 0.1
        assert gen.sample((0., 1.)).var()['f1'] > 2.

        assert gen.sample((1., 0.)).mean()['f1'] > 0.9
        assert gen.sample((1., 0.)).var()['f1'] < 1.5
97 | 


--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from propinfer import LinReg, LogReg, MLP
 4 | from propinfer import GaussianGenerator, LinearGenerator
 5 | 
 6 | from sklearn.metrics import accuracy_score, mean_squared_error
 7 | 
 8 | DEFAULT_HYPERPARAMS_LOGREG = {
 9 |     "max_iter": 100
10 | }
11 | 
12 | DEFAULT_HYPERPARAMS_MLP = {
13 |     "input_size": 4,
14 |     "num_classes": 2,
15 |     "epochs": 20,
16 |     "learning_rate": 1e-3,
17 |     "weight_decay": 1e-4,
18 |     "batch_size": 32,
19 |     "layers": [8]
20 | }
21 | 
22 | DEFAULT_HYPERPARAMS_MLP_REGRESSOR = {
23 |     "input_size": 4,
24 |     "num_classes": 1,
25 |     "epochs": 20,
26 |     "learning_rate": 1e-2,
27 |     "weight_decay": 1e-3,
28 |     "batch_size": 32,
29 |     "layers": [8]
30 | }
31 | 
32 | 
33 | class Test(TestCase):
34 |     def test_linreg(self):
35 |         gen = LinearGenerator()
36 |         model = LinReg('label')
37 | 
38 |         train = gen.sample(False)
39 |         model.fit(train)
40 | 
41 |         assert mean_squared_error(train['label'], model.predict(train)) < 2.
42 | 
43 |     def test_logreg(self):
44 |         gen = GaussianGenerator()
45 |         model = LogReg('label', DEFAULT_HYPERPARAMS_LOGREG)
46 | 
47 |         train = gen.sample(False)
48 |         model.fit(train)
49 | 
50 |         assert accuracy_score(train['label'], model.predict(train)) > 0.75
51 | 
52 |     def test_mlp(self):
53 |         gen = GaussianGenerator()
54 | 
55 |         model = MLP('label', DEFAULT_HYPERPARAMS_MLP)
56 |         assert model.parameters()[0][0].shape[0] == 8
57 | 
58 |         train = gen.sample(False)
59 |         model.fit(train)
60 | 
61 |         assert accuracy_score(train['label'], model.predict(train)) > 0.75
62 | 
63 |     def test_mlp_regression(self):
64 |         gen = LinearGenerator()
65 | 
66 |         model = MLP('label', DEFAULT_HYPERPARAMS_MLP_REGRESSOR)
67 | 
68 |         train = gen.sample(False)
69 |         model.fit(train)
70 | 
71 |         assert mean_squared_error(train['label'], model.predict(train)) < 2.
72 | 


--------------------------------------------------------------------------------
/tests/test_model_utils.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from propinfer import MLP
 4 | from propinfer.model_utils import sort_parameters, flatten_parameters
 5 | 
 6 | DEFAULT_HYPERPARAMS = {
 7 |     "input_size": 4,
 8 |     "hidden_size": 20,
 9 |     "num_classes": 2,
10 |     "epochs": 20,
11 |     "learning_rate": 1e-3,
12 |     "batch_size": 32
13 | }
14 | 
15 | 
16 | class Test(TestCase):
17 |     def test_sort_parameters(self):
18 |         model = MLP('None', DEFAULT_HYPERPARAMS)
19 |         params_flat = flatten_parameters(model.parameters())
20 |         params_transf = sort_parameters(model.parameters())
21 | 
22 |         assert params_flat.shape[0] == params_transf.shape[0]
23 |         assert params_flat[-1] == params_transf[-1]
24 | 


--------------------------------------------------------------------------------