├── tensorflow_privacy
│   ├── research
│   │   ├── pate_2017
│   │   │   ├── __init__.py
│   │   │   ├── train_student_mnist_250_lap_20_count_50_epochs_600.sh
│   │   │   ├── utils.py
│   │   │   ├── metrics.py
│   │   │   ├── train_teachers.py
│   │   │   ├── aggregation.py
│   │   │   └── README.md
│   │   ├── README.md
│   │   └── pate_2018
│   │       ├── ICLR2018
│   │       │   ├── generate_figures.sh
│   │       │   ├── download.py
│   │       │   ├── README.md
│   │       │   ├── generate_table.sh
│   │       │   ├── generate_table_data_independent.sh
│   │       │   ├── plot_ls_q.py
│   │       │   └── utility_queries_answered.py
│   │       ├── README.md
│   │       ├── core_test.py
│   │       └── smooth_sensitivity_test.py
│   ├── requirements.txt
│   ├── privacy
│   │   ├── BUILD
│   │   ├── bolt_on
│   │   │   ├── __init__.py
│   │   │   └── README.md
│   │   ├── dp_query
│   │   │   ├── normalized_query_test.py
│   │   │   ├── test_utils.py
│   │   │   ├── no_privacy_query.py
│   │   │   ├── no_privacy_query_test.py
│   │   │   ├── normalized_query.py
│   │   │   ├── BUILD
│   │   │   ├── nested_query.py
│   │   │   ├── gaussian_query.py
│   │   │   ├── nested_query_test.py
│   │   │   └── gaussian_query_test.py
│   │   ├── __init__.py
│   │   ├── analysis
│   │   │   ├── tensor_buffer_test_graph.py
│   │   │   ├── tensor_buffer_test_eager.py
│   │   │   ├── compute_dp_sgd_privacy.py
│   │   │   ├── tensor_buffer.py
│   │   │   ├── privacy_ledger_test.py
│   │   │   └── rdp_accountant_test.py
│   │   └── optimizers
│   │       ├── dp_optimizer_eager_test.py
│   │       └── dp_optimizer_vectorized.py
│   ├── CONTRIBUTING.md
│   ├── setup.py
│   ├── README.md
│   └── tutorials
│       ├── walkthrough
│       │   └── mnist_scratch.py
│       ├── mnist_dpsgd_tutorial_keras.py
│       ├── mnist_dpsgd_tutorial_eager.py
│       ├── README.md
│       └── bolton_tutorial.py
├── requirements.txt
├── figures
│   ├── Mnist_epoch-ε_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_e-03.png
│   ├── Mnist_accuracy-epsilon_lr_3e-06.png
│   ├── cifar10_accuracy-epoch_lr_e-03.png
│   └── cifar10_accuracy-epsilon_lr_e-03.png
├── README.md
├── results
│   ├── mnist_dpsgd_delta_1e-05_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.0001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.01_lr_3e-06.txt
│   ├── cifar_dpsgd_delta_0.0001_lr_0.001.txt
│   ├── cifar_dpsgd_delta_1e-06_lr_0.001.txt
│   └── cifar_dpsgd_delta_1e-05_lr_0.001.txt
├── dp_optimizer.py
└── mnist.py
/tensorflow_privacy/research/pate_2017/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.0.0
2 | tensorflow-privacy==0.1.0
3 |
--------------------------------------------------------------------------------
/tensorflow_privacy/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow>=1.13
2 | mpmath
3 | scipy>=0.17
4 |
--------------------------------------------------------------------------------
/figures/Mnist_epoch-ε_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_epoch-ε_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epsilon_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epsilon_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epsilon_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epsilon_lr_e-03.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning with Differential Privacy
2 |
3 | ## Prerequisites
4 | Windows 10 + CUDA 10 + cuDNN 7 + TensorFlow 2.0 with Anaconda 3
5 | ```
6 | conda create -n tf2 python=3.6
7 | activate tf2
8 | conda install tensorflow-gpu==2.0.0
9 | pip install tensorflow-privacy==0.1.0
10 | ```
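11 | 
12 | To verify the install (a minimal sketch, assuming the pinned TensorFlow release above):
13 | ```python
14 | import tensorflow as tf
15 | 
16 | print(tf.__version__)              # expect 2.0.0
17 | print(tf.test.is_gpu_available())  # expect True with a working CUDA 10 + cuDNN 7 setup
18 | ```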
--------------------------------------------------------------------------------
/tensorflow_privacy/research/README.md:
--------------------------------------------------------------------------------
1 | # Research
2 |
3 | This folder contains code to reproduce results from research papers. Currently,
4 | the following papers are included:
5 |
6 | * Semi-supervised Knowledge Transfer for Deep Learning from Private Training
7 | Data (ICLR 2017): `pate_2017`
8 |
9 | * Scalable Private Learning with PATE (ICLR 2018): `pate_2018`
10 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | licenses(["notice"]) # Apache 2.0
4 |
5 | exports_files(["LICENSE"])
6 |
7 | py_library(
8 | name = "privacy",
9 | srcs = ["__init__.py"],
10 | deps = [
11 | "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger",
12 | "//third_party/py/tensorflow_privacy/privacy/analysis:rdp_accountant",
13 | "//third_party/py/tensorflow_privacy/privacy/dp_query",
14 | "//third_party/py/tensorflow_privacy/privacy/dp_query:gaussian_query",
15 | "//third_party/py/tensorflow_privacy/privacy/dp_query:nested_query",
16 | "//third_party/py/tensorflow_privacy/privacy/dp_query:no_privacy_query",
17 | "//third_party/py/tensorflow_privacy/privacy/dp_query:normalized_query",
18 | "//third_party/py/tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query",
19 | "//third_party/py/tensorflow_privacy/privacy/optimizers:dp_optimizer",
20 | ],
21 | )
22 |
--------------------------------------------------------------------------------
/tensorflow_privacy/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows Google's
28 | [Open Source Community Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 |
17 | # Be sure to clone https://github.com/openai/improved-gan
18 | # and add improved-gan/mnist_svhn_cifar10 to your PATH variable
19 |
20 | # Download labels used to train the student
21 | wget https://github.com/npapernot/multiple-teachers-for-privacy/raw/master/mnist_250_student_labels_lap_20.npy
22 |
23 | # Train the student using improved-gan
24 | THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600
25 |
26 |
--------------------------------------------------------------------------------
/tensorflow_privacy/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy library setup file for pip."""
15 | from setuptools import find_packages
16 | from setuptools import setup
17 |
18 | setup(name='tensorflow_privacy',
19 | version='0.1.0',
20 | url='https://github.com/tensorflow/privacy',
21 | license='Apache-2.0',
22 | install_requires=[
23 | 'scipy>=0.17',
24 | 'mpmath', # used in tests only
25 | ],
26 | # Explicit dependence on TensorFlow is not supported.
27 | # See https://github.com/tensorflow/tensorflow/issues/7166
28 | extras_require={
29 | 'tf': ['tensorflow>=1.0.0'],
30 | 'tf_gpu': ['tensorflow-gpu>=1.0.0'],
31 | },
32 | packages=find_packages())
33 |
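34 | # Illustrative install commands (assumption: run from the directory containing
35 | # this file). The extras select which TensorFlow build is pulled in:
36 | #   pip install .          # library only; TensorFlow must already be installed
37 | #   pip install .[tf]      # also installs CPU TensorFlow
38 | #   pip install .[tf_gpu]  # also installs GPU TensorFlow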
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 |
17 | def batch_indices(batch_nb, data_length, batch_size):
18 | """
19 | This helper function computes a batch start and end index
20 | :param batch_nb: the batch number
21 | :param data_length: the total length of the data being parsed by batches
22 | :param batch_size: the number of inputs in each batch
23 | :return: pair of (start, end) indices
24 | """
25 | # Batch start and end index
26 | start = int(batch_nb * batch_size)
27 | end = int((batch_nb + 1) * batch_size)
28 |
29 | # When there are not enough inputs left, we reuse some to complete the batch
30 | if end > data_length:
31 | shift = end - data_length
32 | start -= shift
33 | end -= shift
34 |
35 | return start, end
36 |
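37 | # Worked example (illustrative): with data_length=100 and batch_size=32,
38 | # batch_indices(3, 100, 32) returns (68, 100). The nominal slice (96, 128)
39 | # overruns the data by 28, so both indices are shifted back by 28 and the
40 | # final batch reuses inputs 68..95 to stay full-sized.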
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Privacy Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """BoltOn Method for privacy."""
15 | import sys
16 | from distutils.version import LooseVersion
17 | import tensorflow as tf
18 |
19 | if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
20 | raise ImportError("Please upgrade your version "
21 | "of tensorflow from: {0} to at least 2.0.0 to "
22 | "use privacy/bolt_on".format(LooseVersion(tf.__version__)))
23 | if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts.
24 | pass
25 | else:
26 | from privacy.bolt_on.models import BoltOnModel # pylint: disable=g-import-not-at-top
27 | from privacy.bolt_on.optimizers import BoltOn # pylint: disable=g-import-not-at-top
28 | from privacy.bolt_on.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top
29 | from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top
30 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 |
17 |
18 | counts_file="data/glyph_5000_teachers.npy"
19 | output_dir="figures/"
20 |
21 | mkdir -p $output_dir
22 |
23 | if [ ! -d "$output_dir" ]; then
24 | echo "Directory $output_dir does not exist."
25 | exit 1
26 | fi
27 |
28 | python rdp_bucketized.py \
29 | --plot=small \
30 | --counts_file=$counts_file \
31 | --plot_file=$output_dir"noisy_thresholding_check_perf.pdf"
32 |
33 | python rdp_bucketized.py \
34 | --plot=large \
35 | --counts_file=$counts_file \
36 | --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf"
37 |
38 | python rdp_cumulative.py \
39 | --cache=False \
40 | --counts_file=$counts_file \
41 | --figures_dir=$output_dir
42 |
43 | python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf"
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/download.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Script to download votes files to the data/ directory.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | from six.moves import urllib
23 | import os
24 | import tarfile
25 |
26 | FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz'
27 | DATA_DIR = 'data/'
28 |
29 |
30 | def download():
31 | print('Downloading ' + FILE_URI)
32 | tar_filename, _ = urllib.request.urlretrieve(FILE_URI)
33 | print('Unpacking ' + tar_filename)
34 | with tarfile.open(tar_filename, "r:gz") as tar:
35 | tar.extractall(DATA_DIR)
36 | print('Done!')
37 |
38 |
39 | if __name__ == '__main__':
40 | if not os.path.exists(DATA_DIR):
41 | print('Data directory does not exist. Creating ' + DATA_DIR)
42 | os.makedirs(DATA_DIR)
43 | download()
44 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/normalized_query_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for GaussianAverageQuery."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | from privacy.dp_query import gaussian_query
24 | from privacy.dp_query import normalized_query
25 | from privacy.dp_query import test_utils
26 |
27 |
28 | class NormalizedQueryTest(tf.test.TestCase):
29 |
30 | def test_normalization(self):
31 | with self.cached_session() as sess:
32 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0].
33 | record2 = tf.constant([4.0, -3.0]) # Not clipped.
34 |
35 | sum_query = gaussian_query.GaussianSumQuery(
36 | l2_norm_clip=5.0, stddev=0.0)
37 | query = normalized_query.NormalizedQuery(
38 | numerator_query=sum_query, denominator=2.0)
39 |
40 | query_result, _ = test_utils.run_query(query, [record1, record2])
41 | result = sess.run(query_result)
42 | expected = [0.5, 0.5]
43 | self.assertAllClose(result, expected)
44 |
45 |
46 | if __name__ == '__main__':
47 | tf.test.main()
48 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/metrics.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import numpy as np
21 |
22 |
23 | def accuracy(logits, labels):
24 | """
25 | Return accuracy of the array of logits (or label predictions) wrt the labels
26 | :param logits: this can either be logits, probabilities, or a single label
27 | :param labels: the correct labels to match against
28 | :return: the accuracy as a float
29 | """
30 | assert len(logits) == len(labels)
31 |
32 | if len(np.shape(logits)) > 1:
33 | # Predicted labels are the argmax over axis 1
34 | predicted_labels = np.argmax(logits, axis=1)
35 | else:
36 | # Input was already labels
37 | assert len(np.shape(logits)) == 1
38 | predicted_labels = logits
39 |
40 | # Check against correct labels to compute correct guesses
41 | correct = np.sum(predicted_labels == labels.reshape(len(labels)))
42 |
43 | # Divide by number of labels to obtain accuracy
44 | accuracy = float(correct) / len(labels)
45 |
46 | # Return float value
47 | return accuracy
48 |
49 |
50 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/test_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utility methods for testing private queries.
15 |
16 | Utility methods for testing private queries.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 |
24 | def run_query(query, records, global_state=None, weights=None):
25 | """Executes query on the given set of records as a single sample.
26 |
27 | Args:
28 | query: A PrivateQuery to run.
29 | records: An iterable containing records to pass to the query.
30 | global_state: The current global state. If None, an initial global state is
31 | generated.
32 | weights: An optional iterable containing the weights of the records.
33 |
34 | Returns:
35 | A tuple (result, new_global_state) where "result" is the result of the
36 | query and "new_global_state" is the updated global state.
37 | """
38 |   if global_state is None:
39 | global_state = query.initial_global_state()
40 | params = query.derive_sample_params(global_state)
41 | sample_state = query.initial_sample_state(next(iter(records)))
42 | if weights is None:
43 | for record in records:
44 | sample_state = query.accumulate_record(params, sample_state, record)
45 | else:
46 | for weight, record in zip(weights, records):
47 | sample_state = query.accumulate_record(
48 | params, sample_state, record, weight)
49 | return query.get_noised_result(sample_state, global_state)
50 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/README.md:
--------------------------------------------------------------------------------
1 | Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas
2 | Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar
3 | Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908).
4 |
5 |
6 | ### Requirements
7 |
8 | * Python, version ≥ 2.7
9 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
10 | * matplotlib
11 | * numpy
12 | * scipy
13 | * sympy (for smooth sensitivity analysis)
14 | * write access to the current directory (otherwise, output directories in download.py and *.sh
15 | scripts must be changed)
16 |
17 | ## Reproducing Figures 1 and 5, and Table 2
18 |
19 | Before running any of the analysis scripts, create the data/ directory and download the votes files by running\
20 | `$ python download.py`
21 |
22 | To generate Figures 1 and 5 run\
23 | `$ sh generate_figures.sh`\
24 | The output is written to the figures/ directory.
25 |
26 | For Table 2 run (may take several hours)\
27 | `$ sh generate_table.sh`\
28 | The output is written to the console.
29 |
30 | For data-independent bounds (for comparison with Table 2), run\
31 | `$ sh generate_table_data_independent.sh`\
32 | The output is written to the console.
33 |
34 | ## Files in this directory
35 |
36 | * generate_figures.sh — Master script for generating Figures 1 and 5.
37 |
38 | * generate_table.sh — Master script for generating Table 2.
39 |
40 | * generate_table_data_independent.sh — Master script for computing data-independent
41 | bounds.
42 |
43 | * rdp_bucketized.py — Script for producing Figure 1 (right) and Figure 5 (right).
44 |
45 | * rdp_cumulative.py — Script for producing Figure 1 (middle) and Figure 5 (left).
46 |
47 | * smooth_sensitivity_table.py — Script for generating Table 2.
48 |
49 | * utility_queries_answered.py — Script for producing Figure 1 (left).
50 |
51 | * plot_partition.py — Script for producing partition.pdf, a detailed breakdown of privacy
52 | costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).
53 |
54 | * plots_for_slides.py — Script for producing several plots for the slide deck.
55 |
56 | * download.py — Utility script for populating the data/ directory.
57 |
58 | * plot_ls_q.py is not used.
59 |
60 |
61 | All Python files take flags. Run `script_name.py --help` for help on flags.
62 |
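63 | For example, `$ python smooth_sensitivity_table.py --help` lists the flags
64 | accepted by the Table 2 script.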
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 |
17 |
18 | echo "Reproducing Table 2. Takes a couple of hours."
19 |
20 | executable="python smooth_sensitivity_table.py"
21 | data_dir="data"
22 |
23 | echo
24 | echo "######## MNIST ########"
25 | echo
26 |
27 | $executable \
28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \
29 | --threshold=200 \
30 | --sigma1=150 \
31 | --sigma2=40 \
32 | --queries=640 \
33 | --delta=1e-5
34 |
35 | echo
36 | echo "######## SVHN ########"
37 | echo
38 |
39 | $executable \
40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \
41 | --threshold=300 \
42 | --sigma1=200 \
43 | --sigma2=40 \
44 | --queries=8500 \
45 | --delta=1e-6
46 |
47 | echo
48 | echo "######## Adult ########"
49 | echo
50 |
51 | $executable \
52 | --counts_file=$data_dir"/adult_250_teachers.npy" \
53 | --threshold=300 \
54 | --sigma1=200 \
55 | --sigma2=40 \
56 | --queries=1500 \
57 | --delta=1e-5
58 |
59 | echo
60 | echo "######## Glyph (Confident) ########"
61 | echo
62 |
63 | $executable \
64 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \
65 | --threshold=1000 \
66 | --sigma1=500 \
67 | --sigma2=100 \
68 | --queries=12000 \
69 | --delta=1e-8
70 |
71 | echo
72 | echo "######## Glyph (Interactive, Round 1) ########"
73 | echo
74 |
75 | $executable \
76 | --counts_file=$data_dir"/glyph_round1.npy" \
77 | --threshold=3500 \
78 | --sigma1=1500 \
79 | --sigma2=100 \
80 | --delta=1e-8
81 |
82 | echo
83 | echo "######## Glyph (Interactive, Round 2) ########"
84 | echo
85 |
86 | $executable \
87 | --counts_file=$data_dir"/glyph_round2.npy" \
88 | --baseline_file=$data_dir"/glyph_round2_student.npy" \
89 | --threshold=3500 \
90 | --sigma1=2000 \
91 | --sigma2=200 \
92 | --teachers=5000 \
93 | --delta=1e-8
94 |
--------------------------------------------------------------------------------
/results/mnist_dpsgd_delta_1e-05_lr_3e-06.txt:
--------------------------------------------------------------------------------
1 | eps: [1.0244832274550977, 1.0895559994960096, 1.152984324480169, 1.1884359033987033, 1.2238874823172377, 1.259339061235772, 1.2947906401543063, 1.3302422190728407, 1.365693797991375, 1.4011453769099091, 1.4365969558284435, 1.4720485347469778, 1.5075001136655122, 1.5429516925840465, 1.5784032715025806, 1.6138548504211152, 1.6493064293396493, 1.6847580082581837, 1.720209587176718, 1.7556611660952521, 1.7911127450137867, 1.8265643239323208, 1.8620159028508552, 1.8974674817693895, 1.9329190606879239, 1.9683706396064582, 2.0038222185249923, 2.039273797443527, 2.074725376362061, 2.1070971504466094, 2.1385696633235494, 2.1700421762004893, 2.2015146890774293, 2.2329872019543693, 2.2644597148313097, 2.2959322277082497, 2.3273444725641, 2.3584159072093906, 2.389150196384082, 2.4195528187374924, 2.4496306673437216, 2.47939177188419, 2.5088451064044532, 2.5380004577251647, 2.566868337378801, 2.5954599248338712, 2.6237615073107916, 2.651776396750234, 2.6795399578410217, 2.7070652189159037, 2.734365832716886, 2.76142277415431, 2.788203585584366, 2.814789652616146, 2.84119660459472, 2.8673271737581487, 2.8932638632808336, 2.9190555302993633, 2.9445717345610545, 2.969930040191774, 2.9951400019060035, 3.0200849330064745, 3.0449423727552256, 3.0695416941430738, 3.0940278310878373, 3.1183227421228525, 3.1424603930913553, 3.16645149256044, 3.190264545989246, 3.213952427257068, 3.2374659175249123, 3.2608511687943733, 3.284091357435858, 3.3071745619803172, 3.330169025294796, 3.3529507616602854, 3.375728486088109, 3.3982093282729817, 3.4206901704578545, 3.442981331768199, 3.4651618493845584, 3.4872994645999036, 3.509180223018679, 3.531060981437454, 3.5527797253598825, 3.5743612858426586, 3.5959428463254346, 3.617279590270551, 3.6385625100894483, 3.659845429908345, 3.6808390641874653, 3.701823896735645, 3.7228087292838246, 3.743500323855752, 3.764187618748881, 3.784874913642011, 3.80530789401406, 3.825698197184275, 3.84608850035449, 3.866308840554522]
2 | validation acc: [0.0594, 0.1068, 0.1737, 0.2333, 0.2853, 0.337, 0.3641, 0.3896, 0.4202, 0.4531, 0.5045, 0.5675, 0.6272, 0.6809, 0.7356, 0.7729, 0.7981, 0.8153, 0.8298, 0.8387, 0.849, 0.8564, 0.8637, 0.8688, 0.8737, 0.8804, 0.8856, 0.8913, 0.8965, 0.8998, 0.9034, 0.9064, 0.9102, 0.9129, 0.9151, 0.9179, 0.92, 0.9222, 0.9237, 0.9245, 0.927, 0.9291, 0.9301, 0.9323, 0.933, 0.9337, 0.9363, 0.9348, 0.9372, 0.939, 0.9399, 0.9399, 0.9411, 0.9413, 0.9434, 0.9435, 0.944, 0.9435, 0.9454, 0.9458, 0.9469, 0.9472, 0.9476, 0.9484, 0.9482, 0.949, 0.9497, 0.9502, 0.9516, 0.9515, 0.952, 0.9516, 0.9526, 0.9526, 0.9522, 0.9533, 0.9523, 0.9533, 0.9551, 0.9536, 0.9547, 0.9554, 0.9554, 0.955, 0.957, 0.9563, 0.9566, 0.9572, 0.9567, 0.9581, 0.9574, 0.9583, 0.9587, 0.9579, 0.9582, 0.9606, 0.9602, 0.9609, 0.9602, 0.9611]
3 |
--------------------------------------------------------------------------------
/results/mnist_dpsgd_delta_0.0001_lr_3e-06.txt:
--------------------------------------------------------------------------------
1 | eps: [0.8326011363722605, 0.8976739084131724, 0.9436584069352558, 0.9791099858537902, 1.0145615647723245, 1.0500131436908586, 1.085464722609393, 1.1209163015279273, 1.1563678804464617, 1.191819459364996, 1.2272710382835303, 1.2627226172020647, 1.2981741961205988, 1.3336257750391332, 1.3690773539576675, 1.4045289328762018, 1.4399805117947362, 1.4754320907132703, 1.5108836696318049, 1.546335248550339, 1.5817868274688733, 1.6172384063874077, 1.6526899853059418, 1.6856777203572872, 1.7171502332342274, 1.7486227461111676, 1.7800952589881076, 1.8115677718650476, 1.8430402847419878, 1.8743356446438426, 1.9052181718400023, 1.9356422549899939, 1.96567941857039, 1.9953366822421725, 2.0246147124836638, 2.0534946699030403, 2.082028256362481, 2.110225372299399, 2.1380963940615825, 2.165652175132723, 2.1929040544609935, 2.219863870575365, 2.2465439806151197, 2.272957283675776, 2.2990878528392047, 2.3249403189786673, 2.350554146796505, 2.375944032153419, 2.4011253446527228, 2.4260296809068174, 2.4507134910368893, 2.47522226487027, 2.499517175905285, 2.5235647542607182, 2.547474060537832, 2.5711619418056544, 2.594649784273822, 2.618035035543283, 2.6411230086753616, 2.664135220235837, 2.6869169566013262, 2.709575125744436, 2.732055967929309, 2.754385065414559, 2.7765655830309184, 2.798591875047433, 2.820472633466208, 2.842223780287906, 2.863805340770682, 2.8853105042888822, 2.906593424107779, 2.927876343926676, 2.9488682178596077, 2.969853050407788, 2.9906628001059756, 3.0113500949991048, 3.032012133309119, 3.0524024364793334, 3.0727927396495485, 3.093047072443617, 3.113140926228243, 3.1332347800128684, 3.1531211515642337, 3.172919094784786, 3.1927170380053385, 3.212277580271345, 3.231780148313306, 3.2512827163552673, 3.270562153959215, 3.2897698788428245, 3.308977603726434, 3.3280234805794278, 3.3469368910280495, 3.365850301476671, 3.3847132309541426, 3.4033328524582283, 3.421952473962314, 3.4405720954663996, 3.459012770868646, 3.477339125747593]
2 | validation acc: [0.0623, 0.1518, 0.2339, 0.2726, 0.2902, 0.3211, 0.3532, 0.4021, 0.4795, 0.5877, 0.6763, 0.7647, 0.8091, 0.8336, 0.8494, 0.8648, 0.8742, 0.8808, 0.8885, 0.893, 0.8974, 0.9032, 0.9063, 0.91, 0.9137, 0.9162, 0.9186, 0.921, 0.9246, 0.9264, 0.9289, 0.9309, 0.9311, 0.9326, 0.9339, 0.9365, 0.9359, 0.9389, 0.94, 0.9398, 0.9403, 0.9413, 0.9423, 0.9437, 0.9441, 0.9456, 0.9463, 0.9469, 0.9472, 0.9482, 0.9484, 0.9501, 0.9503, 0.9514, 0.9519, 0.9517, 0.9526, 0.9531, 0.9533, 0.9537, 0.9544, 0.9547, 0.9554, 0.956, 0.9564, 0.9565, 0.9567, 0.9576, 0.958, 0.9577, 0.9581, 0.9591, 0.9585, 0.9595, 0.9604, 0.9601, 0.9608, 0.9608, 0.9618, 0.9608, 0.9612, 0.9622, 0.9628, 0.9624, 0.9629, 0.9633, 0.9632, 0.9635, 0.9635, 0.9641, 0.9644, 0.9647, 0.9652, 0.9645, 0.9654, 0.9654, 0.9658, 0.9661, 0.9659, 0.9658]
3 |
--------------------------------------------------------------------------------
/results/mnist_dpsgd_delta_0.001_lr_3e-06.txt:
--------------------------------------------------------------------------------
1 | eps: [0.6407190452894234, 0.6988809104718083, 0.7343324893903427, 0.769784068308877, 0.8052356472274113, 0.8406872261459456, 0.8761388050644799, 0.9115903839830142, 0.9470419629015485, 0.9824935418200829, 1.0179451207386172, 1.0533966996571515, 1.0888482785756857, 1.12429985749422, 1.1597514364127544, 1.1952030153312887, 1.230654594249823, 1.2642582902679655, 1.2957308031449055, 1.3272033160218457, 1.358675828898786, 1.3901483417757259, 1.4212154199821514, 1.4517316912424953, 1.4817234307631033, 1.5111556227476015, 1.5401210024272802, 1.5685965878910908, 1.5966528302821439, 1.6242391313495421, 1.651442276205101, 1.6782716502966837, 1.704717962756832, 1.7307937037915002, 1.756536559031956, 1.7819580241150645, 1.8070700925530656, 1.8318852879307048, 1.8564166986527024, 1.8806780159609964, 1.9046691154300808, 1.928371456354241, 1.9518336510227319, 1.9750714553704065, 1.998101415176878, 2.0208831515423666, 2.0434217654007636, 2.065788799060919, 2.0879693166772784, 2.109884285494962, 2.1316678352159295, 2.1532493956987055, 2.17462433812611, 2.1959072579450067, 2.2168973715317506, 2.2378252763561997, 2.258512571249329, 2.2791066757743925, 2.2994969789446076, 2.3197853043327132, 2.3398791581173386, 2.3598913778680375, 2.3796893210885894, 2.3994569011825284, 2.4189594692244896, 2.4384620372664503, 2.457723546690314, 2.476931271573923, 2.4960176104345706, 2.5149310208831928, 2.533844431331814, 2.552499639343671, 2.5711192608477567, 2.589677989431748, 2.608004344310695, 2.6263306991896416, 2.644543279614852, 2.6625768870738824, 2.6806104945329134, 2.6985240355045024, 2.7162654116914053, 2.7340067878783083, 2.751671671352242, 2.769121329409039, 2.786570987465835, 2.8040206455226313, 2.821199636692215, 2.8383580868067537, 2.855516536921293, 2.872559270264915, 2.8894270197188785, 2.906294769172841, 2.9231625186268038, 2.9398410578754635, 2.9564186110890702, 2.972996164302677, 2.9895737175162838, 3.0059561161040556, 3.0222439746836636, 3.0385318332632716]
2 | validation acc: [0.1552, 0.201, 0.269, 0.3583, 0.4291, 0.4754, 0.5168, 0.5515, 0.6042, 0.6536, 0.6972, 0.7508, 0.7861, 0.8116, 0.8305, 0.8419, 0.8523, 0.8625, 0.8687, 0.8793, 0.8832, 0.8886, 0.8923, 0.8974, 0.8993, 0.9025, 0.9062, 0.9072, 0.9088, 0.9121, 0.9126, 0.9139, 0.9173, 0.9184, 0.9179, 0.921, 0.9215, 0.9238, 0.924, 0.9251, 0.9272, 0.9278, 0.9285, 0.93, 0.9303, 0.9322, 0.932, 0.9352, 0.9359, 0.9358, 0.9377, 0.9383, 0.9389, 0.9398, 0.9402, 0.9414, 0.9414, 0.9424, 0.9434, 0.9454, 0.944, 0.9457, 0.9471, 0.947, 0.9472, 0.9484, 0.9489, 0.9493, 0.9502, 0.9503, 0.951, 0.9518, 0.9522, 0.953, 0.9526, 0.9535, 0.9542, 0.9536, 0.9537, 0.9546, 0.955, 0.9558, 0.9562, 0.9546, 0.9559, 0.9565, 0.9561, 0.9571, 0.957, 0.9575, 0.9576, 0.958, 0.9576, 0.9582, 0.9584, 0.9588, 0.9588, 0.959, 0.9593, 0.9592]
3 |
--------------------------------------------------------------------------------
/results/mnist_dpsgd_delta_0.01_lr_3e-06.txt:
--------------------------------------------------------------------------------
1 | eps: [0.4488369542065862, 0.4895549929268951, 0.5250065718454293, 0.5604581507639637, 0.595909729682498, 0.6313613086010323, 0.6668128875195666, 0.7022644664381009, 0.7377160453566352, 0.7731676242751695, 0.8086192031937038, 0.8428388601786436, 0.8743113730555838, 0.9057838859325238, 0.9371678223219213, 0.9678211274949969, 0.9976683411210863, 1.0267473349515202, 1.0551126861496996, 1.0828260875663616, 1.1099319352876824, 1.136478641837888, 1.1624701594893336, 1.1879720160767095, 1.2130148404534087, 1.237611132435135, 1.2617823771303591, 1.2855809709028272, 1.3090175177716414, 1.3320676101179185, 1.354787562872218, 1.3771925327072796, 1.3992959375237164, 1.421111890143953, 1.4426552521444411, 1.463938171963338, 1.484926525203894, 1.5056750474995524, 1.5262012182396667, 1.5465235362218086, 1.5666173900064342, 1.586459547392393, 1.6061387901356725, 1.6256413581776337, 1.6448849394214122, 1.6640117402897139, 1.6829251507383356, 1.7016664262291141, 1.7202860477331998, 1.7386695628737965, 1.7569959177527434, 1.775051258049255, 1.7930848655082858, 1.8108436077942702, 1.8285849839811732, 1.8460808862726925, 1.863530544329489, 1.8807997577948097, 1.8979582079093487, 1.9150395135099436, 1.9319072629639062, 1.948775012417869, 1.9654198896548445, 1.9819974428684513, 1.9985414578761678, 2.0148293164557756, 2.0311171750353836, 2.047319524470186, 2.063318187250421, 2.079316850030656, 2.095230822970841, 2.110940786057371, 2.1266507491439004, 2.142333957071471, 2.1577557138768193, 2.173177470682168, 2.188599227487517, 2.203826372709379, 2.218960413994861, 2.234094455280343, 2.2492215146115266, 2.2640683285237113, 2.2789151424358964, 2.2937619563480807, 2.308575939755106, 2.3231360118616964, 2.3376960839682868, 2.3522561560748776, 2.3668162281814684, 2.381112291486219, 2.395386104814934, 2.4096599181436478, 2.4239337314723626, 2.438087536786136, 2.4520755718531704, 2.466063606920205, 2.480051641987239, 2.4940396770542734, 2.5078632962244485, 2.52156603107018]
2 | validation acc: [0.1643, 0.1843, 0.2231, 0.2901, 0.3378, 0.3651, 0.3892, 0.4121, 0.4509, 0.4894, 0.55, 0.6317, 0.7042, 0.7651, 0.7938, 0.816, 0.8333, 0.8453, 0.8603, 0.8713, 0.8776, 0.882, 0.887, 0.8912, 0.8959, 0.9004, 0.9038, 0.907, 0.9092, 0.9123, 0.9154, 0.9176, 0.9196, 0.9214, 0.9244, 0.9251, 0.9279, 0.9287, 0.9308, 0.9316, 0.9326, 0.9333, 0.9352, 0.936, 0.9369, 0.938, 0.9388, 0.9404, 0.9409, 0.9412, 0.942, 0.943, 0.9447, 0.9447, 0.9461, 0.9462, 0.9467, 0.9481, 0.9481, 0.9478, 0.948, 0.95, 0.9501, 0.9507, 0.9507, 0.9513, 0.9516, 0.9524, 0.9532, 0.9542, 0.9541, 0.9543, 0.9549, 0.9557, 0.9561, 0.9562, 0.9577, 0.9565, 0.9576, 0.9584, 0.9585, 0.9592, 0.9593, 0.9598, 0.9594, 0.9603, 0.9609, 0.9611, 0.9612, 0.9619, 0.9623, 0.9625, 0.963, 0.9628, 0.9634, 0.9632, 0.9635, 0.9637, 0.9635, 0.9634]
3 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/no_privacy_query.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Implements DPQuery interface for no privacy average queries."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from distutils.version import LooseVersion
21 | import tensorflow as tf
22 |
23 | from privacy.dp_query import dp_query
24 |
25 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
26 | nest = tf.contrib.framework.nest
27 | else:
28 | nest = tf.nest
29 |
30 |
31 | class NoPrivacySumQuery(dp_query.SumAggregationDPQuery):
32 | """Implements DPQuery interface for a sum query with no privacy.
33 |
34 | Accumulates vectors without clipping or adding noise.
35 | """
36 |
37 | def get_noised_result(self, sample_state, global_state):
38 | """See base class."""
39 | return sample_state, global_state
40 |
41 |
42 | class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
43 | """Implements DPQuery interface for an average query with no privacy.
44 |
45 | Accumulates vectors and normalizes by the total number of accumulated vectors.
46 | """
47 |
48 | def initial_sample_state(self, template):
49 | """See base class."""
50 | return (super(NoPrivacyAverageQuery, self).initial_sample_state(template),
51 | tf.constant(0.0))
52 |
53 | def preprocess_record(self, params, record, weight=1):
54 | """Multiplies record by weight."""
55 | weighted_record = nest.map_structure(lambda t: weight * t, record)
56 | return (weighted_record, tf.cast(weight, tf.float32))
57 |
58 | def accumulate_record(self, params, sample_state, record, weight=1):
59 | """Accumulates record, multiplying by weight."""
60 | weighted_record = nest.map_structure(lambda t: weight * t, record)
61 | return self.accumulate_preprocessed_record(
62 | sample_state, (weighted_record, tf.cast(weight, tf.float32)))
63 |
64 | def get_noised_result(self, sample_state, global_state):
65 | """See base class."""
66 | sum_state, denominator = sample_state
67 |
68 | return (
69 | nest.map_structure(lambda t: t / denominator, sum_state),
70 | global_state)
71 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Privacy Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy library."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import sys
21 |
22 | # pylint: disable=g-import-not-at-top
23 |
24 | if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts.
25 | pass
26 | else:
27 | from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
28 | from privacy.analysis.privacy_ledger import PrivacyLedger
29 | from privacy.analysis.privacy_ledger import QueryWithLedger
30 | from privacy.analysis.privacy_ledger import SampleEntry
31 |
32 | from privacy.dp_query.dp_query import DPQuery
33 | from privacy.dp_query.gaussian_query import GaussianAverageQuery
34 | from privacy.dp_query.gaussian_query import GaussianSumQuery
35 | from privacy.dp_query.nested_query import NestedQuery
36 | from privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
37 | from privacy.dp_query.no_privacy_query import NoPrivacySumQuery
38 | from privacy.dp_query.normalized_query import NormalizedQuery
39 | from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipSumQuery
40 | from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipAverageQuery
41 |
42 | from privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer
43 | from privacy.optimizers.dp_optimizer import DPAdagradOptimizer
44 | from privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer
45 | from privacy.optimizers.dp_optimizer import DPAdamOptimizer
46 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
47 | from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
48 |
49 | try:
50 | from privacy.bolt_on.models import BoltOnModel
51 | from privacy.bolt_on.optimizers import BoltOn
52 | from privacy.bolt_on.losses import StrongConvexMixin
53 | from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
54 | from privacy.bolt_on.losses import StrongConvexHuber
55 | except ImportError:
56 | # module `bolt_on` not yet available in this version of TF Privacy
57 | pass
58 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 |
17 |
18 | echo "Table 2 with data-independent analysis."
19 |
20 | executable="python smooth_sensitivity_table.py"
21 | data_dir="data"
22 |
23 | echo
24 | echo "######## MNIST ########"
25 | echo
26 |
27 | $executable \
28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \
29 | --threshold=200 \
30 | --sigma1=150 \
31 | --sigma2=40 \
32 | --queries=640 \
33 | --delta=1e-5 \
34 | --data_independent
35 | echo
36 | echo "######## SVHN ########"
37 | echo
38 |
39 | $executable \
40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \
41 | --threshold=300 \
42 | --sigma1=200 \
43 | --sigma2=40 \
44 | --queries=8500 \
45 | --delta=1e-6 \
46 | --data_independent
47 |
48 | echo
49 | echo "######## Adult ########"
50 | echo
51 |
52 | $executable \
53 | --counts_file=$data_dir"/adult_250_teachers.npy" \
54 | --threshold=300 \
55 | --sigma1=200 \
56 | --sigma2=40 \
57 | --queries=1500 \
58 | --delta=1e-5 \
59 | --data_independent
60 |
61 | echo
62 | echo "######## Glyph (Confident) ########"
63 | echo
64 |
65 | $executable \
66 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \
67 | --threshold=1000 \
68 | --sigma1=500 \
69 | --sigma2=100 \
70 | --queries=12000 \
71 | --delta=1e-8 \
72 | --data_independent
73 |
74 | echo
75 | echo "######## Glyph (Interactive, Round 1) ########"
76 | echo
77 |
78 | $executable \
79 | --counts_file=$data_dir"/glyph_round1.npy" \
80 | --threshold=3500 \
81 | --sigma1=1500 \
82 | --sigma2=100 \
83 | --delta=1e-8 \
84 | --data_independent
85 |
86 | echo
87 | echo "######## Glyph (Interactive, Round 2) ########"
88 | echo
89 |
90 | $executable \
91 | --counts_file=$data_dir"/glyph_round2.npy" \
92 | --baseline_file=$data_dir"/glyph_round2_student.npy" \
93 | --threshold=3500 \
94 | --sigma1=2000 \
95 | --sigma2=200 \
96 | --teachers=5000 \
97 | --delta=1e-8 \
98 | --order=8.5 \
99 | --data_independent
100 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/README.md:
--------------------------------------------------------------------------------
1 | # BoltOn Subpackage
2 |
3 | This package contains source code for the BoltOn method, a particular
4 | differential-privacy (DP) technique that uses output perturbations and
5 | leverages additional assumptions to provide a new way of approaching the
6 | privacy guarantees.
7 |
8 | ## BoltOn Description
9 |
10 | This method uses four key steps to achieve privacy guarantees:
11 | 1. Adds noise to the weights after training (output perturbation).
12 | 2. Projects the weights onto a ball of radius R (the radius of the
13 |    hypothesis space) after each batch; R is configurable by the user.
14 | 3. Limits the learning rate.
15 | 4. Uses a strongly convex loss function (see compile).
16 |
17 | For more details on the strong convexity requirements, see:
18 | Bolt-on Differential Privacy for Scalable Stochastic Gradient
19 | Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf
20 |
21 | ## Why BoltOn?
22 |
23 | The major difference for the BoltOn method is that it injects noise post model
24 | convergence, rather than noising gradients or weights during training. This
25 | approach requires some additional constraints listed in the Description.
26 | If the use case and model satisfy these constraints, this is another
27 | approach that can be trained to maximize utility while maintaining privacy.
28 | The paper describes in detail the advantages and disadvantages of this approach
29 | and its results compared to some other methods, namely noising at each iteration
30 | and no noising.
31 |
32 | ## Tutorials
33 |
34 | This package has a tutorial in the root tutorials directory, under
35 | `bolton_tutorial.py`; a condensed usage sketch also appears at the end of this README.
36 |
37 | ## Contribution
38 |
39 | This package was initially contributed by Georgian Partners with the hope of
40 | growing the tensorflow/privacy library. There are several rich use cases for
41 | epsilon-delta privacy in machine learning, some of which can be explored here:
42 | https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e
43 | https://arxiv.org/pdf/1811.04911.pdf
44 |
45 | ## Stability
46 |
47 | As this package is pinned to TensorFlow 2.0, it may encounter stability
48 | issues during the ongoing development of TensorFlow 2.0.
49 | 
50 | This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0b1. If you
51 | would like to use this sub-package, please use one of these versions, as we
52 | cannot guarantee it will work for all later releases. If you do find issues,
53 | feel free to raise them with the contributors listed below.
54 |
55 | ## Contacts
56 |
57 | In addition to the maintainers of tensorflow/privacy listed in the root
58 | README.md, please feel free to contact members of Georgian Partners. In
59 | particular,
60 |
61 | * Georgian Partners(@georgianpartners)
62 | * Ji Chao Zhang(@Jichaogp)
63 | * Christopher Choquette(@cchoquette)
64 |
65 | ## Copyright
66 |
67 | Copyright 2019 - Google LLC
68 |
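69 | ## Usage sketch
70 | 
71 | A condensed sketch of the training flow, based on `bolton_tutorial.py` in the
72 | root tutorials directory. The class and argument names below are assumptions
73 | taken from that tutorial and may differ across versions:
74 | 
75 | ```python
76 | import tensorflow as tf
77 | 
78 | from privacy.bolt_on import losses
79 | from privacy.bolt_on import models
80 | 
81 | # Toy binary-classification data.
82 | x = tf.random.normal((100, 4))
83 | y = tf.cast(tf.random.uniform((100, 1)) > 0.5, tf.float32)
84 | 
85 | # Strongly convex loss (step 4). Positional args: regularization strength,
86 | # loss clipping constant C, and the radius R of the hypothesis space used
87 | # for the per-batch projection (step 2).
88 | loss = losses.StrongConvexBinaryCrossentropy(1.0, 1.0, 1.0)
89 | 
90 | model = models.BoltOnModel(n_outputs=1)
91 | model.compile(optimizer=tf.optimizers.SGD(), loss=loss)
92 | 
93 | # Laplace noise calibrated to epsilon is added to the weights after
94 | # training (step 1, output perturbation).
95 | model.fit(x, y, epsilon=2, noise_distribution='laplace',
96 |           batch_size=20, epochs=2)
97 | ```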
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for NoPrivacyAverageQuery."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | from absl.testing import parameterized
22 | import tensorflow as tf
23 |
24 | from privacy.dp_query import no_privacy_query
25 | from privacy.dp_query import test_utils
26 |
27 |
28 | class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase):
29 |
30 | def test_sum(self):
31 | with self.cached_session() as sess:
32 | record1 = tf.constant([2.0, 0.0])
33 | record2 = tf.constant([-1.0, 1.0])
34 |
35 | query = no_privacy_query.NoPrivacySumQuery()
36 | query_result, _ = test_utils.run_query(query, [record1, record2])
37 | result = sess.run(query_result)
38 | expected = [1.0, 1.0]
39 | self.assertAllClose(result, expected)
40 |
41 | def test_no_privacy_average(self):
42 | with self.cached_session() as sess:
43 | record1 = tf.constant([5.0, 0.0])
44 | record2 = tf.constant([-1.0, 2.0])
45 |
46 | query = no_privacy_query.NoPrivacyAverageQuery()
47 | query_result, _ = test_utils.run_query(query, [record1, record2])
48 | result = sess.run(query_result)
49 | expected = [2.0, 1.0]
50 | self.assertAllClose(result, expected)
51 |
52 | def test_no_privacy_weighted_average(self):
53 | with self.cached_session() as sess:
54 | record1 = tf.constant([4.0, 0.0])
55 | record2 = tf.constant([-1.0, 1.0])
56 |
57 | weights = [1, 3]
58 |
59 | query = no_privacy_query.NoPrivacyAverageQuery()
60 | query_result, _ = test_utils.run_query(
61 | query, [record1, record2], weights=weights)
62 | result = sess.run(query_result)
63 | expected = [0.25, 0.75]
64 | self.assertAllClose(result, expected)
65 |
66 | @parameterized.named_parameters(
67 | ('type_mismatch', [1.0], (1.0,), TypeError),
68 | ('too_few_on_left', [1.0], [1.0, 1.0], ValueError),
69 | ('too_few_on_right', [1.0, 1.0], [1.0], ValueError))
70 | def test_incompatible_records(self, record1, record2, error_type):
71 | query = no_privacy_query.NoPrivacySumQuery()
72 | with self.assertRaises(error_type):
73 | test_utils.run_query(query, [record1, record2])
74 |
75 |
76 | if __name__ == '__main__':
77 | tf.test.main()
78 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/tensor_buffer_test_graph.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for tensor_buffer in graph mode."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from privacy.analysis import tensor_buffer
23 |
24 |
25 | class TensorBufferTest(tf.test.TestCase):
26 | """Tests for TensorBuffer in graph mode."""
27 |
28 | def test_noresize(self):
29 | """Test buffer does not resize if capacity is not exceeded."""
30 | with self.cached_session() as sess:
31 | size, shape = 2, [2, 3]
32 |
33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
34 | value1 = [[1, 2, 3], [4, 5, 6]]
35 | with tf.control_dependencies([my_buffer.append(value1)]):
36 | value2 = [[7, 8, 9], [10, 11, 12]]
37 | with tf.control_dependencies([my_buffer.append(value2)]):
38 | values = my_buffer.values
39 | current_size = my_buffer.current_size
40 | capacity = my_buffer.capacity
41 | self.evaluate(tf.global_variables_initializer())
42 |
43 | v, cs, cap = sess.run([values, current_size, capacity])
44 | self.assertAllEqual(v, [value1, value2])
45 | self.assertEqual(cs, 2)
46 | self.assertEqual(cap, 2)
47 |
48 | def test_resize(self):
49 | """Test buffer resizes if capacity is exceeded."""
50 | with self.cached_session() as sess:
51 | size, shape = 2, [2, 3]
52 |
53 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
54 | value1 = [[1, 2, 3], [4, 5, 6]]
55 | with tf.control_dependencies([my_buffer.append(value1)]):
56 | value2 = [[7, 8, 9], [10, 11, 12]]
57 | with tf.control_dependencies([my_buffer.append(value2)]):
58 | value3 = [[13, 14, 15], [16, 17, 18]]
59 | with tf.control_dependencies([my_buffer.append(value3)]):
60 | values = my_buffer.values
61 | current_size = my_buffer.current_size
62 | capacity = my_buffer.capacity
63 | self.evaluate(tf.global_variables_initializer())
64 |
65 | v, cs, cap = sess.run([values, current_size, capacity])
66 | self.assertAllEqual(v, [value1, value2, value3])
67 | self.assertEqual(cs, 3)
68 | self.assertEqual(cap, 4)
69 |
70 |
71 | if __name__ == '__main__':
72 | tf.test.main()
73 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for tensor_buffer in eager mode."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from privacy.analysis import tensor_buffer
23 |
24 | tf.enable_eager_execution()
25 |
26 |
27 | class TensorBufferTest(tf.test.TestCase):
28 | """Tests for TensorBuffer in eager mode."""
29 |
30 | def test_basic(self):
31 | size, shape = 2, [2, 3]
32 |
33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
34 |
35 | value1 = [[1, 2, 3], [4, 5, 6]]
36 | my_buffer.append(value1)
37 | self.assertAllEqual(my_buffer.values.numpy(), [value1])
38 |
39 | value2 = [[4, 5, 6], [7, 8, 9]]
40 | my_buffer.append(value2)
41 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
42 |
43 | def test_fail_on_scalar(self):
44 | with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'):
45 | tensor_buffer.TensorBuffer(1, ())
46 |
47 | def test_fail_on_inconsistent_shape(self):
48 | size, shape = 1, [2, 3]
49 |
50 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
51 |
52 | with self.assertRaisesRegexp(
53 | tf.errors.InvalidArgumentError,
54 | 'Appending value of inconsistent shape.'):
55 | my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
56 |
57 | def test_resize(self):
58 | size, shape = 2, [2, 3]
59 |
60 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
61 |
62 | # Append three buffers. Third one should succeed after resizing.
63 | value1 = [[1, 2, 3], [4, 5, 6]]
64 | my_buffer.append(value1)
65 | self.assertAllEqual(my_buffer.values.numpy(), [value1])
66 | self.assertAllEqual(my_buffer.current_size.numpy(), 1)
67 | self.assertAllEqual(my_buffer.capacity.numpy(), 2)
68 |
69 | value2 = [[4, 5, 6], [7, 8, 9]]
70 | my_buffer.append(value2)
71 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
72 | self.assertAllEqual(my_buffer.current_size.numpy(), 2)
73 | self.assertAllEqual(my_buffer.capacity.numpy(), 2)
74 |
75 | value3 = [[7, 8, 9], [10, 11, 12]]
76 | my_buffer.append(value3)
77 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3])
78 | self.assertAllEqual(my_buffer.current_size.numpy(), 3)
79 | # Capacity should have doubled.
80 | self.assertAllEqual(my_buffer.capacity.numpy(), 4)
81 |
82 |
83 | if __name__ == '__main__':
84 | tf.test.main()
85 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/README.md:
--------------------------------------------------------------------------------
1 | Implementation of an RDP privacy accountant and smooth sensitivity analysis for
2 | the PATE framework. The underlying theory and supporting experiments appear in
3 | "Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya
4 | Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018,
5 | https://arxiv.org/abs/1802.08908).
6 |
7 | ## Overview
8 |
9 | The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced
10 | by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from
11 | Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The
12 | framework enables model-agnostic training that provably provides [differential
13 | privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training
14 | dataset.
15 |
16 | The framework consists of _teachers_, the _student_ model, and the _aggregator_. The
17 | teachers are models trained on disjoint subsets of the training dataset. The student
18 | model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by
19 | interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies
20 | outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
21 | refuses to answer.
22 |
23 | Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_,
24 | which means that they are solely a function of the aggregator's parameters. Alternatively, the privacy
25 | analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on
26 | the input distribution, the final privacy guarantees can be improved relative to the data-independent
27 | analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data, and
28 | therefore publishing them requires a sanitization procedure of its own. In our case,
29 | sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis.
30 |
31 | The common machinery used for all privacy analyses in this repository is
32 | Rényi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476).
33 |
34 | This repository contains implementations of privacy accountants and smooth
35 | sensitivity analysis for several data-independent and data-dependent mechanisms
36 | that together comprise the PATE framework; an illustrative GNMax sketch closes this README.
37 |
38 |
39 | ### Requirements
40 |
41 | * Python, version ≥ 2.7
42 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
43 | * numpy
44 | * scipy
45 | * sympy (for smooth sensitivity analysis)
46 | * unittest (part of the Python standard library; used for testing)
47 |
48 |
49 | ### Self-testing
50 |
51 | To verify the installation run
52 | ```bash
53 | $ python core_test.py
54 | $ python smooth_sensitivity_test.py
55 | ```
56 |
57 |
58 | ## Files in this directory
59 |
60 | * core.py — RDP privacy accountant for several vote aggregators (GNMax,
61 | Threshold, Laplace).
62 |
63 | * smooth_sensitivity.py — Smooth sensitivity analysis for GNMax and
64 | Threshold mechanisms.
65 |
66 | * core_test.py and smooth_sensitivity_test.py — Unit tests for the
67 | files above.
68 |
69 | ## Contact information
70 |
71 | You may direct comments to mironov@google.com and pull requests to @ilyamironov.
72 |
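73 | ## Illustrative example
74 |
75 | The following minimal NumPy sketch (ours, not part of this package; names are
76 | illustrative) shows the GNMax aggregator whose privacy cost core.py accounts
77 | for: the label returned to the student is the argmax of the per-class teacher
78 | vote counts after Gaussian noise of scale sigma is added.
79 |
80 | ```python
81 | import numpy as np
82 |
83 |
84 | def gnmax_aggregate(votes, sigma):
85 |   """Returns the noisy-argmax label for one student query.
86 |
87 |   Args:
88 |     votes: 1-D array of per-class vote counts from the teacher ensemble.
89 |     sigma: standard deviation of the Gaussian noise added to each count.
90 |   """
91 |   noisy_counts = votes + np.random.normal(scale=sigma, size=votes.shape)
92 |   return int(np.argmax(noisy_counts))
93 |
94 |
95 | # 250 teachers, 10 classes: a strong consensus on class 3 survives the noise
96 | # with high probability, which is what the data-dependent analysis exploits.
97 | votes = np.array([5, 0, 10, 200, 5, 10, 5, 5, 10, 0])
98 | print(gnmax_aggregate(votes, sigma=40.))
99 | ```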
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Plots LS(q).
17 |
18 | A script in support of the PATE2 paper. NOT PRESENTLY USED.
19 |
20 | The output is written to a specified directory as a pdf file.
21 | """
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 |
26 | import math
27 | import os
28 | import sys
29 |
30 | sys.path.append('..') # Main modules reside in the parent directory.
31 |
32 |
33 | from absl import app
34 | from absl import flags
35 | import matplotlib
36 | matplotlib.use('TkAgg')
37 | import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
38 | import numpy as np
39 | import smooth_sensitivity as pate_ss
40 |
41 | plt.style.use('ggplot')
42 |
43 | FLAGS = flags.FLAGS
44 |
45 | flags.DEFINE_string('figures_dir', '', 'Path where the output is written to.')
46 |
47 |
48 | def compute_ls_q(sigma, order, num_classes):
49 |
50 | def beta(q):
51 | return pate_ss._compute_rdp_gnmax(sigma, math.log(q), order)
52 |
53 | def bu(q):
54 | return pate_ss._compute_bu_gnmax(q, sigma, order)
55 |
56 | def bl(q):
57 | return pate_ss._compute_bl_gnmax(q, sigma, order)
58 |
59 | def delta_beta(q):
60 | if q == 0 or q > .8:
61 | return 0
62 | beta_q = beta(q)
63 | beta_bu_q = beta(bu(q))
64 | beta_bl_q = beta(bl(q))
65 | assert beta_bl_q <= beta_q <= beta_bu_q
66 | return beta_bu_q - beta_q # max(beta_bu_q - beta_q, beta_q - beta_bl_q)
67 |
68 | logq0 = pate_ss.compute_logq0_gnmax(sigma, order)
69 | logq1 = pate_ss._compute_logq1(sigma, order, num_classes)
70 | print(math.exp(logq1), math.exp(logq0))
71 | xs = np.linspace(0, .1, num=1000, endpoint=True)
72 | ys = [delta_beta(x) for x in xs]
73 | return xs, ys
74 |
75 |
76 | def main(argv):
77 | del argv # Unused.
78 |
79 | sigma = 20
80 | order = 20.
81 | num_classes = 10
82 |
83 | # sigma = 20
84 | # order = 25.
85 | # num_classes = 10
86 |
87 | x_axis, ys = compute_ls_q(sigma, order, num_classes)
88 |
89 | fig, ax = plt.subplots()
90 | fig.set_figheight(4.5)
91 | fig.set_figwidth(4.7)
92 |
93 | ax.plot(x_axis, ys, alpha=.8, linewidth=5)
94 |   plt.xlabel('$q$', fontsize=16)
95 |   plt.ylabel('$LS(q)$', fontsize=16)
96 | ax.tick_params(labelsize=14)
97 | fout_name = os.path.join(FLAGS.figures_dir, 'ls_of_q.pdf')
98 |   print('Saving the graph to ' + fout_name)
99 |   plt.savefig(fout_name, bbox_inches='tight')  # Write the pdf before displaying.
100 |   plt.show()
101 |
102 |   plt.close('all')
103 |
104 |
105 | if __name__ == '__main__':
106 |   app.run(main)
107 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import os
21 |
22 | from absl import app
23 | from absl import flags
24 | import matplotlib
25 | matplotlib.use('TkAgg')
26 | import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
27 |
28 | plt.style.use('ggplot')
29 |
30 | FLAGS = flags.FLAGS
31 | flags.DEFINE_string('plot_file', '', 'Output file name.')
32 |
33 | qa_lnmax = [500, 750] + list(range(1000, 12500, 500))  # list() for Python 3.
34 |
35 | acc_lnmax = [43.3, 52.3, 59.8, 66.7, 68.8, 70.5, 71.6, 72.3, 72.6, 72.9, 73.4,
36 | 73.4, 73.7, 73.9, 74.2, 74.4, 74.5, 74.7, 74.8, 75, 75.1, 75.1,
37 | 75.4, 75.4, 75.4]
38 |
39 | qa_gnmax = [456, 683, 908, 1353, 1818, 2260, 2702, 3153, 3602, 4055, 4511, 4964,
40 | 5422, 5875, 6332, 6792, 7244, 7696, 8146, 8599, 9041, 9496, 9945,
41 | 10390, 10842]
42 |
43 | acc_gnmax = [39.6, 52.2, 59.6, 66.6, 69.6, 70.5, 71.8, 72, 72.7, 72.9, 73.3,
44 | 73.4, 73.4, 73.8, 74, 74.2, 74.4, 74.5, 74.5, 74.7, 74.8, 75, 75.1,
45 | 75.1, 75.4]
46 |
47 | qa_gnmax_aggressive = [167, 258, 322, 485, 647, 800, 967, 1133, 1282, 1430,
48 | 1573, 1728, 1889, 2028, 2190, 2348, 2510, 2668, 2950,
49 | 3098, 3265, 3413, 3581, 3730]
50 |
51 | acc_gnmax_aggressive = [17.8, 26.8, 39.3, 48, 55.7, 61, 62.8, 64.8, 65.4, 66.7,
52 | 66.2, 68.3, 68.3, 68.7, 69.1, 70, 70.2, 70.5, 70.9,
53 | 70.7, 71.3, 71.3, 71.3, 71.8]
54 |
55 |
56 | def main(argv):
57 | del argv # Unused.
58 |
59 | plt.close('all')
60 | fig, ax = plt.subplots()
61 | fig.set_figheight(4.7)
62 | fig.set_figwidth(5)
63 | ax.plot(qa_lnmax, acc_lnmax, color='r', ls='--', linewidth=5., marker='o',
64 | alpha=.5, label='LNMax')
65 | ax.plot(qa_gnmax, acc_gnmax, color='g', ls='-', linewidth=5., marker='o',
66 | alpha=.5, label='Confident-GNMax')
67 | # ax.plot(qa_gnmax_aggressive, acc_gnmax_aggressive, color='b', ls='-', marker='o', alpha=.5, label='Confident-GNMax (aggressive)')
68 | plt.xticks([0, 2000, 4000, 6000])
69 | plt.xlim([0, 6000])
70 | # ax.set_yscale('log')
71 | plt.ylim([65, 76])
72 | ax.tick_params(labelsize=14)
73 | plt.xlabel('Number of queries answered', fontsize=16)
74 | plt.ylabel('Student test accuracy (%)', fontsize=16)
75 | plt.legend(loc=2, prop={'size': 16})
76 |
77 | x = [400, 2116, 4600, 4680]
78 | y = [69.5, 68.5, 74, 72.5]
79 | annotations = [0.76, 2.89, 1.42, 5.76]
80 | color_annotations = ['g', 'r', 'g', 'r']
81 | for i, txt in enumerate(annotations):
82 | ax.annotate(r'${\varepsilon=}$' + str(txt), (x[i], y[i]), fontsize=16,
83 | color=color_annotations[i])
84 |
85 | plot_filename = os.path.expanduser(FLAGS.plot_file)
86 | plt.savefig(plot_filename, bbox_inches='tight')
87 | plt.show()
88 |
89 | if __name__ == '__main__':
90 | app.run(main)
91 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/normalized_query.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Implements DPQuery interface for normalized queries.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import collections
23 |
24 | from distutils.version import LooseVersion
25 | import tensorflow as tf
26 |
27 | from privacy.dp_query import dp_query
28 |
29 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
30 | nest = tf.contrib.framework.nest
31 | else:
32 | nest = tf.nest
33 |
34 |
35 | class NormalizedQuery(dp_query.DPQuery):
36 | """DPQuery for queries with a DPQuery numerator and fixed denominator."""
37 |
38 | # pylint: disable=invalid-name
39 | _GlobalState = collections.namedtuple(
40 | '_GlobalState', ['numerator_state', 'denominator'])
41 |
42 | def __init__(self, numerator_query, denominator):
43 | """Initializer for NormalizedQuery.
44 |
45 | Args:
46 | numerator_query: A DPQuery for the numerator.
47 | denominator: A value for the denominator. May be None if it will be
48 | supplied via the set_denominator function before get_noised_result is
49 | called.
50 | """
51 | self._numerator = numerator_query
52 | self._denominator = denominator
53 |
54 | def set_ledger(self, ledger):
55 | """See base class."""
56 | self._numerator.set_ledger(ledger)
57 |
58 | def initial_global_state(self):
59 | """See base class."""
60 | if self._denominator is not None:
61 | denominator = tf.cast(self._denominator, tf.float32)
62 | else:
63 | denominator = None
64 | return self._GlobalState(
65 | self._numerator.initial_global_state(), denominator)
66 |
67 | def derive_sample_params(self, global_state):
68 | """See base class."""
69 | return self._numerator.derive_sample_params(global_state.numerator_state)
70 |
71 | def initial_sample_state(self, template):
72 | """See base class."""
73 | # NormalizedQuery has no sample state beyond the numerator state.
74 | return self._numerator.initial_sample_state(template)
75 |
76 | def preprocess_record(self, params, record):
77 | return self._numerator.preprocess_record(params, record)
78 |
79 | def accumulate_preprocessed_record(
80 | self, sample_state, preprocessed_record):
81 | """See base class."""
82 | return self._numerator.accumulate_preprocessed_record(
83 | sample_state, preprocessed_record)
84 |
85 | def get_noised_result(self, sample_state, global_state):
86 | """See base class."""
87 | noised_sum, new_sum_global_state = self._numerator.get_noised_result(
88 | sample_state, global_state.numerator_state)
89 | def normalize(v):
90 | return tf.truediv(v, global_state.denominator)
91 |
92 | return (nest.map_structure(normalize, noised_sum),
93 | self._GlobalState(new_sum_global_state, global_state.denominator))
94 |
95 | def merge_sample_states(self, sample_state_1, sample_state_2):
96 | """See base class."""
97 | return self._numerator.merge_sample_states(sample_state_1, sample_state_2)
98 |
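99 | # Illustrative usage (not part of this module; parameter values are arbitrary):
100 | # normalizing a Gaussian sum query by a fixed denominator yields a noised
101 | # average over the records in a sample.
102 | #
103 | #   from privacy.dp_query import gaussian_query
104 | #   sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.0)
105 | #   average_query = NormalizedQuery(sum_query, denominator=16)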
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 |
3 | licenses(["notice"]) # Apache 2.0
4 |
5 | py_library(
6 | name = "dp_query",
7 | srcs = ["dp_query.py"],
8 | deps = [
9 | "//third_party/py/distutils",
10 | "//third_party/py/tensorflow",
11 | ],
12 | )
13 |
14 | py_library(
15 | name = "gaussian_query",
16 | srcs = ["gaussian_query.py"],
17 | deps = [
18 | ":dp_query",
19 | ":normalized_query",
20 | "//third_party/py/distutils",
21 | "//third_party/py/tensorflow",
22 | ],
23 | )
24 |
25 | py_test(
26 | name = "gaussian_query_test",
27 | size = "small",
28 | srcs = ["gaussian_query_test.py"],
29 | python_version = "PY2",
30 | deps = [
31 | ":gaussian_query",
32 | ":test_utils",
33 | "//third_party/py/absl/testing:parameterized",
34 | "//third_party/py/numpy",
35 | "//third_party/py/six",
36 | "//third_party/py/tensorflow",
37 | ],
38 | )
39 |
40 | py_library(
41 | name = "no_privacy_query",
42 | srcs = ["no_privacy_query.py"],
43 | deps = [
44 | ":dp_query",
45 | "//third_party/py/distutils",
46 | "//third_party/py/tensorflow",
47 | ],
48 | )
49 |
50 | py_test(
51 | name = "no_privacy_query_test",
52 | size = "small",
53 | srcs = ["no_privacy_query_test.py"],
54 | python_version = "PY2",
55 | deps = [
56 | ":no_privacy_query",
57 | ":test_utils",
58 | "//third_party/py/absl/testing:parameterized",
59 | "//third_party/py/tensorflow",
60 | ],
61 | )
62 |
63 | py_library(
64 | name = "normalized_query",
65 | srcs = ["normalized_query.py"],
66 | deps = [
67 | ":dp_query",
68 | "//third_party/py/distutils",
69 | "//third_party/py/tensorflow",
70 | ],
71 | )
72 |
73 | py_test(
74 | name = "normalized_query_test",
75 | size = "small",
76 | srcs = ["normalized_query_test.py"],
77 | python_version = "PY2",
78 | deps = [
79 | ":gaussian_query",
80 | ":normalized_query",
81 | ":test_utils",
82 | "//third_party/py/tensorflow",
83 | ],
84 | )
85 |
86 | py_library(
87 | name = "nested_query",
88 | srcs = ["nested_query.py"],
89 | deps = [
90 | ":dp_query",
91 | "//third_party/py/distutils",
92 | "//third_party/py/tensorflow",
93 | ],
94 | )
95 |
96 | py_test(
97 | name = "nested_query_test",
98 | size = "small",
99 | srcs = ["nested_query_test.py"],
100 | python_version = "PY2",
101 | deps = [
102 | ":gaussian_query",
103 | ":nested_query",
104 | ":test_utils",
105 | "//third_party/py/absl/testing:parameterized",
106 | "//third_party/py/distutils",
107 | "//third_party/py/numpy",
108 | "//third_party/py/tensorflow",
109 | ],
110 | )
111 |
112 | py_library(
113 | name = "quantile_adaptive_clip_sum_query",
114 | srcs = ["quantile_adaptive_clip_sum_query.py"],
115 | deps = [
116 | ":dp_query",
117 | ":gaussian_query",
118 | ":normalized_query",
119 | "//third_party/py/tensorflow",
120 | ],
121 | )
122 |
123 | py_test(
124 | name = "quantile_adaptive_clip_sum_query_test",
125 | srcs = ["quantile_adaptive_clip_sum_query_test.py"],
126 | python_version = "PY2",
127 | deps = [
128 | ":quantile_adaptive_clip_sum_query",
129 | ":test_utils",
130 | "//third_party/py/numpy",
131 | "//third_party/py/tensorflow",
132 | "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger",
133 | ],
134 | )
135 |
136 | py_library(
137 | name = "test_utils",
138 | srcs = ["test_utils.py"],
139 | deps = [],
140 | )
141 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | r"""Command-line script for computing privacy of a model trained with DP-SGD.
16 |
17 | The script applies the RDP accountant to estimate privacy budget of an iterated
18 | Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags.
19 |
20 | Example:
21 |   compute_dp_sgd_privacy \
22 | --N=60000 \
23 | --batch_size=256 \
24 | --noise_multiplier=1.12 \
25 | --epochs=60 \
26 | --delta=1e-5
27 |
28 | The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP.
29 | """
30 |
31 | from __future__ import absolute_import
32 | from __future__ import division
33 | from __future__ import print_function
34 |
35 | import math
36 | import sys
37 |
38 | from absl import app
39 | from absl import flags
40 |
41 | # Opting out of loading all sibling packages and their dependencies.
42 | sys.skip_tf_privacy_import = True
43 |
44 | from privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top
45 | from privacy.analysis.rdp_accountant import get_privacy_spent
46 |
47 | FLAGS = flags.FLAGS
48 |
49 | flags.DEFINE_integer('N', None, 'Total number of examples')
50 | flags.DEFINE_integer('batch_size', None, 'Batch size')
51 | flags.DEFINE_float('noise_multiplier', None, 'Noise multiplier for DP-SGD')
52 | flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)')
53 | flags.DEFINE_float('delta', 1e-6, 'Target delta')
54 |
55 | flags.mark_flag_as_required('N')
56 | flags.mark_flag_as_required('batch_size')
57 | flags.mark_flag_as_required('noise_multiplier')
58 | flags.mark_flag_as_required('epochs')
59 |
60 |
61 | def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
62 | """Compute and print results of DP-SGD analysis."""
63 |
64 | # compute_rdp requires that sigma be the ratio of the standard deviation of
65 | # the Gaussian noise to the l2-sensitivity of the function to which it is
66 | # added. Hence, sigma here corresponds to the `noise_multiplier` parameter
67 |   # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer.
68 | rdp = compute_rdp(q, sigma, steps, orders)
69 |
70 | eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
71 |
72 | print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
73 | ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
74 | print('differential privacy with eps = {:.3g} and delta = {}.'.format(
75 | eps, delta))
76 | print('The optimal RDP order is {}.'.format(opt_order))
77 |
78 | if opt_order == max(orders) or opt_order == min(orders):
79 | print('The privacy estimate is likely to be improved by expanding '
80 | 'the set of orders.')
81 |
82 |
83 | def main(argv):
84 | del argv # argv is not used.
85 |
86 | q = FLAGS.batch_size / FLAGS.N # q - the sampling ratio.
87 | if q > 1:
88 | raise app.UsageError('N must be larger than the batch size.')
89 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
90 | list(range(5, 64)) + [128, 256, 512])
91 | steps = int(math.ceil(FLAGS.epochs * FLAGS.N / FLAGS.batch_size))
92 |
93 | apply_dp_sgd_analysis(q, FLAGS.noise_multiplier, steps, orders, FLAGS.delta)
94 |
95 |
96 | if __name__ == '__main__':
97 | app.run(main)
98 |
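99 | # Illustrative direct call (bypassing the flags), mirroring the docstring
100 | # example above:
101 | #
102 | #   orders = [1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 8., 16., 32., 64.]
103 | #   apply_dp_sgd_analysis(q=256 / 60000, sigma=1.12,
104 | #                         steps=int(math.ceil(60 * 60000 / 256)),
105 | #                         orders=orders, delta=1e-5)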
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/train_teachers.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | import deep_cnn
20 | import input # pylint: disable=redefined-builtin
21 | import metrics
22 | import tensorflow as tf
23 |
24 |
25 | tf.flags.DEFINE_string('dataset', 'svhn', 'The name of the dataset to use')
26 | tf.flags.DEFINE_integer('nb_labels', 10, 'Number of output classes')
27 |
28 | tf.flags.DEFINE_string('data_dir', '/tmp', 'Temporary storage')
29 | tf.flags.DEFINE_string('train_dir', '/tmp/train_dir',
30 |                        'Where model checkpoints are saved')
31 |
32 | tf.flags.DEFINE_integer('max_steps', 3000, 'Number of training steps to run.')
33 | tf.flags.DEFINE_integer('nb_teachers', 50, 'Teachers in the ensemble.')
34 | tf.flags.DEFINE_integer('teacher_id', 0, 'ID of teacher being trained.')
35 |
36 | tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model')
37 |
38 | FLAGS = tf.flags.FLAGS
39 |
40 |
41 | def train_teacher(dataset, nb_teachers, teacher_id):
42 | """
43 | This function trains a teacher (teacher id) among an ensemble of nb_teachers
44 | models for the dataset specified.
45 | :param dataset: string corresponding to dataset (svhn, cifar10)
46 | :param nb_teachers: total number of teachers in the ensemble
47 | :param teacher_id: id of the teacher being trained
48 | :return: True if everything went well
49 | """
50 | # If working directories do not exist, create them
51 | assert input.create_dir_if_needed(FLAGS.data_dir)
52 | assert input.create_dir_if_needed(FLAGS.train_dir)
53 |
54 | # Load the dataset
55 | if dataset == 'svhn':
56 |     train_data, train_labels, test_data, test_labels = input.ld_svhn(extended=True)
57 | elif dataset == 'cifar10':
58 | train_data, train_labels, test_data, test_labels = input.ld_cifar10()
59 | elif dataset == 'mnist':
60 | train_data, train_labels, test_data, test_labels = input.ld_mnist()
61 | else:
62 | print("Check value of dataset flag")
63 | return False
64 |
65 | # Retrieve subset of data for this teacher
66 | data, labels = input.partition_dataset(train_data,
67 | train_labels,
68 | nb_teachers,
69 | teacher_id)
70 |
71 | print("Length of training data: " + str(len(labels)))
72 |
73 | # Define teacher checkpoint filename and full path
74 | if FLAGS.deeper:
75 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
76 | else:
77 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
78 | ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename
79 |
80 | # Perform teacher training
81 | assert deep_cnn.train(data, labels, ckpt_path)
82 |
83 | # Append final step value to checkpoint for evaluation
84 | ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)
85 |
86 | # Retrieve teacher probability estimates on the test data
87 | teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)
88 |
89 | # Compute teacher accuracy
90 | precision = metrics.accuracy(teacher_preds, test_labels)
91 | print('Precision of teacher after training: ' + str(precision))
92 |
93 | return True
94 |
95 |
96 | def main(argv=None): # pylint: disable=unused-argument
97 | # Make a call to train_teachers with values specified in flags
98 | assert train_teacher(FLAGS.dataset, FLAGS.nb_teachers, FLAGS.teacher_id)
99 |
100 | if __name__ == '__main__':
101 | tf.app.run()
102 |
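103 | # Example invocation (illustrative; flag values are arbitrary):
104 | #
105 | #   python train_teachers.py --dataset=mnist --nb_teachers=50 --teacher_id=0 \
106 | #     --data_dir=/tmp --train_dir=/tmp/train_dir
107 | #
108 | # Each teacher in the ensemble is trained by a separate invocation with its
109 | # own --teacher_id.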
--------------------------------------------------------------------------------
/tensorflow_privacy/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow Privacy
2 |
3 | This repository contains the source code for TensorFlow Privacy, a Python
4 | library that includes implementations of TensorFlow optimizers for training
5 | machine learning models with differential privacy. The library comes with
6 | tutorials and analysis tools for computing the privacy guarantees provided.
7 |
8 | The TensorFlow Privacy library is under continual development, always welcoming
9 | contributions. In particular, we always welcome help towards resolving the
10 | issues currently open.
11 |
12 | ## Setting up TensorFlow Privacy
13 |
14 | ### Dependencies
15 |
16 | This library uses [TensorFlow](https://www.tensorflow.org/) to define machine
17 | learning models. Therefore, installing TensorFlow (>= 1.14) is a prerequisite.
18 | You can find instructions [here](https://www.tensorflow.org/install/). For
19 | better performance, it is also recommended to install TensorFlow with GPU
20 | support (detailed instructions on how to do this are available in the TensorFlow
21 | installation documentation).
22 |
23 | In addition to TensorFlow and its dependencies, other prerequisites are:
24 |
25 | * `scipy` >= 0.17
26 |
27 | * `mpmath` (for testing)
28 |
29 | * `tensorflow_datasets` (for the RNN tutorial `lm_dpsgd_tutorial.py` only)
30 |
31 | ### Installing TensorFlow Privacy
32 |
33 | First, clone this GitHub repository into a directory of your choice:
34 |
35 | ```
36 | git clone https://github.com/tensorflow/privacy
37 | ```
38 |
39 | You can then install the local package in "editable" mode in order to add it to
40 | your `PYTHONPATH`:
41 |
42 | ```
43 | cd privacy
44 | pip install -e .
45 | ```
46 |
47 | If you'd like to make contributions, we recommend first forking the repository
48 | and then cloning your fork rather than cloning this repository directly.
49 |
50 | ## Contributing
51 |
52 | Contributions are welcome! Bug fixes and new features can be initiated through
53 | GitHub pull requests. To speed the code review process, we ask that:
54 |
55 | * When making code contributions to TensorFlow Privacy, you follow the `PEP8
56 | with two spaces` coding style (the same as the one used by TensorFlow) in
58 | your pull requests. In most cases this can be done by running
59 | `autopep8 -i --indent-size 2` on the files you have edited.
59 |
60 | * You should also check your code with pylint and TensorFlow's pylint
61 | [configuration file](https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc)
62 | by running `pylint --rcfile=/path/to/the/tf/rcfile`.
63 |
64 | * When making your first pull request, you must
65 |   [sign the Google CLA](https://cla.developers.google.com/clas).
66 |
67 | * We do not accept pull requests that add git submodules because of
68 | [the problems that arise when maintaining git submodules](https://medium.com/@porteneuve/mastering-git-submodules-34c65e940407)
69 |
70 | ## Tutorials directory
71 |
72 | To help you get started with the functionalities provided by this library, we
73 | provide a detailed walkthrough [here](tutorials/walkthrough/walkthrough.md) that
74 | will teach you how to wrap existing optimizers
75 | (e.g., SGD, Adam, ...) into their differentially private counterparts using
76 | TensorFlow (TF) Privacy. You will also learn how to tune the parameters
77 | introduced by differentially private optimization and how to
78 | measure the privacy guarantees provided using analysis tools included in TF
79 | Privacy.
80 |
81 | In addition, the `tutorials/` folder comes with scripts demonstrating how to
82 | use the library features. The list of tutorials is described in the README
83 | included in the tutorials directory; a minimal optimizer-wrapping sketch also
84 | appears at the end of this README.
85 |
86 | NOTE: the tutorials are maintained carefully. However, they are not considered
87 | part of the API and they can change at any time without warning. You should not
88 | write third-party code that imports the tutorials and expect that the interface
89 | will not break.
90 |
91 | ## Research directory
92 |
93 | This folder contains code to reproduce results from research papers related to
94 | privacy in machine learning. It is not maintained as carefully as the tutorials
95 | directory, but rather intended as a convenient archive.
96 |
97 | ## Remarks
98 |
99 | The content of this repository supersedes the following existing folder in the
100 | tensorflow/models [repository](https://github.com/tensorflow/models/tree/master/research/differential_privacy).
101 |
102 | ## Contacts
103 |
104 | If you have any questions that cannot be addressed by raising an issue, feel
105 | free to contact:
106 |
107 | * Galen Andrew (@galenmandrew)
108 | * Steve Chien (@schien1729)
109 | * Nicolas Papernot (@npapernot)
110 |
111 | ## Copyright
112 |
113 | Copyright 2019 - Google LLC
114 |
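115 | ## Example: wrapping an optimizer
116 |
117 | The snippet below is a minimal illustrative sketch (parameter values are
118 | arbitrary, not recommendations) of swapping a vanilla TensorFlow optimizer for
119 | its differentially private counterpart from this library:
120 |
121 | ```python
122 | from privacy.optimizers import dp_optimizer
123 |
124 | optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
125 |     l2_norm_clip=1.0,      # clip each per-example gradient to this L2 norm
126 |     noise_multiplier=1.1,  # ratio of noise stddev to the clipping norm
127 |     num_microbatches=256,  # split each minibatch into this many microbatches
128 |     learning_rate=0.15)
129 | # Use `optimizer` where a tf.train.GradientDescentOptimizer would go; note
130 | # that it expects a vector of per-example losses rather than a scalar loss.
131 | ```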
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/nested_query.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Implements DPQuery interface for queries over nested structures.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | from distutils.version import LooseVersion
23 | import tensorflow as tf
24 |
25 | from privacy.dp_query import dp_query
26 |
27 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
28 | nest = tf.contrib.framework.nest
29 | else:
30 | nest = tf.nest
31 |
32 |
33 | class NestedQuery(dp_query.DPQuery):
34 | """Implements DPQuery interface for structured queries.
35 |
36 | NestedQuery evaluates arbitrary nested structures of queries. Records must be
37 | nested structures of tensors that are compatible (in type and arity) with the
38 | query structure, but are allowed to have deeper structure within each leaf of
39 | the query structure. For example, the nested query [q1, q2] is compatible with
40 | the record [t1, t2] or [t1, (t2, t3)], but not with (t1, t2), [t1] or
41 | [t1, t2, t3]. The entire substructure of each record corresponding to a leaf
42 | node of the query structure is routed to the corresponding query. If the same
43 | tensor should be consumed by multiple sub-queries, it can be replicated in the
44 | record, for example [t1, t1].
45 |
46 | NestedQuery is intended to allow privacy mechanisms for groups as described in
47 | [McMahan & Andrew, 2018: "A General Approach to Adding Differential Privacy to
48 | Iterative Training Procedures" (https://arxiv.org/abs/1812.06210)].
49 | """
50 |
51 | def __init__(self, queries):
52 | """Initializes the NestedQuery.
53 |
54 | Args:
55 | queries: A nested structure of queries.
56 | """
57 | self._queries = queries
58 |
59 | def _map_to_queries(self, fn, *inputs, **kwargs):
60 | def caller(query, *args):
61 | return getattr(query, fn)(*args, **kwargs)
62 | return nest.map_structure_up_to(
63 | self._queries, caller, self._queries, *inputs)
64 |
65 | def set_ledger(self, ledger):
66 | self._map_to_queries('set_ledger', ledger=ledger)
67 |
68 | def initial_global_state(self):
69 | """See base class."""
70 | return self._map_to_queries('initial_global_state')
71 |
72 | def derive_sample_params(self, global_state):
73 | """See base class."""
74 | return self._map_to_queries('derive_sample_params', global_state)
75 |
76 | def initial_sample_state(self, template):
77 | """See base class."""
78 | return self._map_to_queries('initial_sample_state', template)
79 |
80 | def preprocess_record(self, params, record):
81 | """See base class."""
82 | return self._map_to_queries('preprocess_record', params, record)
83 |
84 | def accumulate_preprocessed_record(
85 | self, sample_state, preprocessed_record):
86 | """See base class."""
87 | return self._map_to_queries(
88 | 'accumulate_preprocessed_record',
89 | sample_state,
90 | preprocessed_record)
91 |
92 | def merge_sample_states(self, sample_state_1, sample_state_2):
93 | return self._map_to_queries(
94 | 'merge_sample_states', sample_state_1, sample_state_2)
95 |
96 | def get_noised_result(self, sample_state, global_state):
97 | """Gets query result after all records of sample have been accumulated.
98 |
99 | Args:
100 | sample_state: The sample state after all records have been accumulated.
101 | global_state: The global state.
102 |
103 | Returns:
104 | A tuple (result, new_global_state) where "result" is a structure matching
105 | the query structure containing the results of the subqueries and
106 | "new_global_state" is a structure containing the updated global states
107 | for the subqueries.
108 | """
109 | estimates_and_new_global_states = self._map_to_queries(
110 | 'get_noised_result', sample_state, global_state)
111 |
112 | flat_estimates, flat_new_global_states = zip(
113 | *nest.flatten_up_to(self._queries, estimates_and_new_global_states))
114 | return (
115 | nest.pack_sequence_as(self._queries, flat_estimates),
116 | nest.pack_sequence_as(self._queries, flat_new_global_states))
117 |
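118 | # Illustrative usage (not part of this module; parameter values are arbitrary):
119 | # apply differently calibrated Gaussian sum queries to the two leaves of a
120 | # [weights, bias] record structure.
121 | #
122 | #   from privacy.dp_query import gaussian_query
123 | #   query = NestedQuery(
124 | #       [gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.0),
125 | #        gaussian_query.GaussianSumQuery(l2_norm_clip=0.1, stddev=0.1)])
126 | #   # Compatible records are then two-element lists, e.g. [w_grads, b_grad].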
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/core_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for pate.core."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import sys
23 | import unittest
24 | import numpy as np
25 |
26 | import core as pate
27 |
28 |
29 | class PateTest(unittest.TestCase):
30 |
31 | def _test_rdp_gaussian_value_errors(self):
32 | # Test for ValueErrors.
33 | with self.assertRaises(ValueError):
34 | pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4]))
35 | with self.assertRaises(ValueError):
36 | pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4]))
37 | with self.assertRaises(ValueError):
38 | pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4]))
39 |
40 | def _test_rdp_gaussian_as_function_of_q(self):
41 | # Test for data-independent and data-dependent ranges over q.
42 | # The following corresponds to orders 1.1, 2.5, 32, 250
43 | # sigmas 1.5, 15, 1500, 15000.
44 | # Hand calculated -log(q0)s arranged in a 'sigma major' ordering.
45 | neglogq0s = [
46 | 2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2,
47 | 8.6, 6.4
48 | ]
49 | idx_neglogq0s = 0 # To iterate through neglogq0s.
50 | orders = [1.1, 2.5, 32, 250]
51 | sigmas = [1.5, 15, 1500, 15000]
52 | for sigma in sigmas:
53 | for order in orders:
54 | curr_neglogq0 = neglogq0s[idx_neglogq0s]
55 | idx_neglogq0s += 1
56 | if curr_neglogq0 is None: # sigma == 1.5 and order == 250:
57 | continue
58 |
59 | rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order)
60 |
61 | # Data-dependent range. (Successively halve the value of q.)
62 | logq_dds = (-curr_neglogq0 - np.array(
63 | [0, np.log(2), np.log(4), np.log(8)]))
64 | # Check that in q_dds, rdp is decreasing.
65 | for idx in range(len(logq_dds) - 1):
66 | self.assertGreater(
67 | pate.rdp_gaussian(logq_dds[idx], sigma, order),
68 | pate.rdp_gaussian(logq_dds[idx + 1], sigma, order))
69 |
70 | # Data-independent range.
71 | q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4])
72 | # Check that in q_dids, rdp is constant.
73 | for q in q_dids:
74 | self.assertEqual(rdp_at_q0, pate.rdp_gaussian(
75 | np.log(q), sigma, order))
76 |
77 | def _test_compute_eps_from_delta_value_error(self):
78 | # Test for ValueError.
79 | with self.assertRaises(ValueError):
80 | pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001)
81 |
82 | def _test_compute_eps_from_delta_monotonicity(self):
83 | # Test for monotonicity with respect to delta.
84 | orders = [1.1, 2.5, 250.0]
85 | sigmas = [1e-3, 1.0, 1e5]
86 | deltas = [1e-60, 1e-6, 0.1, 0.999]
87 | for sigma in sigmas:
88 | list_of_eps = []
89 | rdps_for_gaussian = np.array(orders) / (2 * sigma**2)
90 | for delta in deltas:
91 | list_of_eps.append(
92 | pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0])
93 |
94 | # Check that in list_of_eps, epsilons are decreasing (as delta increases).
95 | sorted_list_of_eps = list(list_of_eps)
96 | sorted_list_of_eps.sort(reverse=True)
97 | self.assertEqual(list_of_eps, sorted_list_of_eps)
98 |
99 | def _test_compute_q0(self):
100 | # Stub code to search a logq space and figure out logq0 by eyeballing
101 | # results. This code does not run with the tests. Remove underscore to run.
102 | sigma = 15
103 | order = 250
104 | logqs = np.arange(-290, -270, 1)
105 | count = 0
106 | for logq in logqs:
107 | count += 1
108 | sys.stdout.write("\t%0.5g: %0.10g" %
109 | (logq, pate.rdp_gaussian(logq, sigma, order)))
110 | sys.stdout.flush()
111 | if count % 5 == 0:
112 | print("")
113 |
114 | def test_rdp_gaussian(self):
115 | self._test_rdp_gaussian_value_errors()
116 | self._test_rdp_gaussian_as_function_of_q()
117 |
118 | def test_compute_eps_from_delta(self):
119 | self._test_compute_eps_from_delta_value_error()
120 | self._test_compute_eps_from_delta_monotonicity()
121 |
122 |
123 | if __name__ == "__main__":
124 | unittest.main()
125 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for pate.smooth_sensitivity."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import unittest
23 | import numpy as np
24 |
25 | import smooth_sensitivity as pate_ss
26 |
27 |
28 | class PateSmoothSensitivityTest(unittest.TestCase):
29 |
30 | def test_check_conditions(self):
31 | self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False))
32 | self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True))
33 |
34 | def _assert_all_close(self, x, y):
35 | """Asserts that two numpy arrays are close."""
36 | self.assertEqual(len(x), len(y))
37 | self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0))
38 |
39 | def test_compute_local_sensitivity_bounds_gnmax(self):
40 | counts1 = np.array([10, 0, 0])
41 | sigma1 = .5
42 | order1 = 1.5
43 |
44 | answer1 = np.array(
45 | [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7)
46 |
47 | # Test for "going right" in the smooth sensitivity computation.
48 | out1 = pate_ss.compute_local_sensitivity_bounds_gnmax(
49 | counts1, 10, sigma1, order1)
50 |
51 | self._assert_all_close(out1, answer1)
52 |
53 | counts2 = np.array([1000, 500, 300, 200, 0])
54 | sigma2 = 250.
55 | order2 = 10.
56 |
57 | # Test for "going left" in the smooth sensitivity computation.
58 | out2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
59 | counts2, 2000, sigma2, order2)
60 |
61 | answer2 = np.array([0.] * 298 + [2.77693450548e-7, 2.10853979548e-6] +
62 | [2.73113623988e-6] * 1700)
63 | self._assert_all_close(out2, answer2)
64 |
65 | def test_compute_local_sensitivity_bounds_threshold(self):
66 | counts1_3 = np.array([20, 10, 0])
67 | num_teachers = sum(counts1_3)
68 | t1 = 16 # high threshold
69 | sigma = 2
70 | order = 10
71 |
72 | out1 = pate_ss.compute_local_sensitivity_bounds_threshold(
73 | counts1_3, num_teachers, t1, sigma, order)
74 | answer1 = np.array([0] * 3 + [
75 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
76 | 9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01,
77 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
78 | 9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01,
79 | 1.93127860e-01
80 | ] + [0] * 10)
81 | self._assert_all_close(out1, answer1)
82 |
83 | t2 = 2 # low threshold
84 |
85 | out2 = pate_ss.compute_local_sensitivity_bounds_threshold(
86 | counts1_3, num_teachers, t2, sigma, order)
87 | answer2 = np.array([
88 | 1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01,
89 | 1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02,
90 | 6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04
91 | ] + [0] * 18)
92 | self._assert_all_close(out2, answer2)
93 |
94 | t3 = 50 # very high threshold (larger than the number of teachers).
95 |
96 | out3 = pate_ss.compute_local_sensitivity_bounds_threshold(
97 | counts1_3, num_teachers, t3, sigma, order)
98 |
99 | answer3 = np.array([
100 | 1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15,
101 | 1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10,
102 | 2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05,
103 | 0.000360973025976, 0.000360973025976, 2.76377015215e-50,
104 | 1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61,
105 | 1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72,
106 | 1.28328011060e-76, 8.20047697109e-81
107 | ] + [0] * 10)
108 |
109 | self._assert_all_close(out3, answer3)
110 |
111 | # Fractional values.
112 | counts4 = np.array([19.5, -5.1, 0])
113 | t4 = 10.1
114 | out4 = pate_ss.compute_local_sensitivity_bounds_threshold(
115 | counts4, num_teachers, t4, sigma, order)
116 |
117 | answer4 = np.array([
118 | 0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530,
119 | 0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142,
120 | 0.00678695871
121 | ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10)
122 | self._assert_all_close(out4, answer4)
123 |
124 |
125 | if __name__ == "__main__":
126 | unittest.main()
127 |
--------------------------------------------------------------------------------
/results/cifar_dpsgd_delta_0.0001_lr_0.001.txt:
--------------------------------------------------------------------------------
1 | eps: [0.5150317674746007, 0.5183779587283024, 0.521724149982004, 0.5250703412357057, 0.5284165324894075, 0.5317627237431092, 0.5351089149968108, 0.5384551062505125, 0.5418012975042142, 0.545147488757916, 0.5484936800116176, 0.5518398712653193, 0.555186062519021, 0.5585322537727228, 0.5618784450264244, 0.5652246362801261, 0.5685708275338278, 0.5719170187875295, 0.5752632100412312, 0.5786094012949329, 0.5819555925486346, 0.5853017838023362, 0.5886479750560379, 0.5919941663097397, 0.5953403575634414, 0.5986865488171431, 0.6020327400708447, 0.6053789313245465, 0.6087251225782482, 0.6120713138319499, 0.6154175050856515, 0.6187636963393532, 0.622109887593055, 0.6254560788467567, 0.6288022701004583, 0.63214846135416, 0.6354946526078618, 0.6388408438615634, 0.6421870351152651, 0.6455332263689668, 0.6488794176226685, 0.6522256088763703, 0.6555718001300719, 0.6589179913837736, 0.6622641826374753, 0.665610373891177, 0.6689565651448788, 0.6723027563985804, 0.6756489476522821, 0.6789951389059838, 0.6823413301596855, 0.6856875214133872, 0.6890337126670889, 0.6923799039207906, 0.6957260951744924, 0.699072286428194, 0.7024184776818957, 0.7057646689355974, 0.709110860189299, 0.7124570514430008, 0.7158032426967025, 0.7191494339504042, 0.7224956252041059, 0.7258418164578075, 0.7291880077115093, 0.732534198965211, 0.7358803902189126, 0.7392265814726143, 0.742572772726316, 0.7459189639800178, 0.7492651552337195, 0.7526113464874211, 0.7559575377411228, 0.7593037289948246, 0.7626499202485262, 0.7659961115022279, 0.7693423027559296, 0.7726884940096312, 0.7760346852633331, 0.7793808765170347, 0.7827270677707364, 0.7860732590244381, 0.7894194502781398, 0.7927656415318415, 0.7961118327855432, 0.7994580240392448, 0.8028042152929467, 0.8061504065466483, 0.80949659780035, 0.8128427890540517, 0.8161889803077533, 0.8195351715614552, 0.8228813628151568, 0.8262275540688585, 0.8295737453225602, 0.8329199365762618, 0.8362661278299636, 0.8396123190836653, 0.8429585103373669, 0.8463047015910687, 0.8496508928447704, 0.8529970840984721, 0.8563432753521738, 0.8596894666058754, 0.8630356578595771, 0.8663818491132789, 0.8697280403669805, 0.8730742316206823, 0.8764204228743839, 0.8797666141280857, 0.8831128053817874, 0.886458996635489, 0.8898051878891907, 0.8931513791428924, 0.8964975703965941, 0.8998437616502959, 0.9031899529039975, 0.9065361441576992, 0.909882335411401, 0.9132285266651026, 0.9165747179188043, 0.919920909172506, 0.9232671004262076, 0.9266132916799095, 0.9299594829336111, 0.9333056741873128, 0.9366321354866116, 0.9397411701930944, 0.942850204899577, 0.9459592396060599, 0.9490682743125425, 0.9521773090190253, 0.955286343725508, 0.9583953784319907, 0.9615044131384735, 0.9646134478449562, 0.967722482551439, 0.9708315172579216, 0.9739405519644044, 0.9770495866708871, 0.9801586213773699, 0.9832676560838526, 0.9863766907903353, 0.9894857254968181, 0.9925947602033007, 0.9957037949097836, 0.9988128296162663, 1.001921864322749, 1.0050308990292316, 1.0081399337357144, 1.0112489684421972, 1.0143580031486799, 1.0174670378551627, 1.0205760725616453, 1.0236851072681281, 1.026794141974611, 1.0299031766810935, 1.0330122113875764, 1.036121246094059, 1.0392302808005418, 1.0423393155070246, 1.0454483502135072, 1.0485573849199898, 1.0516664196264727, 1.0547754543329555, 1.057884489039438, 1.060993523745921, 1.0641025584524035, 1.0672115931588864, 1.0703206278653692, 1.0734296625718518, 1.0765386972783344, 1.0796477319848172, 1.0827567666913, 1.0858658013977829, 1.0889748361042655, 1.092083870810748, 1.095192905517231, 
1.0983019402237137, 1.1014109749301964, 1.104520009636679, 1.1076290443431618, 1.1107380790496446, 1.1138471137561274, 1.11695614846261, 1.1200651831690926, 1.1231742178755755, 1.1262832525820583, 1.1293922872885411, 1.1325013219950237, 1.1356103567015063, 1.1387193914079892, 1.141828426114472, 1.1449374608209546, 1.1479242666081464, 1.1508590255484523, 1.1537937844887582, 1.156728543429064, 1.1596633023693697, 1.1625980613096756]
2 | validation acc: [0.3689, 0.4393, 0.4015, 0.4714, 0.5035, 0.5211, 0.5439, 0.5236, 0.5269, 0.5212, 0.5728, 0.5894, 0.6, 0.5682, 0.595, 0.6018, 0.5995, 0.5808, 0.6293, 0.5536, 0.6295, 0.6174, 0.6154, 0.6357, 0.6283, 0.6392, 0.6269, 0.6295, 0.6244, 0.6356, 0.6583, 0.6559, 0.6231, 0.6565, 0.6261, 0.648, 0.6631, 0.6505, 0.6463, 0.6132, 0.6627, 0.67, 0.6711, 0.6797, 0.6806, 0.6739, 0.666, 0.6837, 0.6636, 0.6594, 0.6692, 0.6836, 0.6531, 0.6807, 0.6928, 0.6751, 0.6717, 0.6842, 0.6873, 0.6875, 0.6964, 0.6807, 0.6873, 0.6865, 0.6725, 0.6951, 0.7011, 0.7006, 0.6831, 0.6934, 0.6845, 0.695, 0.681, 0.6752, 0.7018, 0.689, 0.6966, 0.6856, 0.7055, 0.6915, 0.6999, 0.7041, 0.7046, 0.7065, 0.6895, 0.7009, 0.6926, 0.7011, 0.6886, 0.687, 0.6958, 0.6659, 0.6842, 0.7042, 0.6952, 0.6946, 0.7093, 0.6639, 0.7009, 0.696, 0.6996, 0.6833, 0.6999, 0.6965, 0.6988, 0.6989, 0.7109, 0.708, 0.7098, 0.6979, 0.7056, 0.7033, 0.7145, 0.7079, 0.7074, 0.6999, 0.7051, 0.7149, 0.708, 0.6967, 0.708, 0.6877, 0.7099, 0.7002, 0.6944, 0.7035, 0.7081, 0.7052, 0.7149, 0.7072, 0.7155, 0.7131, 0.7038, 0.7051, 0.7151, 0.7064, 0.7047, 0.7083, 0.7178, 0.6983, 0.7147, 0.7072, 0.6974, 0.7154, 0.7125, 0.7122, 0.684, 0.7159, 0.7176, 0.7013, 0.7085, 0.6975, 0.7118, 0.717, 0.7073, 0.712, 0.7118, 0.7154, 0.6985, 0.7013, 0.7042, 0.7186, 0.7075, 0.7138, 0.7114, 0.7143, 0.7176, 0.7153, 0.7182, 0.7053, 0.7027, 0.6978, 0.7036, 0.7201, 0.7076, 0.7116, 0.7174, 0.7194, 0.7077, 0.7169, 0.7126, 0.7196, 0.709, 0.7195, 0.7159, 0.7108, 0.7153, 0.7122, 0.7089, 0.7093, 0.7101, 0.7123, 0.7181, 0.7225, 0.7054, 0.7108, 0.7106, 0.7147, 0.717, 0.7107]
3 |
--------------------------------------------------------------------------------
/results/cifar_dpsgd_delta_1e-06_lr_0.001.txt:
--------------------------------------------------------------------------------
1 | eps: [0.7708745555850502, 0.774220746838752, 0.7775669380924536, 0.7809131293461553, 0.784259320599857, 0.7876055118535588, 0.7909517031072604, 0.7942978943609621, 0.7976440856146638, 0.8009902768683654, 0.8043364681220672, 0.8076826593757689, 0.8110288506294706, 0.8143750418831723, 0.817721233136874, 0.8210674243905757, 0.8244136156442774, 0.8277598068979791, 0.8311059981516807, 0.8344521894053825, 0.8377983806590842, 0.8411445719127859, 0.8444907631664875, 0.8478369544201892, 0.851183145673891, 0.8545293369275927, 0.8578755281812943, 0.861221719434996, 0.8645679106886978, 0.8679141019423995, 0.8712602931961011, 0.8746064844498028, 0.8779526757035045, 0.8812988669572063, 0.8846450582109079, 0.8879912494646096, 0.8913374407183113, 0.8946836319720131, 0.8980298232257147, 0.9013760144794164, 0.9047222057331181, 0.9080683969868197, 0.9114145882405215, 0.9147607794942232, 0.9181069707479249, 0.9214531620016266, 0.9247993532553282, 0.92814554450903, 0.9314917357627317, 0.9348379270164334, 0.9381841182701351, 0.9415303095238368, 0.9448765007775385, 0.9482226920312402, 0.9515688832849418, 0.9549150745386435, 0.9582612657923453, 0.961607457046047, 0.9649536482997487, 0.9682998395534503, 0.9716460308071521, 0.9749922220608538, 0.9783384133145554, 0.9816846045682571, 0.9850307958219588, 0.9883769870756606, 0.9917231783293623, 0.9950693695830639, 0.9984155608367656, 1.0017617520904674, 1.005107943344169, 1.0084541345978708, 1.0118003258515724, 1.015146517105274, 1.0184927083589759, 1.0218388996126775, 1.0251850908663793, 1.028531282120081, 1.0318774733737825, 1.0352236646274844, 1.038569855881186, 1.0419160471348876, 1.0452622383885894, 1.048608429642291, 1.0519546208959927, 1.0553008121496945, 1.0586470034033961, 1.0619931946570977, 1.0653393859107996, 1.0686855771645012, 1.072031768418203, 1.0753779596719046, 1.0787241509256065, 1.082070342179308, 1.0854165334330097, 1.0887627246867115, 1.0921089159404131, 1.0954551071941148, 1.0988012984478166, 1.1021474897015182, 1.1054936809552198, 1.1088398722089217, 1.1121860634626233, 1.1155322547163251, 1.1188784459700267, 1.1222246372237286, 1.1255708284774302, 1.1289170197311318, 1.1322632109848336, 1.1356094022385352, 1.1389555934922369, 1.1423017847459387, 1.1456479759996403, 1.148994167253342, 1.1523403585070437, 1.1556865497607454, 1.159032741014447, 1.1623789322681488, 1.1657251235218504, 1.1690713147755523, 1.1724175060292539, 1.1757636972829557, 1.1791098885366573, 1.182456079790359, 1.1858022710440608, 1.1891484622977624, 1.192494653551464, 1.1958408448051658, 1.1991870360588675, 1.202533227312569, 1.205879418566271, 1.2092256098199725, 1.2125718010736741, 1.215917992327376, 1.2192641835810776, 1.2226103748347794, 1.225956566088481, 1.2293027573421829, 1.2326489485958845, 1.235995139849586, 1.239341331103288, 1.2426875223569895, 1.2460337136106912, 1.249379904864393, 1.2527260961180946, 1.2560722873717962, 1.259418478625498, 1.2627646698791997, 1.2661108611329013, 1.2694570523866031, 1.272803243640305, 1.2761494348940063, 1.2794956261477082, 1.28284181740141, 1.2861880086551116, 1.2895341999088132, 1.292880391162515, 1.2962265824162167, 1.2995727736699183, 1.3029189649236201, 1.3062651561773218, 1.3096113474310234, 1.3129575386847252, 1.3163037299384268, 1.3196499211921284, 1.3229961124458303, 1.326342303699532, 1.3296884949532335, 1.3330346862069353, 1.3363808774606372, 1.3397270687143388, 1.3430732599680404, 1.3464194512217422, 1.3497656424754438, 1.3531118337291455, 1.3564580249828473, 1.359804216236549, 1.3631504074902505, 
1.3664965987439524, 1.369842789997654, 1.3731889812513556, 1.3765351725050574, 1.3798813637587592, 1.3832275550124609, 1.3865737462661625, 1.3899199375198643, 1.393266128773566, 1.3966123200272675, 1.3999585112809694, 1.403304702534671, 1.406502720583159, 1.4096117552896417, 1.4127207899961243, 1.415829824702607, 1.41893885940909, 1.4220478941155725, 1.4251569288220554, 1.428265963528538, 1.4313749982350208, 1.4344840329415036]
2 | validation acc: [0.3756, 0.4218, 0.4576, 0.4572, 0.4651, 0.4857, 0.5136, 0.5337, 0.5265, 0.5388, 0.5286, 0.5851, 0.5796, 0.5462, 0.5908, 0.5742, 0.6136, 0.6284, 0.62, 0.6343, 0.6097, 0.5827, 0.6341, 0.6228, 0.6097, 0.6276, 0.6345, 0.6337, 0.6002, 0.6019, 0.6545, 0.6631, 0.657, 0.6518, 0.5948, 0.6656, 0.6811, 0.6565, 0.656, 0.6623, 0.6691, 0.6784, 0.6245, 0.6595, 0.6673, 0.6807, 0.6832, 0.6674, 0.6831, 0.6682, 0.6918, 0.6821, 0.6889, 0.6752, 0.6908, 0.6817, 0.6664, 0.6706, 0.6626, 0.6724, 0.6938, 0.6982, 0.6643, 0.6889, 0.6762, 0.685, 0.6905, 0.6932, 0.6746, 0.6929, 0.6961, 0.6895, 0.7006, 0.6895, 0.6689, 0.6777, 0.6898, 0.691, 0.6958, 0.6895, 0.6984, 0.6855, 0.7083, 0.6827, 0.6995, 0.6969, 0.6915, 0.6919, 0.7012, 0.6939, 0.6988, 0.7037, 0.6882, 0.7033, 0.7001, 0.6896, 0.7084, 0.6972, 0.6925, 0.6818, 0.7096, 0.6979, 0.6871, 0.7024, 0.6981, 0.705, 0.7077, 0.6933, 0.6958, 0.7041, 0.7097, 0.6911, 0.711, 0.6803, 0.7049, 0.696, 0.6952, 0.7037, 0.6988, 0.7038, 0.7027, 0.709, 0.7009, 0.7011, 0.6897, 0.6994, 0.7066, 0.7003, 0.7055, 0.7056, 0.7117, 0.6883, 0.6999, 0.6967, 0.6828, 0.6997, 0.6998, 0.7012, 0.7046, 0.7142, 0.7072, 0.7101, 0.7134, 0.7073, 0.7083, 0.6998, 0.7108, 0.7114, 0.7121, 0.6971, 0.6996, 0.708, 0.7059, 0.7061, 0.7115, 0.7101, 0.7076, 0.713, 0.7002, 0.706, 0.7077, 0.7046, 0.7088, 0.7055, 0.7149, 0.6985, 0.7109, 0.7013, 0.6881, 0.7105, 0.7124, 0.6967, 0.7129, 0.7077, 0.7158, 0.7088, 0.7125, 0.7061, 0.7141, 0.7113, 0.7113, 0.701, 0.7078, 0.7119, 0.7149, 0.7165, 0.7112, 0.7059, 0.7105, 0.711, 0.7144, 0.7158, 0.6987, 0.7047, 0.7062, 0.7107, 0.7102, 0.713, 0.7108, 0.7112]
3 |
--------------------------------------------------------------------------------
/results/cifar_dpsgd_delta_1e-05_lr_0.001.txt:
--------------------------------------------------------------------------------
1 | eps: [0.6429531615298255, 0.6462993527835272, 0.6496455440372289, 0.6529917352909306, 0.6563379265446323, 0.659684117798334, 0.6630303090520356, 0.6663765003057374, 0.6697226915594391, 0.6730688828131408, 0.6764150740668424, 0.6797612653205442, 0.6831074565742459, 0.6864536478279476, 0.6897998390816492, 0.6931460303353509, 0.6964922215890527, 0.6998384128427544, 0.703184604096456, 0.7065307953501577, 0.7098769866038595, 0.7132231778575611, 0.7165693691112628, 0.7199155603649645, 0.7232617516186662, 0.726607942872368, 0.7299541341260696, 0.7333003253797713, 0.736646516633473, 0.7399927078871747, 0.7433388991408764, 0.7466850903945781, 0.7500312816482798, 0.7533774729019815, 0.7567236641556832, 0.7600698554093849, 0.7634160466630866, 0.7667622379167882, 0.7701084291704899, 0.7734546204241917, 0.7768008116778934, 0.7801470029315951, 0.7834931941852967, 0.7868393854389985, 0.7901855766927002, 0.7935317679464019, 0.7968779592001036, 0.8002241504538052, 0.803570341707507, 0.8069165329612087, 0.8102627242149103, 0.813608915468612, 0.8169551067223138, 0.8203012979760155, 0.8236474892297172, 0.8269936804834188, 0.8303398717371205, 0.8336860629908223, 0.8370322542445239, 0.8403784454982256, 0.8437246367519273, 0.847070828005629, 0.8504170192593308, 0.8537632105130324, 0.8571094017667341, 0.8604555930204358, 0.8638017842741375, 0.8671479755278392, 0.8704941667815409, 0.8738403580352426, 0.8771865492889444, 0.880532740542646, 0.8838789317963477, 0.8872251230500494, 0.890571314303751, 0.8939175055574528, 0.8972636968111545, 0.9006098880648561, 0.9039560793185579, 0.9073022705722595, 0.9106484618259613, 0.913994653079663, 0.9173408443333646, 0.9206870355870663, 0.924033226840768, 0.9273794180944697, 0.9307256093481715, 0.9340718006018731, 0.9374179918555748, 0.9407641831092766, 0.9441103743629782, 0.94745656561668, 0.9508027568703816, 0.9541489481240834, 0.9574951393777851, 0.9608413306314867, 0.9641875218851884, 0.9675337131388901, 0.9708799043925918, 0.9742260956462936, 0.9775722868999952, 0.9809184781536969, 0.9842646694073987, 0.9876108606611003, 0.990957051914802, 0.9943032431685037, 0.9976494344222053, 1.0009956256759072, 1.0043418169296088, 1.0076880081833104, 1.0110341994370122, 1.0143803906907138, 1.0177265819444155, 1.0210727731981173, 1.024418964451819, 1.0277651557055207, 1.0311113469592224, 1.0344575382129242, 1.0378037294666258, 1.0411499207203274, 1.0444961119740293, 1.0478423032277309, 1.0511884944814325, 1.0545346857351343, 1.057880876988836, 1.0612270682425375, 1.0645732594962394, 1.067919450749941, 1.0712656420036426, 1.0746118332573444, 1.077958024511046, 1.081304215764748, 1.0846504070184495, 1.0879965982721513, 1.091342789525853, 1.0946889807795546, 1.0980351720332564, 1.101381363286958, 1.1047275545406596, 1.1080737457943615, 1.111419937048063, 1.1147661283017647, 1.1181123195554665, 1.1214585108091681, 1.1248047020628698, 1.1281508933165716, 1.1314970845702732, 1.134843275823975, 1.1381894670776767, 1.1415356583313785, 1.14488184958508, 1.1482280408387817, 1.1515742320924836, 1.1549204233461852, 1.1582666145998868, 1.1616128058535886, 1.1649589971072902, 1.1683051883609918, 1.1715674280348853, 1.1746764627413682, 1.177785497447851, 1.1808945321543336, 1.1840035668608162, 1.187112601567299, 1.1902216362737819, 1.1933306709802645, 1.1964397056867473, 1.19954874039323, 1.2026577750997127, 1.2057668098061955, 1.2088758445126782, 1.2119848792191608, 1.2150939139256436, 1.2182029486321264, 1.2213119833386092, 1.2244210180450918, 1.2275300527515745, 1.2306390874580573, 
1.23374812216454, 1.2368571568710227, 1.2399661915775053, 1.2430752262839881, 1.246184260990471, 1.2492932956969538, 1.2524023304034364, 1.255511365109919, 1.2586203998164018, 1.2617294345228847, 1.2648384692293675, 1.26794750393585, 1.2710565386423327, 1.2741655733488155, 1.2772746080552984, 1.280383642761781, 1.2834926774682636, 1.2866017121747464, 1.2897107468812292, 1.292819781587712, 1.2959288162941947, 1.2990378510006773]
2 | validation acc: [0.3862, 0.4004, 0.472, 0.4846, 0.4885, 0.5227, 0.5376, 0.5356, 0.5393, 0.5635, 0.5725, 0.5661, 0.5843, 0.5438, 0.5578, 0.5831, 0.5552, 0.6103, 0.6269, 0.6374, 0.6368, 0.6487, 0.6304, 0.6503, 0.6575, 0.6605, 0.6447, 0.6517, 0.6564, 0.6473, 0.6612, 0.6177, 0.6586, 0.6693, 0.6787, 0.6607, 0.6713, 0.6521, 0.6856, 0.6873, 0.6751, 0.6718, 0.6855, 0.6855, 0.6903, 0.6754, 0.6879, 0.6705, 0.6817, 0.6988, 0.6854, 0.6942, 0.6741, 0.6766, 0.6513, 0.6887, 0.6859, 0.6742, 0.6927, 0.6952, 0.6957, 0.6914, 0.6613, 0.6897, 0.6996, 0.6947, 0.6949, 0.7076, 0.7055, 0.6975, 0.6965, 0.6952, 0.7058, 0.6886, 0.7069, 0.6995, 0.7004, 0.6913, 0.6775, 0.6852, 0.7103, 0.6981, 0.7069, 0.6831, 0.686, 0.6965, 0.7016, 0.7051, 0.7053, 0.71, 0.6983, 0.7085, 0.6941, 0.7142, 0.7117, 0.7053, 0.703, 0.6986, 0.7028, 0.687, 0.7078, 0.7027, 0.7033, 0.7014, 0.7109, 0.7074, 0.6972, 0.7062, 0.7017, 0.6891, 0.7045, 0.7116, 0.6902, 0.7089, 0.7077, 0.7067, 0.6935, 0.7092, 0.7053, 0.7038, 0.7144, 0.7054, 0.6911, 0.7094, 0.7109, 0.7051, 0.7122, 0.7066, 0.7152, 0.6971, 0.7127, 0.7078, 0.6832, 0.7162, 0.7108, 0.7104, 0.7095, 0.6962, 0.709, 0.7153, 0.7138, 0.7133, 0.7069, 0.7129, 0.718, 0.6988, 0.7103, 0.7074, 0.7074, 0.7025, 0.7119, 0.7105, 0.7086, 0.7141, 0.7092, 0.7044, 0.7129, 0.7082, 0.7058, 0.7043, 0.7112, 0.712, 0.7115, 0.7156, 0.7187, 0.7084, 0.7186, 0.7017, 0.7182, 0.7187, 0.709, 0.7035, 0.7123, 0.7159, 0.7169, 0.7134, 0.7068, 0.7085, 0.7115, 0.7112, 0.7163, 0.7135, 0.7191, 0.721, 0.7158, 0.7156, 0.7113, 0.7226, 0.7208, 0.7148, 0.7143, 0.7124, 0.7221, 0.7156, 0.7205, 0.7194, 0.7078, 0.7183, 0.7229, 0.7208]
3 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/tensor_buffer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A lightweight buffer for maintaining tensors."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 |
23 | class TensorBuffer(object):
24 | """A lightweight buffer for maintaining lists.
25 |
26 | The TensorBuffer accumulates tensors of the given shape into a tensor (whose
27 | rank is one more than that of the given shape) via calls to `append`. The
28 | current value of the accumulated tensor can be extracted via the property
29 | `values`.
30 | """
31 |
32 | def __init__(self, capacity, shape, dtype=tf.int32, name=None):
33 | """Initializes the TensorBuffer.
34 |
35 | Args:
36 | capacity: Initial capacity. Buffer will double in capacity each time it is
37 | filled to capacity.
38 | shape: The shape (as tuple or list) of the tensors to accumulate.
39 | dtype: The type of the tensors.
40 | name: A string name for the variable_scope used.
41 |
42 | Raises:
43 | ValueError: If the shape is empty (specifies scalar shape).
44 | """
45 | shape = list(shape)
46 | self._rank = len(shape)
47 | self._name = name
48 | self._dtype = dtype
49 | if not self._rank:
50 | raise ValueError('Shape cannot be scalar.')
51 | shape = [capacity] + shape
52 |
53 | with tf.variable_scope(self._name):
54 | # We need to use a placeholder as the initial value to allow resizing.
55 | self._buffer = tf.Variable(
56 | initial_value=tf.placeholder_with_default(
57 | tf.zeros(shape, dtype), shape=None),
58 | trainable=False,
59 | name='buffer',
60 | use_resource=True)
61 | self._current_size = tf.Variable(
62 | initial_value=0, dtype=tf.int32, trainable=False, name='current_size')
63 | self._capacity = tf.Variable(
64 | initial_value=capacity,
65 | dtype=tf.int32,
66 | trainable=False,
67 | name='capacity')
68 |
69 | def append(self, value):
70 | """Appends a new tensor to the end of the buffer.
71 |
72 | Args:
73 | value: The tensor to append. Must match the shape specified in the
74 | initializer.
75 |
76 | Returns:
77 | An op appending the new tensor to the end of the buffer.
78 | """
79 |
80 | def _double_capacity():
81 | """Doubles the capacity of the current tensor buffer."""
82 | padding = tf.zeros_like(self._buffer, self._buffer.dtype)
83 | new_buffer = tf.concat([self._buffer, padding], axis=0)
84 | if tf.executing_eagerly():
85 | with tf.variable_scope(self._name, reuse=True):
86 | self._buffer = tf.get_variable(
87 | name='buffer',
88 | dtype=self._dtype,
89 | initializer=new_buffer,
90 | trainable=False)
91 | return self._buffer, tf.assign(self._capacity,
92 | tf.multiply(self._capacity, 2))
93 | else:
94 | return tf.assign(
95 | self._buffer, new_buffer,
96 | validate_shape=False), tf.assign(self._capacity,
97 | tf.multiply(self._capacity, 2))
98 |
99 | update_buffer, update_capacity = tf.cond(
100 | tf.equal(self._current_size, self._capacity),
101 | _double_capacity, lambda: (self._buffer, self._capacity))
102 |
103 | with tf.control_dependencies([update_buffer, update_capacity]):
104 | with tf.control_dependencies([
105 | tf.assert_less(
106 | self._current_size,
107 | self._capacity,
108 | message='Appending past end of TensorBuffer.'),
109 | tf.assert_equal(
110 | tf.shape(value),
111 | tf.shape(self._buffer)[1:],
112 | message='Appending value of inconsistent shape.')
113 | ]):
114 | with tf.control_dependencies(
115 | [tf.assign(self._buffer[self._current_size, :], value)]):
116 | return tf.assign_add(self._current_size, 1)
117 |
118 | @property
119 | def values(self):
120 | """Returns the accumulated tensor."""
121 | begin_value = tf.zeros([self._rank + 1], dtype=tf.int32)
122 | value_size = tf.concat([[self._current_size],
123 | tf.constant(-1, tf.int32, [self._rank])], 0)
124 | return tf.slice(self._buffer, begin_value, value_size)
125 |
126 | @property
127 | def current_size(self):
128 | """Returns the current number of tensors in the buffer."""
129 | return self._current_size
130 |
131 | @property
132 | def capacity(self):
133 | """Returns the current capacity of the buffer."""
134 | return self._capacity
135 |
--------------------------------------------------------------------------------
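A minimal usage sketch for the TensorBuffer above, assuming TF 1.x graph mode
(on TF 2.x the tf.compat.v1 equivalents would be needed); the buffer name and
values are illustrative:

import tensorflow as tf

from privacy.analysis import tensor_buffer

# Capacity 2; a third append would exercise the capacity-doubling branch.
buf = tensor_buffer.TensorBuffer(
    capacity=2, shape=[3], dtype=tf.float32, name='demo_buffer')
append_op = buf.append(tf.constant([1.0, 2.0, 3.0]))

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(append_op)
  sess.run(append_op)
  print(sess.run(buf.values))  # Two rows of [1. 2. 3.]

--------------------------------------------------------------------------------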
/tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | """Scratchpad for training a CNN on MNIST with DPSGD."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import numpy as np
22 | import tensorflow as tf
23 |
24 | tf.flags.DEFINE_float('learning_rate', .15, 'Learning rate for training')
25 | tf.flags.DEFINE_integer('batch_size', 256, 'Batch size')
26 | tf.flags.DEFINE_integer('epochs', 15, 'Number of epochs')
27 |
28 | FLAGS = tf.flags.FLAGS
29 |
30 |
31 | def cnn_model_fn(features, labels, mode):
32 | """Model function for a CNN."""
33 |
34 | # Define CNN architecture using tf.keras.layers.
35 | input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
36 | y = tf.keras.layers.Conv2D(16, 8,
37 | strides=2,
38 | padding='same',
39 | activation='relu').apply(input_layer)
40 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
41 | y = tf.keras.layers.Conv2D(32, 4,
42 | strides=2,
43 | padding='valid',
44 | activation='relu').apply(y)
45 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
46 | y = tf.keras.layers.Flatten().apply(y)
47 | y = tf.keras.layers.Dense(32, activation='relu').apply(y)
48 | logits = tf.keras.layers.Dense(10).apply(y)
49 |
50 | # Calculate loss as a vector and as its average across minibatch.
51 | vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
52 | logits=logits)
53 | scalar_loss = tf.reduce_mean(vector_loss)
54 |
55 | # Configure the training op (for TRAIN mode).
56 | if mode == tf.estimator.ModeKeys.TRAIN:
57 | optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
58 | opt_loss = scalar_loss
59 | global_step = tf.train.get_global_step()
60 | train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
61 | return tf.estimator.EstimatorSpec(mode=mode,
62 | loss=scalar_loss,
63 | train_op=train_op)
64 |
65 | # Add evaluation metrics (for EVAL mode).
66 | elif mode == tf.estimator.ModeKeys.EVAL:
67 | eval_metric_ops = {
68 | 'accuracy':
69 | tf.metrics.accuracy(
70 | labels=labels,
71 | predictions=tf.argmax(input=logits, axis=1))
72 | }
73 | return tf.estimator.EstimatorSpec(mode=mode,
74 | loss=scalar_loss,
75 | eval_metric_ops=eval_metric_ops)
76 |
77 |
78 | def load_mnist():
79 | """Loads MNIST and preprocesses the training and test data."""
80 | train, test = tf.keras.datasets.mnist.load_data()
81 | train_data, train_labels = train
82 | test_data, test_labels = test
83 |
84 | train_data = np.array(train_data, dtype=np.float32) / 255
85 | test_data = np.array(test_data, dtype=np.float32) / 255
86 |
87 | train_labels = np.array(train_labels, dtype=np.int32)
88 | test_labels = np.array(test_labels, dtype=np.int32)
89 |
90 | assert train_data.min() == 0.
91 | assert train_data.max() == 1.
92 | assert test_data.min() == 0.
93 | assert test_data.max() == 1.
94 | assert train_labels.ndim == 1
95 | assert test_labels.ndim == 1
96 |
97 | return train_data, train_labels, test_data, test_labels
98 |
99 |
100 | def main(unused_argv):
101 | tf.logging.set_verbosity(tf.logging.INFO)
102 |
103 | # Load training and test data.
104 | train_data, train_labels, test_data, test_labels = load_mnist()
105 |
106 | # Instantiate the tf.Estimator.
107 | mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn)
108 |
109 | # Create tf.Estimator input functions for the training and test data.
110 | train_input_fn = tf.estimator.inputs.numpy_input_fn(
111 | x={'x': train_data},
112 | y=train_labels,
113 | batch_size=FLAGS.batch_size,
114 | num_epochs=FLAGS.epochs,
115 | shuffle=True)
116 | eval_input_fn = tf.estimator.inputs.numpy_input_fn(
117 | x={'x': test_data},
118 | y=test_labels,
119 | num_epochs=1,
120 | shuffle=False)
121 |
122 | # Training loop.
123 | steps_per_epoch = 60000 // FLAGS.batch_size
124 | for epoch in range(1, FLAGS.epochs + 1):
125 | # Train the model for one epoch.
126 | mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)
127 |
128 | # Evaluate the model and print results.
129 | eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
130 | test_accuracy = eval_results['accuracy']
131 | print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))
132 |
133 | if __name__ == '__main__':
134 | tf.app.run()
135 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for PrivacyLedger."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | from privacy.analysis import privacy_ledger
24 | from privacy.dp_query import gaussian_query
25 | from privacy.dp_query import nested_query
26 | from privacy.dp_query import test_utils
27 |
28 | tf.enable_eager_execution()
29 |
30 |
31 | class PrivacyLedgerTest(tf.test.TestCase):
32 |
33 | def test_fail_on_probability_zero(self):
34 | with self.assertRaisesRegexp(ValueError,
35 | 'Selection probability cannot be 0.'):
36 | privacy_ledger.PrivacyLedger(10, 0)
37 |
38 | def test_basic(self):
39 | ledger = privacy_ledger.PrivacyLedger(10, 0.1)
40 | ledger.record_sum_query(5.0, 1.0)
41 | ledger.record_sum_query(2.0, 0.5)
42 |
43 | ledger.finalize_sample()
44 |
45 | expected_queries = [[5.0, 1.0], [2.0, 0.5]]
46 | formatted = ledger.get_formatted_ledger_eager()
47 |
48 | sample = formatted[0]
49 | self.assertAllClose(sample.population_size, 10.0)
50 | self.assertAllClose(sample.selection_probability, 0.1)
51 | self.assertAllClose(sorted(sample.queries), sorted(expected_queries))
52 |
53 | def test_sum_query(self):
54 | record1 = tf.constant([2.0, 0.0])
55 | record2 = tf.constant([-1.0, 1.0])
56 |
57 | population_size = tf.Variable(0)
58 | selection_probability = tf.Variable(1.0)
59 |
60 | query = gaussian_query.GaussianSumQuery(
61 | l2_norm_clip=10.0, stddev=0.0)
62 | query = privacy_ledger.QueryWithLedger(
63 | query, population_size, selection_probability)
64 |
65 | # First sample.
66 | tf.assign(population_size, 10)
67 | tf.assign(selection_probability, 0.1)
68 | test_utils.run_query(query, [record1, record2])
69 |
70 | expected_queries = [[10.0, 0.0]]
71 | formatted = query.ledger.get_formatted_ledger_eager()
72 | sample_1 = formatted[0]
73 | self.assertAllClose(sample_1.population_size, 10.0)
74 | self.assertAllClose(sample_1.selection_probability, 0.1)
75 | self.assertAllClose(sample_1.queries, expected_queries)
76 |
77 | # Second sample.
78 | tf.assign(population_size, 20)
79 | tf.assign(selection_probability, 0.2)
80 | test_utils.run_query(query, [record1, record2])
81 |
82 | formatted = query.ledger.get_formatted_ledger_eager()
83 | sample_1, sample_2 = formatted
84 | self.assertAllClose(sample_1.population_size, 10.0)
85 | self.assertAllClose(sample_1.selection_probability, 0.1)
86 | self.assertAllClose(sample_1.queries, expected_queries)
87 |
88 | self.assertAllClose(sample_2.population_size, 20.0)
89 | self.assertAllClose(sample_2.selection_probability, 0.2)
90 | self.assertAllClose(sample_2.queries, expected_queries)
91 |
92 | def test_nested_query(self):
93 | population_size = tf.Variable(0)
94 | selection_probability = tf.Variable(1.0)
95 |
96 | query1 = gaussian_query.GaussianAverageQuery(
97 | l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
98 | query2 = gaussian_query.GaussianAverageQuery(
99 | l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)
100 |
101 | query = nested_query.NestedQuery([query1, query2])
102 | query = privacy_ledger.QueryWithLedger(
103 | query, population_size, selection_probability)
104 |
105 | record1 = [1.0, [12.0, 9.0]]
106 | record2 = [5.0, [1.0, 2.0]]
107 |
108 | # First sample.
109 | tf.assign(population_size, 10)
110 | tf.assign(selection_probability, 0.1)
111 | test_utils.run_query(query, [record1, record2])
112 |
113 | expected_queries = [[4.0, 2.0], [5.0, 1.0]]
114 | formatted = query.ledger.get_formatted_ledger_eager()
115 | sample_1 = formatted[0]
116 | self.assertAllClose(sample_1.population_size, 10.0)
117 | self.assertAllClose(sample_1.selection_probability, 0.1)
118 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
119 |
120 | # Second sample.
121 | tf.assign(population_size, 20)
122 | tf.assign(selection_probability, 0.2)
123 | test_utils.run_query(query, [record1, record2])
124 |
125 | formatted = query.ledger.get_formatted_ledger_eager()
126 | sample_1, sample_2 = formatted
127 | self.assertAllClose(sample_1.population_size, 10.0)
128 | self.assertAllClose(sample_1.selection_probability, 0.1)
129 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
130 |
131 | self.assertAllClose(sample_2.population_size, 20.0)
132 | self.assertAllClose(sample_2.selection_probability, 0.2)
133 | self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
134 |
135 |
136 | if __name__ == '__main__':
137 | tf.test.main()
138 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/aggregation.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import numpy as np
22 | from six.moves import xrange
23 |
24 |
25 | def labels_from_probs(probs):
26 | """
27 | Helper function: computes argmax along last dimension of array to obtain
28 | labels (max prob or max logit value)
29 | :param probs: numpy array where probabilities or logits are on last dimension
30 | :return: array with the same shape as the input except that the last
31 |          dimension is removed, now containing the labels
32 | """
33 | # Compute last axis index
34 | last_axis = len(np.shape(probs)) - 1
35 |
36 | # Label is argmax over last dimension
37 | labels = np.argmax(probs, axis=last_axis)
38 |
39 | # Return as np.int32
40 | return np.asarray(labels, dtype=np.int32)
41 |
42 |
43 | def noisy_max(logits, lap_scale, return_clean_votes=False):
44 | """
45 | This aggregation mechanism takes the softmax/logit output of several models
46 | resulting from inference on identical inputs and computes the noisy-max of
47 | the votes for candidate classes to select a label for each sample: it
48 | adds Laplacian noise to label counts and returns the most frequent label.
49 | :param logits: logits or probabilities for each sample
50 | :param lap_scale: scale of the Laplacian noise to be added to counts
51 | :param return_clean_votes: if set to True, also returns clean votes (without
52 | Laplacian noise). This can be used to perform the
53 | privacy analysis of this aggregation mechanism.
54 | :return: the aggregated labels alone or, if return_clean_votes is True, a
55 |          triple of the aggregated labels, the clean vote counts for each
56 |          class per sample, and the original labels produced by the teachers.
57 | """
58 |
59 | # Compute labels from logits/probs and reshape array properly
60 | labels = labels_from_probs(logits)
61 | labels_shape = np.shape(labels)
62 | labels = labels.reshape((labels_shape[0], labels_shape[1]))
63 |
64 | # Initialize array to hold final labels
65 | result = np.zeros(int(labels_shape[1]))
66 |
67 | if return_clean_votes:
68 | # Initialize array to hold clean votes for each sample
69 | clean_votes = np.zeros((int(labels_shape[1]), 10))
70 |
71 | # Parse each sample
72 | for i in xrange(int(labels_shape[1])):
73 | # Count number of votes assigned to each class
74 | label_counts = np.bincount(labels[:, i], minlength=10)
75 |
76 | if return_clean_votes:
77 | # Store vote counts for export
78 | clean_votes[i] = label_counts
79 |
80 | # Cast in float32 to prepare before addition of Laplacian noise
81 | label_counts = np.asarray(label_counts, dtype=np.float32)
82 |
83 | # Sample independent Laplacian noise for each class
84 | for item in xrange(10):
85 | label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale))
86 |
87 | # Result is the most frequent label
88 | result[i] = np.argmax(label_counts)
89 |
90 | # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries
91 | result = np.asarray(result, dtype=np.int32)
92 |
93 | if return_clean_votes:
94 | # Returns several arrays, which are later saved:
95 | # result: labels obtained from the noisy aggregation
96 | # clean_votes: the number of teacher votes assigned to each sample and class
97 | # labels: the labels assigned by teachers (before the noisy aggregation)
98 | return result, clean_votes, labels
99 | else:
100 | # Only return labels resulting from noisy aggregation
101 | return result
102 |
103 |
104 | def aggregation_most_frequent(logits):
105 | """
106 | This aggregation mechanism takes the softmax/logit output of several models
107 | resulting from inference on identical inputs and computes the most frequent
108 | label. It is deterministic (no noise injection, unlike noisy_max() above).
109 | :param logits: logits or probabilities for each sample
110 | :return: the most frequent label for each sample, as an np.int32 array
111 | """
112 | # Compute labels from logits/probs and reshape array properly
113 | labels = labels_from_probs(logits)
114 | labels_shape = np.shape(labels)
115 | labels = labels.reshape((labels_shape[0], labels_shape[1]))
116 |
117 | # Initialize array to hold final labels
118 | result = np.zeros(int(labels_shape[1]))
119 |
120 | # Parse each sample
121 | for i in xrange(int(labels_shape[1])):
122 | # Count number of votes assigned to each class
123 | label_counts = np.bincount(labels[:, i], minlength=10)
124 |
125 | label_counts = np.asarray(label_counts, dtype=np.int32)
126 |
127 | # Result is the most frequent label
128 | result[i] = np.argmax(label_counts)
129 |
130 | return np.asarray(result, dtype=np.int32)
131 |
--------------------------------------------------------------------------------
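A toy invocation of noisy_max above, assuming the PATE convention that logits
has shape (nb_teachers, nb_samples, nb_classes) with 10 classes; the array
values and import path are illustrative:

import numpy as np

from aggregation import noisy_max

# 5 teachers, 3 samples, 10 classes.
logits = np.random.rand(5, 3, 10)

# Noisy aggregation: one label per sample, plus the clean vote counts and the
# per-teacher labels for later privacy analysis.
result, clean_votes, teacher_labels = noisy_max(
    logits, lap_scale=20.0, return_clean_votes=True)
assert result.shape == (3,) and clean_votes.shape == (3, 10)

--------------------------------------------------------------------------------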
/tensorflow_privacy/privacy/dp_query/gaussian_query.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Implements DPQuery interface for Gaussian average queries.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import collections
23 |
24 | from distutils.version import LooseVersion
25 | import tensorflow as tf
26 |
27 | from privacy.dp_query import dp_query
28 | from privacy.dp_query import normalized_query
29 |
30 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
31 | nest = tf.contrib.framework.nest
32 | else:
33 | nest = tf.nest
34 |
35 |
36 | class GaussianSumQuery(dp_query.SumAggregationDPQuery):
37 | """Implements DPQuery interface for Gaussian sum queries.
38 |
39 | Accumulates clipped vectors, then adds Gaussian noise to the sum.
40 | """
41 |
42 | # pylint: disable=invalid-name
43 | _GlobalState = collections.namedtuple(
44 | '_GlobalState', ['l2_norm_clip', 'stddev'])
45 |
46 | def __init__(self, l2_norm_clip, stddev):
47 | """Initializes the GaussianSumQuery.
48 |
49 | Args:
50 | l2_norm_clip: The clipping norm to apply to the global norm of each
51 | record.
52 | stddev: The stddev of the noise added to the sum.
53 | """
54 | self._l2_norm_clip = l2_norm_clip
55 | self._stddev = stddev
56 | self._ledger = None
57 |
58 | def set_ledger(self, ledger):
59 | self._ledger = ledger
60 |
61 | def make_global_state(self, l2_norm_clip, stddev):
62 | """Creates a global state from the given parameters."""
63 | return self._GlobalState(tf.cast(l2_norm_clip, tf.float32),
64 | tf.cast(stddev, tf.float32))
65 |
66 | def initial_global_state(self):
67 | return self.make_global_state(self._l2_norm_clip, self._stddev)
68 |
69 | def derive_sample_params(self, global_state):
70 | return global_state.l2_norm_clip
71 |
72 | def initial_sample_state(self, template):
73 | return nest.map_structure(
74 | dp_query.zeros_like, template)
75 |
76 | def preprocess_record_impl(self, params, record):
77 | """Clips the l2 norm, returning the clipped record and the l2 norm.
78 |
79 | Args:
80 | params: The parameters for the sample.
81 | record: The record to be processed.
82 |
83 | Returns:
84 | A tuple (preprocessed_records, l2_norm) where `preprocessed_records` is
85 | the structure of preprocessed tensors, and l2_norm is the total l2 norm
86 | before clipping.
87 | """
88 | l2_norm_clip = params
89 | record_as_list = nest.flatten(record)
90 | clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip)
91 | return nest.pack_sequence_as(record, clipped_as_list), norm
92 |
93 | def preprocess_record(self, params, record):
94 | preprocessed_record, _ = self.preprocess_record_impl(params, record)
95 | return preprocessed_record
96 |
97 | def get_noised_result(self, sample_state, global_state):
98 | """See base class."""
99 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
100 | def add_noise(v):
101 | return v + tf.random_normal(tf.shape(v), stddev=global_state.stddev)
102 | else:
103 | random_normal = tf.random_normal_initializer(stddev=global_state.stddev)
104 | def add_noise(v):
105 | return v + random_normal(tf.shape(v))
106 |
107 | if self._ledger:
108 | dependencies = [
109 | self._ledger.record_sum_query(
110 | global_state.l2_norm_clip, global_state.stddev)
111 | ]
112 | else:
113 | dependencies = []
114 | with tf.control_dependencies(dependencies):
115 | return nest.map_structure(add_noise, sample_state), global_state
116 |
117 |
118 | class GaussianAverageQuery(normalized_query.NormalizedQuery):
119 | """Implements DPQuery interface for Gaussian average queries.
120 |
121 | Accumulates clipped vectors, adds Gaussian noise, and normalizes.
122 |
123 | Note that we use "fixed-denominator" estimation: the denominator should be
124 | specified as the expected number of records per sample. Accumulating the
125 | denominator separately would also be possible but would produce a higher-
126 | variance estimator.
127 | """
128 |
129 | def __init__(self,
130 | l2_norm_clip,
131 | sum_stddev,
132 | denominator):
133 | """Initializes the GaussianAverageQuery.
134 |
135 | Args:
136 | l2_norm_clip: The clipping norm to apply to the global norm of each
137 | record.
138 | sum_stddev: The stddev of the noise added to the sum (before
139 | normalization).
140 | denominator: The normalization constant (applied after noise is added to
141 | the sum).
142 | """
143 | super(GaussianAverageQuery, self).__init__(
144 | numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev),
145 | denominator=denominator)
146 |
--------------------------------------------------------------------------------
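A minimal sketch driving the GaussianSumQuery above by hand in eager mode; with
stddev=0.0 the output is deterministic, and the record values are illustrative:

import tensorflow as tf

if hasattr(tf, 'enable_eager_execution'):
  tf.enable_eager_execution()  # Needed on TF 1.x; eager is the default in 2.x.

from privacy.dp_query import gaussian_query

query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)
global_state = query.initial_global_state()
params = query.derive_sample_params(global_state)

records = [tf.constant([0.6, 0.8]),   # norm 1.0, kept as-is
           tf.constant([3.0, 4.0])]   # norm 5.0, clipped to [0.6, 0.8]
sample_state = query.initial_sample_state(records[0])
for record in records:
  sample_state = query.accumulate_record(params, sample_state, record)

result, global_state = query.get_noised_result(sample_state, global_state)
# result == [1.2, 1.6]: the second record is clipped before summation.

--------------------------------------------------------------------------------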
/dp_optimizer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Differentially private optimizers for TensorFlow."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from privacy.analysis import privacy_ledger
23 | from privacy.dp_query import gaussian_query
24 |
25 | def make_optimizer_class(cls):
26 | """Constructs a DP optimizer class from an existing one."""
27 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__
28 | child_code = cls._compute_gradients.__code__
29 | if child_code is not parent_code:
30 | tf.compat.v1.logging.warning(
31 | 'WARNING: Calling make_optimizer_class() on class %s that overrides '
32 | 'method _compute_gradients(). Check to ensure that '
33 | 'make_optimizer_class() does not interfere with overridden version.',
34 | cls.__name__)
35 |
36 | class DPOptimizerClass(cls):
37 | """Differentially private subclass of given class cls."""
38 |
39 | def __init__(
40 | self,
41 | dp_sum_query,
42 | num_microbatches=None,
43 | unroll_microbatches=False,
44 | *args,
45 | **kwargs):
46 | """Initialize the DPOptimizerClass.
47 |
48 | Args:
49 | dp_sum_query: DPQuery object, specifying differential privacy
50 | mechanism to use.
51 | num_microbatches: How many microbatches into which the minibatch is
52 | split. If None, will default to the size of the minibatch, and
53 | per-example gradients will be computed.
54 | unroll_microbatches: If true, processes microbatches within a Python
55 | loop instead of a tf.while_loop. Can be used if using a tf.while_loop
56 | raises an exception.
57 | """
58 | super(DPOptimizerClass, self).__init__(*args, **kwargs)
59 | ###### accountant + sanitizer ######
60 | self._dp_sum_query = dp_sum_query
61 | ######
62 | self._num_microbatches = num_microbatches
63 | self._global_state = self._dp_sum_query.initial_global_state()
64 | self._unroll_microbatches = unroll_microbatches
65 |
66 | def compute_gradients(self, loss, var_list, gate_gradients=None, aggregation_method=None, colocate_gradients_with_ops=False, grad_loss=None, gradient_tape=None):
67 | if not gradient_tape:
68 | raise ValueError('A tape needs to be passed.')
69 |
70 | vector_loss = loss()
71 | if self._num_microbatches is None:
72 | self._num_microbatches = tf.shape(vector_loss)[0]
73 | sample_state = self._dp_sum_query.initial_sample_state(var_list)
74 | microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1])
75 | sample_params = (self._dp_sum_query.derive_sample_params(self._global_state))
76 |
77 | for idx in range(self._num_microbatches):
78 | ###### compute gradient ######
79 | microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [idx]))
80 | grads = gradient_tape.gradient(microbatch_loss, var_list)
81 | ######
82 |
83 | ###### accountant ######
84 | sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads)
85 | ######
86 |
87 | ###### sanitizer ######
88 | grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state))
89 | ######
90 |
91 | def normalize(v):
92 | return v / tf.cast(self._num_microbatches, tf.float32)
93 |
94 | final_grads = tf.nest.map_structure(normalize, grad_sums)
95 |
96 | grads_and_vars = list(zip(final_grads, var_list))
97 | return grads_and_vars
98 |
99 | return DPOptimizerClass
100 |
101 |
102 | def make_gaussian_optimizer_class(cls):
103 | """Constructs a DP optimizer with Gaussian averaging of updates."""
104 |
105 | class DPGaussianOptimizerClass(make_optimizer_class(cls)):
106 | """DP subclass of given class cls using Gaussian averaging."""
107 |
108 | def __init__(self, l2_norm_clip, noise_multiplier, num_microbatches=None, ledger=None, unroll_microbatches=False, *args, **kwargs):
109 | dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, l2_norm_clip * noise_multiplier)
110 |
111 | if ledger:
112 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, ledger=ledger)
113 |
114 | super(DPGaussianOptimizerClass, self).__init__(dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs)
115 |
116 | @property
117 | def ledger(self):
118 | return self._dp_sum_query.ledger
119 |
120 | return DPGaussianOptimizerClass
121 |
122 | DPAdagradOptimizer = make_optimizer_class(tf.optimizers.Adagrad)
123 | DPAdamOptimizer = make_optimizer_class(tf.optimizers.Adam)
124 | DPGradientDescentOptimizer = make_optimizer_class(tf.optimizers.SGD)
125 |
126 | DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adagrad)
127 | DPAdamGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adam)
128 | DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.SGD)
129 |
--------------------------------------------------------------------------------
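A hedged eager-mode sketch of the factory above, mirroring the eager test file
that follows (all values are illustrative; compute_gradients must be called
inside a persistent GradientTape):

import tensorflow as tf

from dp_optimizer import DPGradientDescentGaussianOptimizer

var0 = tf.Variable([1.0, 2.0])
data0 = tf.constant([[3.0, 4.0], [5.0, 6.0]])

opt = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0e9,      # effectively no clipping
    noise_multiplier=0.0,    # noise stddev = l2_norm_clip * noise_multiplier
    num_microbatches=2,
    learning_rate=2.0)

with tf.GradientTape(persistent=True) as tape:
  # Per-example (vector) loss; one microbatch per example here.
  loss_fn = lambda: 0.5 * tf.reduce_sum(
      tf.math.squared_difference(var0, data0), axis=1)
  grads_and_vars = opt.compute_gradients(loss_fn, [var0], gradient_tape=tape)
opt.apply_gradients(grads_and_vars)

--------------------------------------------------------------------------------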
/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for differentially private optimizers."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from absl.testing import parameterized
21 | import numpy as np
22 | import tensorflow as tf
23 |
24 | from privacy.analysis import privacy_ledger
25 | from privacy.dp_query import gaussian_query
26 | from privacy.optimizers import dp_optimizer
27 |
28 |
29 | class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
30 |
31 | def setUp(self):
32 | tf.enable_eager_execution()
33 | super(DPOptimizerEagerTest, self).setUp()
34 |
35 | def _loss_fn(self, val0, val1):
36 | return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1)
37 |
38 | @parameterized.named_parameters(
39 | ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1,
40 | [-2.5, -2.5]),
41 | ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2,
42 | [-2.5, -2.5]),
43 | ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4,
44 | [-2.5, -2.5]),
45 | ('DPAdagrad 1', dp_optimizer.DPAdagradOptimizer, 1, [-2.5, -2.5]),
46 | ('DPAdagrad 2', dp_optimizer.DPAdagradOptimizer, 2, [-2.5, -2.5]),
47 | ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]),
48 | ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]),
49 | ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
50 | ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]))
51 | def testBaseline(self, cls, num_microbatches, expected_answer):
52 | with tf.GradientTape(persistent=True) as gradient_tape:
53 | var0 = tf.Variable([1.0, 2.0])
54 | data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
55 |
56 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
57 | dp_sum_query = privacy_ledger.QueryWithLedger(
58 | dp_sum_query, 1e6, num_microbatches / 1e6)
59 |
60 | opt = cls(
61 | dp_sum_query,
62 | num_microbatches=num_microbatches,
63 | learning_rate=2.0)
64 |
65 | self.evaluate(tf.global_variables_initializer())
66 | # Fetch params to validate initial values
67 | self.assertAllClose([1.0, 2.0], self.evaluate(var0))
68 |
69 | # Expected gradient is sum of differences divided by number of
70 | # microbatches.
71 | grads_and_vars = opt.compute_gradients(
72 | lambda: self._loss_fn(var0, data0), [var0],
73 | gradient_tape=gradient_tape)
74 | self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
75 |
76 | @parameterized.named_parameters(
77 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer),
78 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer),
79 | ('DPAdam', dp_optimizer.DPAdamOptimizer))
80 | def testClippingNorm(self, cls):
81 | with tf.GradientTape(persistent=True) as gradient_tape:
82 | var0 = tf.Variable([0.0, 0.0])
83 | data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
84 |
85 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
86 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
87 |
88 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
89 |
90 | self.evaluate(tf.global_variables_initializer())
91 | # Fetch params to validate initial values
92 | self.assertAllClose([0.0, 0.0], self.evaluate(var0))
93 |
94 | # Expected gradient is sum of differences.
95 | grads_and_vars = opt.compute_gradients(
96 | lambda: self._loss_fn(var0, data0), [var0],
97 | gradient_tape=gradient_tape)
98 | self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
99 |
100 | @parameterized.named_parameters(
101 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer),
102 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer),
103 | ('DPAdam', dp_optimizer.DPAdamOptimizer))
104 | def testNoiseMultiplier(self, cls):
105 | with tf.GradientTape(persistent=True) as gradient_tape:
106 | var0 = tf.Variable([0.0])
107 | data0 = tf.Variable([[0.0]])
108 |
109 | dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
110 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
111 |
112 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
113 |
114 | self.evaluate(tf.global_variables_initializer())
115 | # Fetch params to validate initial values
116 | self.assertAllClose([0.0], self.evaluate(var0))
117 |
118 | grads = []
119 | for _ in range(1000):
120 | grads_and_vars = opt.compute_gradients(
121 | lambda: self._loss_fn(var0, data0), [var0],
122 | gradient_tape=gradient_tape)
123 | grads.append(grads_and_vars[0][0])
124 |
125 | # Test standard deviation is close to l2_norm_clip * noise_multiplier.
126 | self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
127 |
128 |
129 | if __name__ == '__main__':
130 | tf.test.main()
131 |
--------------------------------------------------------------------------------
/mnist.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Training a CNN on MNIST with Keras and the DP SGD optimizer."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from absl import app
21 | from absl import flags
22 | from absl import logging
23 |
24 | import numpy as np
25 | import tensorflow as tf
26 |
27 | from privacy.analysis.rdp_accountant import compute_rdp
28 | from privacy.analysis.rdp_accountant import get_privacy_spent
29 | from dp_optimizer import DPGradientDescentGaussianOptimizer
30 |
31 | GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer
32 |
33 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, train with vanilla SGD.')
34 | flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate for training')
35 | flags.DEFINE_float('noise_multiplier', 1.1, 'Ratio of the standard deviation to the clipping norm')
36 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
37 | flags.DEFINE_integer('batch_size', 250, 'Batch size')
38 | flags.DEFINE_integer('epochs', 400, 'Number of epochs')
39 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches (must evenly divide batch_size)')
40 | flags.DEFINE_string('model_dir', None, 'Model directory')
41 |
42 | FLAGS = flags.FLAGS
43 | delta = 1e-2  # Target delta; varied per run. MNIST has 60000 training points, so delta = 1e-5 is the conventional choice.
44 |
45 |
46 | def compute_epsilon(steps):
47 | """Computes epsilon value for given hyperparameters."""
48 | if FLAGS.noise_multiplier == 0.0:
49 | return float('inf')
50 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
51 | sampling_probability = FLAGS.batch_size / 60000
52 | rdp = compute_rdp(q=sampling_probability, noise_multiplier=FLAGS.noise_multiplier, steps=steps, orders=orders)
53 | return get_privacy_spent(orders, rdp, target_delta=delta)[0]
54 |
55 |
56 | class EpsilonPrintingCallback(tf.keras.callbacks.Callback):
57 | """Callback for Keras model to evaluate epsilon after every epoch."""
58 | def __init__(self):
59 | self.eps_history = []
60 |
61 | def on_epoch_end(self, epoch, logs=None):
62 | if FLAGS.dpsgd:
63 | eps = compute_epsilon((epoch + 1) * (60000 // FLAGS.batch_size))
64 | self.eps_history.append(eps)
65 | print(', eps = {}'.format(eps))
66 |
67 |
68 | def load_mnist():
69 | """Loads MNIST and preprocesses the training and test data."""
70 | train, test = tf.keras.datasets.mnist.load_data()
71 | train_data, train_labels = train
72 | test_data, test_labels = test
73 |
74 | train_data = np.array(train_data, dtype=np.float32) / 255
75 | test_data = np.array(test_data, dtype=np.float32) / 255
76 |
77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1)
78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1)
79 |
80 | train_labels = np.array(train_labels, dtype=np.int32)
81 | test_labels = np.array(test_labels, dtype=np.int32)
82 |
83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)
85 |
86 | return train_data, train_labels, test_data, test_labels
87 |
88 |
89 | def main(unused_argv):
90 | logging.set_verbosity(logging.INFO)
91 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
92 | raise ValueError('Number of microbatches should evenly divide batch_size')
93 |
94 | # Load training and test data.
95 | train_data, train_labels, test_data, test_labels = load_mnist()
96 |
97 | # Define a sequential Keras model
98 | model = tf.keras.Sequential([
99 | tf.keras.layers.Conv2D(16, 8, strides=2, padding='same', activation='relu', input_shape=(28, 28, 1)),
100 | tf.keras.layers.MaxPool2D(2, 1),
101 | tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid', activation='relu'),
102 | tf.keras.layers.MaxPool2D(2, 1),
103 | tf.keras.layers.Flatten(),
104 | tf.keras.layers.Dense(32, activation='relu'),
105 | tf.keras.layers.Dense(10)
106 | ])
107 |
108 | if FLAGS.dpsgd:
109 | optimizer = DPGradientDescentGaussianOptimizer(
110 | l2_norm_clip=FLAGS.l2_norm_clip,
111 | noise_multiplier=FLAGS.noise_multiplier,
112 | num_microbatches=FLAGS.microbatches,
113 | learning_rate=FLAGS.learning_rate)
114 | # Compute vector of per-example loss rather than its mean over a minibatch.
115 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction=tf.compat.v1.losses.Reduction.NONE)
116 | else:
117 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
118 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
119 |
120 | # Compile model with Keras
121 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
122 |
123 | # Train model with Keras
124 | eps_callback = EpsilonPrintingCallback()
125 | fit_history = model.fit(train_data, train_labels, epochs=FLAGS.epochs, validation_data=(test_data, test_labels), batch_size=FLAGS.batch_size, callbacks=[eps_callback])
126 | eps_history = eps_callback.eps_history
127 | val_acc_history = fit_history.history['val_accuracy']
128 | with open('delta_{}_lr_{}.txt'.format(delta, FLAGS.learning_rate), 'w') as f:
129 | f.write('eps: {}\n'.format(eps_history))
130 | f.write('validation acc: {}\n'.format(val_acc_history))
131 |
132 | if __name__ == '__main__':
133 | app.run(main)
--------------------------------------------------------------------------------
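A standalone sketch of the RDP accounting performed by compute_epsilon above;
the step count corresponds to 400 epochs at batch size 250, and all values are
illustrative:

from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
steps = (60000 // 250) * 400          # 240 steps per epoch, 400 epochs
rdp = compute_rdp(q=250.0 / 60000,    # sampling probability per step
                  noise_multiplier=1.1,
                  steps=steps,
                  orders=orders)
# get_privacy_spent returns (eps, delta, optimal_order).
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('eps = {}'.format(eps))

--------------------------------------------------------------------------------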
/tensorflow_privacy/privacy/dp_query/nested_query_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for NestedQuery."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 |
22 | from absl.testing import parameterized
23 | from distutils.version import LooseVersion
24 | import numpy as np
25 | import tensorflow as tf
26 |
27 | from privacy.dp_query import gaussian_query
28 | from privacy.dp_query import nested_query
29 | from privacy.dp_query import test_utils
30 |
31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
32 | nest = tf.contrib.framework.nest
33 | else:
34 | nest = tf.nest
35 |
36 | _basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
37 |
38 |
39 | class NestedQueryTest(tf.test.TestCase, parameterized.TestCase):
40 |
41 | def test_nested_gaussian_sum_no_clip_no_noise(self):
42 | with self.cached_session() as sess:
43 | query1 = gaussian_query.GaussianSumQuery(
44 | l2_norm_clip=10.0, stddev=0.0)
45 | query2 = gaussian_query.GaussianSumQuery(
46 | l2_norm_clip=10.0, stddev=0.0)
47 |
48 | query = nested_query.NestedQuery([query1, query2])
49 |
50 | record1 = [1.0, [2.0, 3.0]]
51 | record2 = [4.0, [3.0, 2.0]]
52 |
53 | query_result, _ = test_utils.run_query(query, [record1, record2])
54 | result = sess.run(query_result)
55 | expected = [5.0, [5.0, 5.0]]
56 | self.assertAllClose(result, expected)
57 |
58 | def test_nested_gaussian_average_no_clip_no_noise(self):
59 | with self.cached_session() as sess:
60 | query1 = gaussian_query.GaussianAverageQuery(
61 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0)
62 | query2 = gaussian_query.GaussianAverageQuery(
63 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0)
64 |
65 | query = nested_query.NestedQuery([query1, query2])
66 |
67 | record1 = [1.0, [2.0, 3.0]]
68 | record2 = [4.0, [3.0, 2.0]]
69 |
70 | query_result, _ = test_utils.run_query(query, [record1, record2])
71 | result = sess.run(query_result)
72 | expected = [1.0, [1.0, 1.0]]
73 | self.assertAllClose(result, expected)
74 |
75 | def test_nested_gaussian_average_with_clip_no_noise(self):
76 | with self.cached_session() as sess:
77 | query1 = gaussian_query.GaussianAverageQuery(
78 | l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0)
79 | query2 = gaussian_query.GaussianAverageQuery(
80 | l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0)
81 |
82 | query = nested_query.NestedQuery([query1, query2])
83 |
84 | record1 = [1.0, [12.0, 9.0]] # Clipped to [1.0, [4.0, 3.0]]
85 | record2 = [5.0, [1.0, 2.0]] # Clipped to [4.0, [1.0, 2.0]]
86 |
87 | query_result, _ = test_utils.run_query(query, [record1, record2])
88 | result = sess.run(query_result)
89 | expected = [1.0, [1.0, 1.0]]
90 | self.assertAllClose(result, expected)
91 |
92 | def test_complex_nested_query(self):
93 | with self.cached_session() as sess:
94 | query_ab = gaussian_query.GaussianSumQuery(
95 | l2_norm_clip=1.0, stddev=0.0)
96 | query_c = gaussian_query.GaussianAverageQuery(
97 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
98 | query_d = gaussian_query.GaussianSumQuery(
99 | l2_norm_clip=10.0, stddev=0.0)
100 |
101 | query = nested_query.NestedQuery(
102 | [query_ab, {'c': query_c, 'd': [query_d]}])
103 |
104 | record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}]
105 | record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}]
106 |
107 | query_result, _ = test_utils.run_query(query, [record1, record2])
108 | result = sess.run(query_result)
109 | expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
110 | self.assertAllClose(result, expected)
111 |
112 | def test_nested_query_with_noise(self):
113 | with self.cached_session() as sess:
114 | sum_stddev = 2.71828
115 | denominator = 3.14159
116 |
117 | query1 = gaussian_query.GaussianSumQuery(
118 | l2_norm_clip=1.5, stddev=sum_stddev)
119 | query2 = gaussian_query.GaussianAverageQuery(
120 | l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
121 | query = nested_query.NestedQuery((query1, query2))
122 |
123 | record1 = (3.0, [2.0, 1.5])
124 | record2 = (0.0, [-1.0, -3.5])
125 |
126 | query_result, _ = test_utils.run_query(query, [record1, record2])
127 |
128 | noised_averages = []
129 | for _ in range(1000):
130 | noised_averages.append(nest.flatten(sess.run(query_result)))
131 |
132 | result_stddev = np.std(noised_averages, 0)
133 | avg_stddev = sum_stddev / denominator
134 | expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
135 | self.assertArrayNear(result_stddev, expected_stddev, 0.1)
136 |
137 | @parameterized.named_parameters(
138 | ('type_mismatch', [_basic_query], (1.0,), TypeError),
139 | ('too_many_queries', [_basic_query, _basic_query], [1.0], ValueError),
140 | ('query_too_deep', [_basic_query, [_basic_query]], [1.0, 1.0], TypeError))
141 | def test_record_incompatible_with_query(
142 | self, queries, record, error_type):
143 | with self.assertRaises(error_type):
144 | test_utils.run_query(nested_query.NestedQuery(queries), [record])
145 |
146 |
147 | if __name__ == '__main__':
148 | tf.test.main()
149 |
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/README.md:
--------------------------------------------------------------------------------
1 | # Learning private models with multiple teachers
2 |
3 | This repository contains code to create a setup for learning privacy-preserving
4 | student models by transferring knowledge from an ensemble of teachers trained
5 | on disjoint subsets of the data for which privacy guarantees are to be provided.
6 |
7 | Knowledge acquired by teachers is transferred to the student in a differentially
8 | private manner by noisily aggregating the teacher decisions before feeding them
9 | to the student during training.
10 |
11 | The paper describing the approach is [arXiv:1610.05755](https://arxiv.org/abs/1610.05755).
12 |
13 | ## Dependencies
14 |
15 | This model uses `TensorFlow` to perform numerical computations associated with
16 | machine learning models, as well as common Python libraries such as `numpy`,
17 | `scipy`, and `six`. Instructions to install these can be found in their
18 | respective documentation.
19 |
20 | ## How to run
21 |
22 | This repository supports the MNIST and SVHN datasets. The following
23 | instructions are given for MNIST but can easily be adapted by replacing the
24 | flag `--dataset=mnist` with `--dataset=svhn`.
25 |
26 | Training is a two-step process: first, we train an ensemble of teacher
27 | models, and second, we train a student using predictions made by this
28 | ensemble. The data will be downloaded automatically when you start the
29 | teacher training.
30 |
31 | **Training the teachers:** first run the `train_teachers.py` file with at least
32 | three flags specifying (1) the number of teachers, (2) the ID of the teacher
33 | you are training among these teachers, and (3) the dataset on which to train.
34 | For instance, to train teacher number 10 among an ensemble of 100 teachers for
35 | MNIST, you use the following command:
36 |
37 | ```
38 | python train_teachers.py --nb_teachers=100 --teacher_id=10 --dataset=mnist
39 | ```
40 |
41 | The flags `train_dir` and `data_dir` can optionally be set to point to the
42 | directories where model checkpoints and temporary data (like the dataset)
43 | should be saved, respectively. The flag `max_steps` (default: 3000) controls
44 | the length of training. See `train_teachers.py` and `deep_cnn.py` for all
45 | available flags and their descriptions.
46 |
47 | **Training the student:** once the teachers are all trained, e.g., teachers
48 | with IDs `0` to `99` are trained for `nb_teachers=100`, we are ready to train
49 | the student. The student is trained by labeling some of the test data with
50 | predictions from the teachers. The predictions are aggregated by counting the
51 | votes assigned to each class among the ensemble of teachers, adding Laplacian
52 | noise to these votes, and assigning the label with the maximum noisy vote count
53 | to the sample. This is detailed in function `noisy_max` in the file
54 | `aggregation.py`. To learn the student, use the following command:
55 |
56 | ```
57 | python train_student.py --nb_teachers=100 --dataset=mnist --stdnt_share=5000
58 | ```
59 |
60 | The flag `--stdnt_share=5000` indicates that the student should be able to
61 | use the first `5000` samples of the dataset's test subset as unlabeled
62 | training points (they will be labeled using the teacher predictions). The
63 | remaining samples are used for evaluation of the student's accuracy, which
64 | is displayed upon completion of training.
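
For intuition, the noisy aggregation described above can be sketched in a few
lines of NumPy (an illustrative sketch only, not the actual `noisy_max`
implementation in `aggregation.py`; the names `teacher_preds` and `lap_scale`
are hypothetical, and the noise-scale convention may differ):

```
import numpy as np

def noisy_max_sketch(teacher_preds, lap_scale, nb_labels=10):
  # teacher_preds: one predicted label per teacher, shape (nb_teachers,).
  label_counts = np.bincount(teacher_preds, minlength=nb_labels).astype(float)
  # Add independent Laplacian noise to each vote count.
  label_counts += np.random.laplace(loc=0.0, scale=lap_scale, size=nb_labels)
  # The aggregated label is the argmax of the noisy vote counts.
  return int(np.argmax(label_counts))
```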
65 |
66 | ## Using semi-supervised GANs to train the student
67 |
68 | In the paper, we describe how to train the student in a semi-supervised
69 | fashion using Generative Adversarial Networks. This can be reproduced for
70 | MNIST by cloning the [improved-gan](https://github.com/openai/improved-gan)
71 | repository, adding its `mnist_svhn_cifar10` directory to your `PATH`, and
72 | running the shell script `train_student_mnist_250_lap_20_count_50_epochs_600.sh`:
73 |
74 | ```
75 | export PATH="/path/to/improved-gan/mnist_svhn_cifar10":$PATH
76 | sh train_student_mnist_250_lap_20_count_50_epochs_600.sh
77 | ```
78 |
79 |
80 | ## Alternative deeper convolutional architecture
81 |
82 | Note that a deeper convolutional model is available. Both the default and
83 | deeper model graphs are defined in `deep_cnn.py`, by the functions
84 | `inference` and `inference_deeper` respectively. Use the flag `--deeper=true`
85 | to switch to that model when launching `train_teachers.py` and
86 | `train_student.py`.
87 |
88 | ## Privacy analysis
89 |
90 | In the paper, we detail how data-dependent differential privacy bounds can be
91 | computed to estimate the cost of training the student. In order to reproduce
92 | the bounds given in the paper, we include the labels predicted by our two
93 | teacher ensembles: MNIST and SVHN. You can run the privacy analysis for each
94 | dataset with the following commands:
95 |
96 | ```
97 | python analysis.py --counts_file=mnist_250_teachers_labels.npy --indices_file=mnist_250_teachers_100_indices_used_by_student.npy
98 |
99 | python analysis.py --counts_file=svhn_250_teachers_labels.npy --max_examples=1000 --delta=1e-6
100 | ```
101 |
102 | To expedite experimentation with the privacy analysis of student training,
103 | the `analysis.py` file is configured to download the labels produced by 250
104 | teacher models, for MNIST and SVHN when running the two commands included
105 | above. These 250 teacher models were trained using the following command lines,
106 | where `XXX` takes values between `0` and `249`:
107 |
108 | ```
109 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=mnist
110 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=svhn
111 | ```
112 |
113 | Note that these labels may also be used in lieu of the function
114 | `ensemble_preds` in `train_student.py` to compare the performance of
115 | alternative student model architectures and learning techniques. This
116 | facilitates future work by removing the need to train the MNIST and SVHN
117 | teacher ensembles when proposing new student training approaches.
118 |
119 | ## Contact
120 |
121 | To ask questions, please email `nicolas@papernot.fr` or open an issue on
122 | the `tensorflow/models` issues tracker. Please assign issues to
123 | [@npapernot](https://github.com/npapernot).
124 |
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Training a CNN on MNIST with Keras and the DP SGD optimizer."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from absl import app
21 | from absl import flags
22 |
23 | from distutils.version import LooseVersion
24 |
25 | import numpy as np
26 | import tensorflow as tf
27 |
28 | from privacy.analysis.rdp_accountant import compute_rdp
29 | from privacy.analysis.rdp_accountant import get_privacy_spent
30 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
31 |
32 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
33 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer
34 | else:
35 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name
36 |
37 | flags.DEFINE_boolean(
38 | 'dpsgd', True, 'If True, train with DP-SGD. If False, '
39 | 'train with vanilla SGD.')
40 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
41 | flags.DEFINE_float('noise_multiplier', 1.1,
42 | 'Ratio of the standard deviation to the clipping norm')
43 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
44 | flags.DEFINE_integer('batch_size', 250, 'Batch size')
45 | flags.DEFINE_integer('epochs', 60, 'Number of epochs')
46 | flags.DEFINE_integer(
47 | 'microbatches', 250, 'Number of microbatches '
48 | '(must evenly divide batch_size)')
49 | flags.DEFINE_string('model_dir', None, 'Model directory')
50 |
51 | FLAGS = flags.FLAGS
52 |
53 |
54 | def compute_epsilon(steps):
55 | """Computes epsilon value for given hyperparameters."""
56 | if FLAGS.noise_multiplier == 0.0:
57 | return float('inf')
58 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
59 | sampling_probability = FLAGS.batch_size / 60000
60 | rdp = compute_rdp(q=sampling_probability,
61 | noise_multiplier=FLAGS.noise_multiplier,
62 | steps=steps,
63 | orders=orders)
64 | # Delta is set to 1e-5 because MNIST has 60000 training points.
65 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
66 |
67 |
68 | def load_mnist():
69 | """Loads MNIST and preprocesses to combine training and validation data."""
70 | train, test = tf.keras.datasets.mnist.load_data()
71 | train_data, train_labels = train
72 | test_data, test_labels = test
73 |
74 | train_data = np.array(train_data, dtype=np.float32) / 255
75 | test_data = np.array(test_data, dtype=np.float32) / 255
76 |
77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1)
78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1)
79 |
80 | train_labels = np.array(train_labels, dtype=np.int32)
81 | test_labels = np.array(test_labels, dtype=np.int32)
82 |
83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)
85 |
86 | assert train_data.min() == 0.
87 | assert train_data.max() == 1.
88 | assert test_data.min() == 0.
89 | assert test_data.max() == 1.
90 |
91 | return train_data, train_labels, test_data, test_labels
92 |
93 |
94 | def main(unused_argv):
95 | tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
96 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
97 | raise ValueError('Number of microbatches should evenly divide batch_size')
98 |
99 | # Load training and test data.
100 | train_data, train_labels, test_data, test_labels = load_mnist()
101 |
102 | # Define a sequential Keras model
103 | model = tf.keras.Sequential([
104 | tf.keras.layers.Conv2D(16, 8,
105 | strides=2,
106 | padding='same',
107 | activation='relu',
108 | input_shape=(28, 28, 1)),
109 | tf.keras.layers.MaxPool2D(2, 1),
110 | tf.keras.layers.Conv2D(32, 4,
111 | strides=2,
112 | padding='valid',
113 | activation='relu'),
114 | tf.keras.layers.MaxPool2D(2, 1),
115 | tf.keras.layers.Flatten(),
116 | tf.keras.layers.Dense(32, activation='relu'),
117 | tf.keras.layers.Dense(10)
118 | ])
119 |
120 | if FLAGS.dpsgd:
121 | optimizer = DPGradientDescentGaussianOptimizer(
122 | l2_norm_clip=FLAGS.l2_norm_clip,
123 | noise_multiplier=FLAGS.noise_multiplier,
124 | num_microbatches=FLAGS.microbatches,
125 | learning_rate=FLAGS.learning_rate)
126 | # Compute vector of per-example loss rather than its mean over a minibatch.
127 | loss = tf.keras.losses.CategoricalCrossentropy(
128 | from_logits=True, reduction=tf.losses.Reduction.NONE)
129 | else:
130 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
131 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
132 |
133 | # Compile model with Keras
134 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
135 |
136 | # Train model with Keras
137 | model.fit(train_data, train_labels,
138 | epochs=FLAGS.epochs,
139 | validation_data=(test_data, test_labels),
140 | batch_size=FLAGS.batch_size)
141 |
142 | # Compute the privacy budget expended.
143 | if FLAGS.dpsgd:
144 | eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
145 | print('For delta=1e-5, the current epsilon is: %.2f' % eps)
146 | else:
147 | print('Trained with vanilla non-private SGD optimizer')
148 |
149 | if __name__ == '__main__':
150 | app.run(main)
151 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/gaussian_query_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for GaussianAverageQuery."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | from absl.testing import parameterized
22 | import numpy as np
23 | from six.moves import xrange
24 | import tensorflow as tf
25 |
26 | from privacy.dp_query import gaussian_query
27 | from privacy.dp_query import test_utils
28 |
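# As exercised by the tests below, GaussianSumQuery clips each record to the
# given L2 norm, sums the clipped records, and adds Gaussian noise with the
# given stddev; GaussianAverageQuery additionally divides the noised sum by
# `denominator`.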
29 |
30 | class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase):
31 |
32 | def test_gaussian_sum_no_clip_no_noise(self):
33 | with self.cached_session() as sess:
34 | record1 = tf.constant([2.0, 0.0])
35 | record2 = tf.constant([-1.0, 1.0])
36 |
37 | query = gaussian_query.GaussianSumQuery(
38 | l2_norm_clip=10.0, stddev=0.0)
39 | query_result, _ = test_utils.run_query(query, [record1, record2])
40 | result = sess.run(query_result)
41 | expected = [1.0, 1.0]
42 | self.assertAllClose(result, expected)
43 |
44 | def test_gaussian_sum_with_clip_no_noise(self):
45 | with self.cached_session() as sess:
46 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0].
47 | record2 = tf.constant([4.0, -3.0]) # Not clipped.
48 |
49 | query = gaussian_query.GaussianSumQuery(
50 | l2_norm_clip=5.0, stddev=0.0)
51 | query_result, _ = test_utils.run_query(query, [record1, record2])
52 | result = sess.run(query_result)
53 | expected = [1.0, 1.0]
54 | self.assertAllClose(result, expected)
55 |
56 | def test_gaussian_sum_with_changing_clip_no_noise(self):
57 | with self.cached_session() as sess:
58 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0].
59 | record2 = tf.constant([4.0, -3.0]) # Not clipped.
60 |
61 | l2_norm_clip = tf.Variable(5.0)
62 | l2_norm_clip_placeholder = tf.placeholder(tf.float32)
63 | assign_l2_norm_clip = tf.assign(l2_norm_clip, l2_norm_clip_placeholder)
64 | query = gaussian_query.GaussianSumQuery(
65 | l2_norm_clip=l2_norm_clip, stddev=0.0)
66 | query_result, _ = test_utils.run_query(query, [record1, record2])
67 |
68 | self.evaluate(tf.global_variables_initializer())
69 | result = sess.run(query_result)
70 | expected = [1.0, 1.0]
71 | self.assertAllClose(result, expected)
72 |
73 | sess.run(assign_l2_norm_clip, {l2_norm_clip_placeholder: 0.0})
74 | result = sess.run(query_result)
75 | expected = [0.0, 0.0]
76 | self.assertAllClose(result, expected)
77 |
78 | def test_gaussian_sum_with_noise(self):
79 | with self.cached_session() as sess:
80 | record1, record2 = 2.71828, 3.14159
81 | stddev = 1.0
82 |
83 | query = gaussian_query.GaussianSumQuery(
84 | l2_norm_clip=5.0, stddev=stddev)
85 | query_result, _ = test_utils.run_query(query, [record1, record2])
86 |
87 | noised_sums = []
88 | for _ in xrange(1000):
89 | noised_sums.append(sess.run(query_result))
90 |
91 | result_stddev = np.std(noised_sums)
92 | self.assertNear(result_stddev, stddev, 0.1)
93 |
94 | def test_gaussian_sum_merge(self):
95 | records1 = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])]
96 | records2 = [tf.constant([3.0, 5.0]), tf.constant([-1.0, 4.0])]
97 |
98 | def get_sample_state(records):
99 | query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0)
100 | global_state = query.initial_global_state()
101 | params = query.derive_sample_params(global_state)
102 | sample_state = query.initial_sample_state(records[0])
103 | for record in records:
104 | sample_state = query.accumulate_record(params, sample_state, record)
105 | return sample_state
106 |
107 | sample_state_1 = get_sample_state(records1)
108 | sample_state_2 = get_sample_state(records2)
109 |
110 | merged = gaussian_query.GaussianSumQuery(10.0, 1.0).merge_sample_states(
111 | sample_state_1,
112 | sample_state_2)
113 |
114 | with self.cached_session() as sess:
115 | result = sess.run(merged)
116 |
117 | expected = [3.0, 10.0]
118 | self.assertAllClose(result, expected)
119 |
120 | def test_gaussian_average_no_noise(self):
121 | with self.cached_session() as sess:
122 | record1 = tf.constant([5.0, 0.0]) # Clipped to [3.0, 0.0].
123 | record2 = tf.constant([-1.0, 2.0]) # Not clipped.
124 |
125 | query = gaussian_query.GaussianAverageQuery(
126 | l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0)
127 | query_result, _ = test_utils.run_query(query, [record1, record2])
128 | result = sess.run(query_result)
129 | expected_average = [1.0, 1.0]
130 | self.assertAllClose(result, expected_average)
131 |
132 | def test_gaussian_average_with_noise(self):
133 | with self.cached_session() as sess:
134 | record1, record2 = 2.71828, 3.14159
135 | sum_stddev = 1.0
136 | denominator = 2.0
137 |
138 | query = gaussian_query.GaussianAverageQuery(
139 | l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator)
140 | query_result, _ = test_utils.run_query(query, [record1, record2])
141 |
142 | noised_averages = []
143 | for _ in range(1000):
144 | noised_averages.append(sess.run(query_result))
145 |
146 | result_stddev = np.std(noised_averages)
147 | avg_stddev = sum_stddev / denominator
148 | self.assertNear(result_stddev, avg_stddev, 0.1)
149 |
150 | @parameterized.named_parameters(
151 | ('type_mismatch', [1.0], (1.0,), TypeError),
152 | ('too_few_on_left', [1.0], [1.0, 1.0], ValueError),
153 | ('too_few_on_right', [1.0, 1.0], [1.0], ValueError))
154 | def test_incompatible_records(self, record1, record2, error_type):
155 | query = gaussian_query.GaussianSumQuery(1.0, 0.0)
156 | with self.assertRaises(error_type):
157 | test_utils.run_query(query, [record1, record2])
158 |
159 |
160 | if __name__ == '__main__':
161 | tf.test.main()
162 |
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Training a CNN on MNIST in TF Eager mode with DP-SGD optimizer."""
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | from absl import app
20 | from absl import flags
21 |
22 | from distutils.version import LooseVersion
23 |
24 | import numpy as np
25 | import tensorflow as tf
26 |
27 | from privacy.analysis.rdp_accountant import compute_rdp
28 | from privacy.analysis.rdp_accountant import get_privacy_spent
29 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
30 |
31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
32 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer
33 | tf.enable_eager_execution()
34 | else:
35 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name
36 |
37 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, '
38 | 'train with vanilla SGD.')
39 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
40 | flags.DEFINE_float('noise_multiplier', 1.1,
41 | 'Ratio of the standard deviation to the clipping norm')
42 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
43 | flags.DEFINE_integer('batch_size', 250, 'Batch size')
44 | flags.DEFINE_integer('epochs', 60, 'Number of epochs')
45 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches '
46 | '(must evenly divide batch_size)')
47 |
48 | FLAGS = flags.FLAGS
49 |
50 |
51 | def compute_epsilon(steps):
52 | """Computes epsilon value for given hyperparameters."""
53 | if FLAGS.noise_multiplier == 0.0:
54 | return float('inf')
55 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
56 | sampling_probability = FLAGS.batch_size / 60000
57 | rdp = compute_rdp(q=sampling_probability,
58 | noise_multiplier=FLAGS.noise_multiplier,
59 | steps=steps,
60 | orders=orders)
61 | # Delta is set to 1e-5 because MNIST has 60000 training points.
62 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
63 |
64 |
65 | def main(_):
66 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
67 | raise ValueError('Number of microbatches should evenly divide batch_size')
68 |
69 | # Fetch the MNIST data.
70 | train, test = tf.keras.datasets.mnist.load_data()
71 | train_images, train_labels = train
72 | test_images, test_labels = test
73 |
74 | # Create a dataset object and batch for the training data
75 | dataset = tf.data.Dataset.from_tensor_slices(
76 | (tf.cast(train_images[..., tf.newaxis]/255, tf.float32),
77 | tf.cast(train_labels, tf.int64)))
78 | dataset = dataset.shuffle(1000).batch(FLAGS.batch_size)
79 |
80 | # Create a dataset object and batch for the test data
81 | eval_dataset = tf.data.Dataset.from_tensor_slices(
82 | (tf.cast(test_images[..., tf.newaxis]/255, tf.float32),
83 | tf.cast(test_labels, tf.int64)))
84 | eval_dataset = eval_dataset.batch(10000)
85 |
86 | # Define the model using tf.keras.layers
87 | mnist_model = tf.keras.Sequential([
88 | tf.keras.layers.Conv2D(16, 8,
89 | strides=2,
90 | padding='same',
91 | activation='relu'),
92 | tf.keras.layers.MaxPool2D(2, 1),
93 | tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'),
94 | tf.keras.layers.MaxPool2D(2, 1),
95 | tf.keras.layers.Flatten(),
96 | tf.keras.layers.Dense(32, activation='relu'),
97 | tf.keras.layers.Dense(10)
98 | ])
99 |
100 | # Instantiate the optimizer
101 | if FLAGS.dpsgd:
102 | opt = DPGradientDescentGaussianOptimizer(
103 | l2_norm_clip=FLAGS.l2_norm_clip,
104 | noise_multiplier=FLAGS.noise_multiplier,
105 | num_microbatches=FLAGS.microbatches,
106 | learning_rate=FLAGS.learning_rate)
107 | else:
108 | opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
109 |
110 | # Training loop.
111 | steps_per_epoch = 60000 // FLAGS.batch_size
112 | for epoch in range(FLAGS.epochs):
113 | # Train the model for one epoch.
114 | for (_, (images, labels)) in enumerate(dataset.take(-1)):
115 | with tf.GradientTape(persistent=True) as gradient_tape:
116 | # This dummy call is needed to obtain the var list.
117 | logits = mnist_model(images, training=True)
118 | var_list = mnist_model.trainable_variables
119 |
120 | # In Eager mode, the optimizer takes a function that returns the loss.
121 | def loss_fn():
122 | logits = mnist_model(images, training=True) # pylint: disable=undefined-loop-variable,cell-var-from-loop
123 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
124 | labels=labels, logits=logits) # pylint: disable=undefined-loop-variable,cell-var-from-loop
125 | # If training without privacy, the loss is a scalar not a vector.
126 | if not FLAGS.dpsgd:
127 | loss = tf.reduce_mean(loss)
128 | return loss
129 |
130 | if FLAGS.dpsgd:
131 | grads_and_vars = opt.compute_gradients(loss_fn, var_list,
132 | gradient_tape=gradient_tape)
133 | else:
134 | grads_and_vars = opt.compute_gradients(loss_fn, var_list)
135 |
136 | opt.apply_gradients(grads_and_vars)
137 |
138 | # Evaluate the model and print results
139 | for (_, (images, labels)) in enumerate(eval_dataset.take(-1)):
140 | logits = mnist_model(images, training=False)
141 | correct_preds = tf.equal(tf.argmax(logits, axis=1), labels)
142 | test_accuracy = np.mean(correct_preds.numpy())
143 | print('Test accuracy after epoch %d is: %.3f' % (epoch, test_accuracy))
144 |
145 | # Compute the privacy budget expended so far.
146 | if FLAGS.dpsgd:
147 | eps = compute_epsilon((epoch + 1) * steps_per_epoch)
148 | print('For delta=1e-5, the current epsilon is: %.2f' % eps)
149 | else:
150 | print('Trained with vanilla non-private SGD optimizer')
151 |
152 | if __name__ == '__main__':
153 | app.run(main)
154 |
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/README.md:
--------------------------------------------------------------------------------
1 | # Tutorials
2 |
3 | This folder contains a set of tutorials that demonstrate the features of this
4 | library.
5 | As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest way to use
6 | a differentially private optimizer is to take an existing TF training loop
7 | and replace the vanilla optimizer with its differentially private
8 | counterpart implemented in the library.
9 |
10 | Here is a list of all the tutorials included:
11 |
12 | * `lm_dpsgd_tutorial.py`: learn a language model with differential privacy.
13 |
14 | * `mnist_dpsgd_tutorial.py`: learn a convolutional neural network on MNIST with
15 | differential privacy.
16 |
17 | * `mnist_dpsgd_tutorial_eager.py`: learn a convolutional neural network on MNIST
18 | with differential privacy using Eager mode.
19 |
20 | * `mnist_dpsgd_tutorial_keras.py`: learn a convolutional neural network on MNIST
21 | with differential privacy using tf.Keras.
22 |
23 | * `mnist_lr_tutorial.py`: learn a differentially private logistic regression
24 | model on MNIST. The model illustrates application of the
25 | "amplification-by-iteration" analysis (https://arxiv.org/abs/1808.06651).
26 |
27 | The rest of this README describes the different parameters used to configure
28 | DP-SGD as well as expected outputs for the `mnist_dpsgd_tutorial.py` tutorial.
29 |
30 | ## Parameters
31 |
32 | All of the optimizers share some privacy-specific parameters that need to
33 | be tuned in addition to any existing hyperparameters. There are currently four:
34 |
35 | * `learning_rate` (float): The learning rate of the SGD training algorithm. The
36 | higher the learning rate, the more each update matters. If the updates are noisy
37 | (such as when the additive noise is large compared to the clipping
38 | threshold), the learning rate must be kept low for the training procedure to converge.
39 | * `num_microbatches` (int): The input data for each step (i.e., batch) of your
40 | original training algorithm is split into this many microbatches. Generally,
41 | increasing this will improve your utility but slow down your training in terms
42 | of wall-clock time. The total number of examples consumed in one global step
43 | remains the same. This number should evenly divide your input batch size.
44 | * `l2_norm_clip` (float): The cumulative gradient across all network parameters
45 | from each microbatch will be clipped so that its L2 norm is at most this
46 | value. You should set this to something close to some percentile of what
47 | you expect the gradient from each microbatch to be. In previous experiments,
48 | we've found numbers from 0.5 to 1.0 to work reasonably well.
49 | * `noise_multiplier` (float): This governs the amount of noise added during
50 | training. Generally, more noise results in better privacy and lower utility.
51 | This generally has to be at least 0.3 to obtain rigorous privacy guarantees,
52 | but smaller values may still be acceptable for practical purposes.
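
For concreteness, here is a minimal sketch of how these four parameters are
passed to one of the library's optimizers (the values shown mirror the
defaults of `mnist_dpsgd_tutorial.py`):

```
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

optimizer = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,        # clipping threshold
    noise_multiplier=1.1,    # ratio of noise stddev to the clipping norm
    num_microbatches=256,    # must evenly divide the batch size
    learning_rate=0.15)
```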
53 |
54 | ## Measuring Privacy
55 |
56 | Differential privacy can be expressed using two values, epsilon and delta.
57 | Roughly speaking, they mean the following:
58 |
59 | * epsilon gives a ceiling on how much the probability of a particular output
60 | can increase by including (or removing) a single training example. We usually
61 | want it to be a small constant (less than 10, or, for more stringent privacy
62 | guarantees, less than 1). However, this is only an upper bound, and a large
63 | value of epsilon may still mean good practical privacy.
64 | * delta bounds the probability of an arbitrary change in model behavior.
65 | We can usually set this to a very small number (1e-7 or so) without
66 | compromising utility. A rule of thumb is to set it to be less than the inverse
67 | of the training data size.
68 |
69 | To find the epsilon for a fixed delta value for your model, follow the
70 | approach demonstrated in the `compute_epsilon` function of `mnist_dpsgd_tutorial.py`,
71 | where the arguments used to call the RDP accountant (i.e., the tool used to
72 | compute the privacy guarantee) are:
73 |
74 | * `q` : The sampling ratio, defined as (number of examples consumed in one
75 | step) / (total training examples).
76 | * `noise_multiplier` : The noise_multiplier from your parameters above.
77 | * `steps` : The number of global steps taken.
78 |
79 | A detailed writeup of the theory behind the computation of epsilon and delta
80 | is available at https://arxiv.org/abs/1908.10530.
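
For example, the following sketch (mirroring the `compute_epsilon` function in
the tutorials) calls the RDP accountant directly with these three arguments:

```
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
# 60 epochs of MNIST (60000 examples) with batch size 256.
rdp = compute_rdp(q=256. / 60000,
                  noise_multiplier=1.1,
                  steps=60 * 60000 // 256,
                  orders=orders)
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('For delta=1e-5, the epsilon is: %.2f' % eps)
```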
81 |
82 | ## Expected Output
83 |
84 | When the `mnist_dpsgd_tutorial.py` script is run with the default parameters,
85 | the output will contain the following lines (leaving out a lot of diagnostic
86 | info):
87 | ```
88 | ...
89 | Test accuracy after 1 epochs is: 0.774
90 | For delta=1e-5, the current epsilon is: 1.03
91 | ...
92 | Test accuracy after 2 epochs is: 0.877
93 | For delta=1e-5, the current epsilon is: 1.11
94 | ...
95 | Test accuracy after 60 epochs is: 0.966
96 | For delta=1e-5, the current epsilon is: 3.01
97 | ```
98 |
99 | ## Using Command-Line Interface for Privacy Budgeting
100 |
101 | Before launching a (possibly quite lengthy) training procedure, it is possible
102 | to compute, quickly and accurately, the privacy loss at any point of training.
103 | To do so, run the script `privacy/analysis/compute_dp_sgd_privacy.py`, which
104 | does not have any TensorFlow dependencies. For example, executing
105 | ```
106 | compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.1 --epochs=60 --delta=1e-5
107 | ```
108 | allows us to conclude, in a matter of seconds, that DP-SGD run with default
109 | parameters satisfies differential privacy with eps = 3.01 and delta = 1e-05.
110 | Note that the flags provided in the command above correspond to the tutorial in
111 | `mnist_dpsgd_tutorial.py`. The command is applicable to other datasets but the
112 | values passed must be adapted (e.g., `N`, the number of training points).
113 |
114 |
115 | ## Select Parameters
116 |
117 | The table below has a few sample parameters illustrating various
118 | accuracy/privacy tradeoffs achieved by the MNIST tutorial in
119 | `mnist_dpsgd_tutorial.py` (default parameters are in __bold__; privacy epsilon
120 | is reported at delta=1e-5; accuracy is averaged over 10 runs, its standard
121 | deviation is less than .3% in all cases).
122 |
123 | | Learning rate | Noise multiplier | Clipping threshold | Number of microbatches | Number of epochs | Privacy eps | Accuracy |
124 | | ------------- | ---------------- | ----------------- | ---------------------- | ---------------- | ----------- | -------- |
125 | | 0.1 | | | __256__ | 20 | no privacy | 99.0% |
126 | | 0.25 | 1.3 | 1.5 | __256__ | 15 | 1.19 | 95.0% |
127 | | __0.15__ | __1.1__ | __1.0__ | __256__ |__60__ | 3.01 | 96.6% |
128 | | 0.25 | 0.7 | 1.5 | __256__ | 45 | 7.10 | 97.0% |
129 |
130 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for rdp_accountant.py."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import sys
22 |
23 | from absl.testing import absltest
24 | from absl.testing import parameterized
25 | from mpmath import exp
26 | from mpmath import inf
27 | from mpmath import log
28 | from mpmath import npdf
29 | from mpmath import quad
30 | import numpy as np
31 |
32 | from privacy.analysis import privacy_ledger
33 | from privacy.analysis import rdp_accountant
34 |
35 |
36 | class TestGaussianMoments(parameterized.TestCase):
37 | #################################
38 | # HELPER FUNCTIONS: #
39 | # Exact computations using #
40 | # multi-precision arithmetic. #
41 | #################################
42 |
43 | def _log_float_mp(self, x):
44 | # Convert multi-precision input to float log space.
45 | if x >= sys.float_info.min:
46 | return float(log(x))
47 | else:
48 | return -np.inf
49 |
50 | def _integral_mp(self, fn, bounds=(-inf, inf)):
51 | integral, _ = quad(fn, bounds, error=True, maxdegree=8)
52 | return integral
53 |
54 | def _distributions_mp(self, sigma, q):
55 |
56 | def _mu0(x):
57 | return npdf(x, mu=0, sigma=sigma)
58 |
59 | def _mu1(x):
60 | return npdf(x, mu=1, sigma=sigma)
61 |
62 | def _mu(x):
63 | return (1 - q) * _mu0(x) + q * _mu1(x)
64 |
65 | return _mu0, _mu # Closure!
66 |
67 | def _mu1_over_mu0(self, x, sigma):
68 | # Closed-form expression for N(1, sigma^2) / N(0, sigma^2) at x.
69 | return exp((2 * x - 1) / (2 * sigma**2))
70 |
71 | def _mu_over_mu0(self, x, q, sigma):
72 | return (1 - q) + q * self._mu1_over_mu0(x, sigma)
73 |
74 | def _compute_a_mp(self, sigma, q, alpha):
75 | """Compute A_alpha for arbitrary alpha by numerical integration."""
76 | mu0, _ = self._distributions_mp(sigma, q)
77 | a_alpha_fn = lambda z: mu0(z) * self._mu_over_mu0(z, q, sigma)**alpha
78 | a_alpha = self._integral_mp(a_alpha_fn)
79 | return a_alpha
80 |
81 | # TEST ROUTINES
82 | def test_compute_rdp_no_data(self):
83 | # q = 0
84 | self.assertEqual(rdp_accountant.compute_rdp(0, 10, 1, 20), 0)
85 |
86 | def test_compute_rdp_no_sampling(self):
87 | # q = 1, RDP = alpha / (2 * sigma^2)
88 | self.assertEqual(rdp_accountant.compute_rdp(1, 10, 1, 20), 0.1)
89 |
90 | def test_compute_rdp_scalar(self):
91 | rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
92 | self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)
93 |
94 | def test_compute_rdp_sequence(self):
95 | rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50,
96 | [1.5, 2.5, 5, 50, 100, np.inf])
97 | self.assertSequenceAlmostEqual(
98 | rdp_vec, [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf],
99 | delta=1e-5)
100 |
101 | params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01},
102 | {'q': 1e-6, 'sigma': .1, 'order': 256},
103 | {'q': 1e-5, 'sigma': .1, 'order': 256.1},
104 | {'q': 1e-6, 'sigma': 1, 'order': 27},
105 | {'q': 1e-4, 'sigma': 1., 'order': 1.5},
106 | {'q': 1e-3, 'sigma': 1., 'order': 2},
107 | {'q': .01, 'sigma': 10, 'order': 20},
108 | {'q': .1, 'sigma': 100, 'order': 20.5},
109 | {'q': .99, 'sigma': .1, 'order': 256},
110 | {'q': .999, 'sigma': 100, 'order': 256.1})
111 |
112 | # pylint:disable=undefined-variable
113 | @parameterized.parameters(p for p in params)
114 | def test_compute_log_a_equals_mp(self, q, sigma, order):
115 | # Compare the cheap computation of log(A) with an expensive, multi-precision
116 | # computation.
117 | log_a = rdp_accountant._compute_log_a(q, sigma, order)
118 | log_a_mp = self._log_float_mp(self._compute_a_mp(sigma, q, order))
119 | np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4)
120 |
121 | def test_get_privacy_spent_check_target_delta(self):
122 | orders = range(2, 33)
123 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
124 | eps, _, opt_order = rdp_accountant.get_privacy_spent(
125 | orders, rdp, target_delta=1e-5)
126 | self.assertAlmostEqual(eps, 1.258575, places=5)
127 | self.assertEqual(opt_order, 20)
128 |
129 | def test_get_privacy_spent_check_target_eps(self):
130 | orders = range(2, 33)
131 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
132 | _, delta, opt_order = rdp_accountant.get_privacy_spent(
133 | orders, rdp, target_eps=1.258575)
134 | self.assertAlmostEqual(delta, 1e-5)
135 | self.assertEqual(opt_order, 20)
136 |
137 | def test_check_composition(self):
138 | orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
139 | 16., 20., 24., 28., 32., 64., 256.)
140 |
141 | rdp = rdp_accountant.compute_rdp(q=1e-4,
142 | noise_multiplier=.4,
143 | steps=40000,
144 | orders=orders)
145 |
146 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
147 | target_delta=1e-6)
148 |
149 | rdp += rdp_accountant.compute_rdp(q=0.1,
150 | noise_multiplier=2,
151 | steps=100,
152 | orders=orders)
153 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
154 | target_delta=1e-5)
155 | self.assertAlmostEqual(eps, 8.509656, places=5)
156 | self.assertEqual(opt_order, 2.5)
157 |
158 | def test_compute_rdp_from_ledger(self):
159 | orders = range(2, 33)
160 | q = 0.1
161 | n = 1000
162 | l2_norm_clip = 3.14159
163 | noise_stddev = 2.71828
164 | steps = 3
165 |
166 | query_entry = privacy_ledger.GaussianSumQueryEntry(
167 | l2_norm_clip, noise_stddev)
168 | ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps
169 |
170 | z = noise_stddev / l2_norm_clip
171 | rdp = rdp_accountant.compute_rdp(q, z, steps, orders)
172 | rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders)
173 | self.assertSequenceAlmostEqual(rdp, rdp_from_ledger)
174 |
175 |
176 | if __name__ == '__main__':
177 | absltest.main()
178 |
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Vectorized differentially private optimizers for TensorFlow."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from distutils.version import LooseVersion
21 | import tensorflow as tf
22 |
23 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
24 | nest = tf.contrib.framework.nest
25 | AdagradOptimizer = tf.train.AdagradOptimizer
26 | AdamOptimizer = tf.train.AdamOptimizer
27 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer
28 | parent_code = tf.train.Optimizer.compute_gradients.__code__
29 | GATE_OP = tf.train.Optimizer.GATE_OP # pylint: disable=invalid-name
30 | else:
31 | nest = tf.nest
32 | AdagradOptimizer = tf.optimizers.Adagrad
33 | AdamOptimizer = tf.optimizers.Adam
34 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name
35 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ # pylint: disable=protected-access
36 | GATE_OP = None # pylint: disable=invalid-name
37 |
38 |
39 | def make_vectorized_optimizer_class(cls):
40 | """Constructs a vectorized DP optimizer class from an existing one."""
41 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
42 | child_code = cls.compute_gradients.__code__
43 | else:
44 | child_code = cls._compute_gradients.__code__ # pylint: disable=protected-access
45 | if child_code is not parent_code:
46 | tf.compat.v1.logging.warning(
47 | 'WARNING: Calling make_optimizer_class() on class %s that overrides '
48 | 'method compute_gradients(). Check to ensure that '
49 | 'make_optimizer_class() does not interfere with overridden version.',
50 | cls.__name__)
51 |
52 | class DPOptimizerClass(cls):
53 | """Differentially private subclass of given class cls."""
54 |
55 | def __init__(
56 | self,
57 | l2_norm_clip,
58 | noise_multiplier,
59 | num_microbatches=None,
60 | *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args
61 | **kwargs):
62 | """Initialize the DPOptimizerClass.
63 |
64 | Args:
65 | l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients)
66 | noise_multiplier: Ratio of the standard deviation to the clipping norm
67 | num_microbatches: How many microbatches into which the minibatch is
68 | split. If None, will default to the size of the minibatch, and
69 | per-example gradients will be computed.
70 | """
71 | super(DPOptimizerClass, self).__init__(*args, **kwargs)
72 | self._l2_norm_clip = l2_norm_clip
73 | self._noise_multiplier = noise_multiplier
74 | self._num_microbatches = num_microbatches
75 |
76 | def compute_gradients(self,
77 | loss,
78 | var_list,
79 | gate_gradients=GATE_OP,
80 | aggregation_method=None,
81 | colocate_gradients_with_ops=False,
82 | grad_loss=None,
83 | gradient_tape=None):
84 | if callable(loss):
85 | # TF is running in Eager mode
86 | raise NotImplementedError('Vectorized optimizer unavailable for TF2.')
87 | else:
88 | # TF is running in graph mode, check we did not receive a gradient tape.
89 | if gradient_tape:
90 | raise ValueError('When in graph mode, a tape should not be passed.')
91 |
92 | batch_size = tf.shape(loss)[0]
93 | if self._num_microbatches is None:
94 | self._num_microbatches = batch_size
95 |
96 | # Note: it would be closer to the correct i.i.d. sampling of records if
97 | # we sampled each microbatch from the appropriate binomial distribution,
98 | # although that still wouldn't be quite correct because it would be
99 | # sampling from the dataset without replacement.
100 | microbatch_losses = tf.reshape(loss, [self._num_microbatches, -1])
101 |
102 | if var_list is None:
103 | var_list = (
104 | tf.trainable_variables() + tf.get_collection(
105 | tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
106 |
107 | def process_microbatch(microbatch_loss):
108 | """Compute clipped grads for one microbatch."""
109 | microbatch_loss = tf.reduce_mean(microbatch_loss)
110 | grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients(
111 | microbatch_loss,
112 | var_list,
113 | gate_gradients,
114 | aggregation_method,
115 | colocate_gradients_with_ops,
116 | grad_loss))
117 | grads_list = [
118 | g if g is not None else tf.zeros_like(v)
119 | for (g, v) in zip(list(grads), var_list)
120 | ]
121 | # Clip gradients to have L2 norm of l2_norm_clip.
122 | # Here, we use TF primitives rather than the built-in
123 | # tf.clip_by_global_norm() so that operations can be vectorized
124 | # across microbatches.
125 | grads_flat = nest.flatten(grads_list)
126 | squared_l2_norms = [tf.reduce_sum(tf.square(g)) for g in grads_flat]
127 | global_norm = tf.sqrt(tf.add_n(squared_l2_norms))
128 | div = tf.maximum(global_norm / self._l2_norm_clip, 1.)
129 | clipped_flat = [g / div for g in grads_flat]
130 | clipped_grads = nest.pack_sequence_as(grads_list, clipped_flat)
131 | return clipped_grads
132 |
133 | clipped_grads = tf.vectorized_map(process_microbatch, microbatch_losses)
134 |
135 | def reduce_noise_normalize_batch(stacked_grads):
136 | summed_grads = tf.reduce_sum(stacked_grads, axis=0)
137 | noise_stddev = self._l2_norm_clip * self._noise_multiplier
138 | noise = tf.random.normal(tf.shape(summed_grads),
139 | stddev=noise_stddev)
140 | noised_grads = summed_grads + noise
141 | return noised_grads / tf.cast(self._num_microbatches, tf.float32)
142 |
143 | final_grads = nest.map_structure(reduce_noise_normalize_batch,
144 | clipped_grads)
145 |
146 | return list(zip(final_grads, var_list))
147 |
148 | return DPOptimizerClass
149 |
150 |
151 | VectorizedDPAdagrad = make_vectorized_optimizer_class(AdagradOptimizer)
152 | VectorizedDPAdam = make_vectorized_optimizer_class(AdamOptimizer)
153 | VectorizedDPSGD = make_vectorized_optimizer_class(GradientDescentOptimizer)
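
# A minimal usage sketch (assuming extra constructor arguments such as
# `learning_rate` are forwarded to the wrapped optimizer via the *args/**kwargs
# in __init__ above):
#
#   optimizer = VectorizedDPSGD(
#       l2_norm_clip=1.0,
#       noise_multiplier=1.1,
#       num_microbatches=250,
#       learning_rate=0.15)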
154 |
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/bolton_tutorial.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tutorial for bolt_on module, the model and the optimizer."""
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | import tensorflow as tf # pylint: disable=wrong-import-position
19 | from privacy.bolt_on import losses # pylint: disable=wrong-import-position
20 | from privacy.bolt_on import models # pylint: disable=wrong-import-position
21 | from privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position
22 | # -------
23 | # First, we will create a binary classification dataset with a single output
24 | # dimension. The samples for each label are repeated data points at different
25 | # points in space.
26 | # -------
27 | # Parameters for dataset
28 | n_samples = 10
29 | input_dim = 2
30 | n_outputs = 1
31 | # Create binary classification dataset:
32 | x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
33 | tf.constant(1, tf.float32, (n_samples, input_dim))]
34 | y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
35 | tf.constant(1, tf.float32, (n_samples, 1))]
36 | x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
37 | print(x.shape, y.shape)
38 | generator = tf.data.Dataset.from_tensor_slices((x, y))
39 | generator = generator.batch(10)
40 | generator = generator.shuffle(10)
41 | # -------
42 | # Next, we will explore using the pre-built BoltOnModel, which is a thin
43 | # wrapper around a Keras Model using a single-layer neural network.
44 | # It automatically uses the BoltOn Optimizer, which encompasses all the logic
45 | # required for the BoltOn Differential Privacy method.
46 | # -------
47 | bolt = models.BoltOnModel(n_outputs) # tell the model how many outputs we have.
48 | # -------
49 | # Now, we will pick our optimizer and strongly convex loss function. The loss
50 | # must extend from StrongConvexMixin and implement the associated methods. Some
51 | # existing loss functions are pre-implemented in bolt_on.losses.
52 | # -------
53 | optimizer = tf.optimizers.SGD()
54 | reg_lambda = 1
55 | C = 1
56 | radius_constant = 1
57 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
58 | # -------
59 | # For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
60 | # to be 1; these are all tunable and their impact is documented in
61 | # losses.StrongConvexBinaryCrossentropy. We then compile the model with the
62 | # chosen optimizer and loss, which will automatically wrap the chosen optimizer
63 | # with the BoltOn Optimizer, ensuring the components function as required
64 | # for privacy guarantees.
65 | # -------
66 | bolt.compile(optimizer, loss)
67 | # -------
68 | # To fit the model, the optimizer will require additional information about
69 | # the dataset and model. These parameters are:
70 | # 1. the class_weights used
71 | # 2. the number of samples in the dataset
72 | # 3. the batch size, which the model will try to infer if possible. If not,
73 | # you will be required to pass these explicitly to the fit method.
74 | #
75 | # In addition, there are two privacy parameters that can be altered:
76 | # 1. epsilon, a float
77 | # 2. noise_distribution, a valid string indicating the distribution to use
78 | # (must be implemented)
79 | #
80 | # The BoltOnModel offers a helper method, .calculate_class_weight, to aid in
81 | # class_weight calculation.
82 | # required parameters
83 | # -------
84 | class_weight = None # default, use .calculate_class_weight for other values
85 | batch_size = None # default, if it cannot be inferred, specify this
86 | n_samples = None # default, if it cannot be inferred, specify this
87 | # privacy parameters
88 | epsilon = 2
89 | noise_distribution = 'laplace'
90 |
91 | bolt.fit(x,
92 | y,
93 | epsilon=epsilon,
94 | class_weight=class_weight,
95 | batch_size=batch_size,
96 | n_samples=n_samples,
97 | noise_distribution=noise_distribution,
98 | epochs=2)
99 | # -------
100 | # We may also train on a generator object, or try different optimizers and
101 | # loss functions. Below, we will see that we must pass the number of samples,
102 | # as the fit method is unable to infer it for a generator.
103 | # -------
104 | optimizer2 = tf.optimizers.Adam()
105 | bolt.compile(optimizer2, loss)
106 | # required parameters
107 | class_weight = None # default, use .calculate_class_weight for other values
108 | batch_size = None # default, if it cannot be inferred, specify this
109 | n_samples = None # default, if it cannot be inferred, specify this
110 | # privacy parameters
111 | epsilon = 2
112 | noise_distribution = 'laplace'
113 | try:
114 | bolt.fit(generator,
115 | epsilon=epsilon,
116 | class_weight=class_weight,
117 | batch_size=batch_size,
118 | n_samples=n_samples,
119 | noise_distribution=noise_distribution,
120 | verbose=0)
121 | except ValueError as e:
122 | print(e)
123 | # -------
124 | # And now, re-running with the parameter set.
125 | # -------
126 | n_samples = 20
127 | bolt.fit_generator(generator,
128 | epsilon=epsilon,
129 | class_weight=class_weight,
130 | n_samples=n_samples,
131 | noise_distribution=noise_distribution,
132 | verbose=0)
133 | # -------
134 | # You don't have to use the BoltOn model to use the BoltOn method.
135 | # There are only a few requirements:
136 | # 1. make sure any requirements from the loss are implemented in the model.
137 | # 2. instantiate the optimizer and use it as a context around the fit operation.
138 | # -------
139 | # -------------------- Part 2, using the Optimizer
140 |
141 | # -------
142 | # Here, we create our own model and set up the BoltOn optimizer.
143 | # -------
144 |
145 |
146 | class TestModel(tf.keras.Model): # pylint: disable=abstract-method
147 |
148 | def __init__(self, reg_layer, number_of_outputs=1):
149 | super(TestModel, self).__init__(name='test')
150 | self.output_layer = tf.keras.layers.Dense(number_of_outputs,
151 | kernel_regularizer=reg_layer)
152 |
153 | def call(self, inputs): # pylint: disable=arguments-differ
154 | return self.output_layer(inputs)
155 |
156 |
157 | optimizer = tf.optimizers.SGD()
158 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
159 | optimizer = BoltOn(optimizer, loss)
160 | # -------
161 | # Now, we instantiate our model and satisfy requirement 1: since our loss
162 | # requires L2 regularization over the kernel, we pass it to the model.
163 | # -------
164 | n_outputs = 1 # parameter for model and optimizer context.
165 | test_model = TestModel(loss.kernel_regularizer(), n_outputs)
166 | test_model.compile(optimizer, loss)
167 | # -------
168 | # We comply with requirement 2 and use the BoltOn Optimizer as a context
169 | # around the fit method.
170 | # -------
171 | # parameters for context
172 | noise_distribution = 'laplace'
173 | epsilon = 2
174 | class_weights = 1 # Previously, the fit method auto-detected the class_weights.
175 | # Here, we need to pass the class_weights explicitly. 1 is the same as None.
176 | n_samples = 20
177 | batch_size = 5
178 |
179 | with optimizer(
180 | noise_distribution=noise_distribution,
181 | epsilon=epsilon,
182 | layers=test_model.layers,
183 | class_weights=class_weights,
184 | n_samples=n_samples,
185 | batch_size=batch_size
186 | ) as _:
187 | test_model.fit(x, y, batch_size=batch_size, epochs=2)
188 |
--------------------------------------------------------------------------------