├── tensorflow_privacy
│   ├── research
│   │   ├── pate_2017
│   │   │   ├── __init__.py
│   │   │   ├── train_student_mnist_250_lap_20_count_50_epochs_600.sh
│   │   │   ├── utils.py
│   │   │   ├── metrics.py
│   │   │   ├── train_teachers.py
│   │   │   ├── aggregation.py
│   │   │   └── README.md
│   │   ├── README.md
│   │   └── pate_2018
│   │       ├── ICLR2018
│   │       │   ├── generate_figures.sh
│   │       │   ├── download.py
│   │       │   ├── README.md
│   │       │   ├── generate_table.sh
│   │       │   ├── generate_table_data_independent.sh
│   │       │   ├── plot_ls_q.py
│   │       │   └── utility_queries_answered.py
│   │       ├── README.md
│   │       ├── core_test.py
│   │       └── smooth_sensitivity_test.py
│   ├── requirements.txt
│   ├── privacy
│   │   ├── BUILD
│   │   ├── bolt_on
│   │   │   ├── __init__.py
│   │   │   └── README.md
│   │   ├── dp_query
│   │   │   ├── normalized_query_test.py
│   │   │   ├── test_utils.py
│   │   │   ├── no_privacy_query.py
│   │   │   ├── no_privacy_query_test.py
│   │   │   ├── normalized_query.py
│   │   │   ├── BUILD
│   │   │   ├── nested_query.py
│   │   │   ├── gaussian_query.py
│   │   │   ├── nested_query_test.py
│   │   │   └── gaussian_query_test.py
│   │   ├── __init__.py
│   │   ├── analysis
│   │   │   ├── tensor_buffer_test_graph.py
│   │   │   ├── tensor_buffer_test_eager.py
│   │   │   ├── compute_dp_sgd_privacy.py
│   │   │   ├── tensor_buffer.py
│   │   │   ├── privacy_ledger_test.py
│   │   │   └── rdp_accountant_test.py
│   │   └── optimizers
│   │       ├── dp_optimizer_eager_test.py
│   │       └── dp_optimizer_vectorized.py
│   ├── CONTRIBUTING.md
│   ├── setup.py
│   ├── README.md
│   └── tutorials
│       ├── walkthrough
│       │   └── mnist_scratch.py
│       ├── mnist_dpsgd_tutorial_keras.py
│       ├── mnist_dpsgd_tutorial_eager.py
│       ├── README.md
│       └── bolton_tutorial.py
├── requirements.txt
├── figures
│   ├── Mnist_epoch-ε_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_e-03.png
│   ├── Mnist_accuracy-epsilon_lr_3e-06.png
│   ├── cifar10_accuracy-epoch_lr_e-03.png
│   └── cifar10_accuracy-epsilon_lr_e-03.png
├── README.md
├── results
│   ├── mnist_dpsgd_delta_1e-05_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.0001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.01_lr_3e-06.txt
│   ├── cifar_dpsgd_delta_0.0001_lr_0.001.txt
│   ├── cifar_dpsgd_delta_1e-06_lr_0.001.txt
│   └── cifar_dpsgd_delta_1e-05_lr_0.001.txt
├── dp_optimizer.py
└── mnist.py

--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tensorflow==2.0.0
tensorflow-privacy==0.1.0
--------------------------------------------------------------------------------
/tensorflow_privacy/requirements.txt:
--------------------------------------------------------------------------------
tensorflow>=1.13
mpmath
scipy>=0.17
--------------------------------------------------------------------------------
/figures/Mnist_epoch-ε_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_epoch-ε_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epsilon_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epsilon_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epsilon_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epsilon_lr_e-03.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning with Differential Privacy

## Prerequisites
Tested on Windows 10 with CUDA 10, cuDNN 7, and TensorFlow 2.0 under Anaconda 3:
```
conda create -n tf2 python=3.6
activate tf2
conda install tensorflow-gpu==2.0.0
pip install tensorflow-privacy==0.1.0
```
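
To sanity-check the installation, here is a minimal sketch (following this repo's `privacy.*` import layout; the batch size, noise multiplier, and epoch count below are illustrative, not the settings behind the results/ files) that computes the (ε, δ) guarantee of a DP-SGD run with the RDP accountant:

```python
from privacy.analysis.rdp_accountant import compute_rdp, get_privacy_spent

n = 60000                         # MNIST training-set size
batch_size = 250
noise_multiplier = 1.3            # ratio of noise stddev to the clipping norm
epochs = 15
delta = 1e-5

q = batch_size / float(n)         # per-step sampling probability
steps = epochs * n // batch_size  # total number of SGD steps
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))

rdp = compute_rdp(q=q, noise_multiplier=noise_multiplier,
                  steps=steps, orders=orders)
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
print('This DP-SGD configuration satisfies ({:.2f}, {})-DP.'.format(eps, delta))
```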
--------------------------------------------------------------------------------
/tensorflow_privacy/research/README.md:
--------------------------------------------------------------------------------
# Research

This folder contains code to reproduce results from research papers. Currently,
the following papers are included:

* Semi-supervised Knowledge Transfer for Deep Learning from Private Training
  Data (ICLR 2017): `pate_2017`

* Scalable Private Learning with PATE (ICLR 2018): `pate_2018`
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/BUILD:
--------------------------------------------------------------------------------
package(default_visibility = ["//visibility:public"])

licenses(["notice"])  # Apache 2.0

exports_files(["LICENSE"])

py_library(
    name = "privacy",
    srcs = ["__init__.py"],
    deps = [
        "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger",
        "//third_party/py/tensorflow_privacy/privacy/analysis:rdp_accountant",
        "//third_party/py/tensorflow_privacy/privacy/dp_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:gaussian_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:nested_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:no_privacy_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:normalized_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query",
        "//third_party/py/tensorflow_privacy/privacy/optimizers:dp_optimizer",
    ],
)
--------------------------------------------------------------------------------
/tensorflow_privacy/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

## Community Guidelines

This project follows Google's
[Open Source Community Guidelines](https://opensource.google.com/conduct/).
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


# Be sure to clone https://github.com/openai/improved-gan
# and add improved-gan/mnist_svhn_cifar10 to your PATH variable

# Download the labels used to train the student. Note: fetch the raw file;
# running wget on the GitHub "blob" page would save an HTML page instead of
# the .npy data.
wget https://github.com/npapernot/multiple-teachers-for-privacy/raw/master/mnist_250_student_labels_lap_20.npy

# Train the student using improved-gan
THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600

--------------------------------------------------------------------------------
/tensorflow_privacy/setup.py:
--------------------------------------------------------------------------------
# Copyright 2018, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Privacy library setup file for pip."""
from setuptools import find_packages
from setuptools import setup

setup(name='tensorflow_privacy',
      version='0.1.0',
      url='https://github.com/tensorflow/privacy',
      license='Apache-2.0',
      install_requires=[
          'scipy>=0.17',
          'mpmath',  # used in tests only
      ],
      # Explicit dependence on TensorFlow is not supported.
      # See https://github.com/tensorflow/tensorflow/issues/7166
      extras_require={
          'tf': ['tensorflow>=1.0.0'],
          'tf_gpu': ['tensorflow-gpu>=1.0.0'],
      },
      packages=find_packages())
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/utils.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


def batch_indices(batch_nb, data_length, batch_size):
  """
  Computes a batch's start and end index.
  :param batch_nb: the batch number
  :param data_length: the total length of the data being parsed by batches
  :param batch_size: the number of inputs in each batch
  :return: pair of (start, end) indices
  """
  # Batch start and end index
  start = int(batch_nb * batch_size)
  end = int((batch_nb + 1) * batch_size)

  # When there are not enough inputs left, we reuse some to complete the batch
  if end > data_length:
    shift = end - data_length
    start -= shift
    end -= shift

  return start, end
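
A quick illustration (not part of the repo) of the wrap-around behaviour documented in `batch_indices`; the import assumes you run from `research/pate_2017/`:

```python
from utils import batch_indices

print(batch_indices(0, data_length=10, batch_size=4))  # (0, 4)
print(batch_indices(1, data_length=10, batch_size=4))  # (4, 8)
# The last batch would overrun as (8, 12), so both indices are shifted back
# by 2 and the batch reuses inputs 6 and 7:
print(batch_indices(2, data_length=10, batch_size=4))  # (6, 10)
```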
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Privacy Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BoltOn Method for privacy."""
import sys
from distutils.version import LooseVersion
import tensorflow as tf

if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
  raise ImportError("Please upgrade your version "
                    "of tensorflow from: {0} to at least 2.0.0 to "
                    "use privacy/bolt_on".format(LooseVersion(tf.__version__)))
if hasattr(sys, "skip_tf_privacy_import"):  # Useful for standalone scripts.
  pass
else:
  from privacy.bolt_on.models import BoltOnModel  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.optimizers import BoltOn  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.losses import StrongConvexHuber  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy  # pylint: disable=g-import-not-at-top
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


counts_file="data/glyph_5000_teachers.npy"
output_dir="figures/"

mkdir -p $output_dir

if [ ! -d "$output_dir" ]; then
  echo "Directory $output_dir does not exist."
  exit 1
fi

python rdp_bucketized.py \
  --plot=small \
  --counts_file=$counts_file \
  --plot_file=$output_dir"noisy_thresholding_check_perf.pdf"

python rdp_bucketized.py \
  --plot=large \
  --counts_file=$counts_file \
  --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf"

python rdp_cumulative.py \
  --cache=False \
  --counts_file=$counts_file \
  --figures_dir=$output_dir

python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf"
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/download.py:
--------------------------------------------------------------------------------
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to download votes files to the data/ directory.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import urllib
import os
import tarfile

FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz'
DATA_DIR = 'data/'


def download():
  print('Downloading ' + FILE_URI)
  tar_filename, _ = urllib.request.urlretrieve(FILE_URI)
  print('Unpacking ' + tar_filename)
  with tarfile.open(tar_filename, "r:gz") as tar:
    tar.extractall(DATA_DIR)
  print('Done!')


if __name__ == '__main__':
  if not os.path.exists(DATA_DIR):
    print('Data directory does not exist. Creating ' + DATA_DIR)
    os.makedirs(DATA_DIR)
  download()
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/normalized_query_test.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for NormalizedQuery."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import normalized_query
from privacy.dp_query import test_utils


class NormalizedQueryTest(tf.test.TestCase):

  def test_normalization(self):
    with self.cached_session() as sess:
      record1 = tf.constant([-6.0, 8.0])  # L2 norm 10, clipped to [-3.0, 4.0].
      record2 = tf.constant([4.0, -3.0])  # L2 norm 5, not clipped.

      sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip=5.0, stddev=0.0)
      query = normalized_query.NormalizedQuery(
          numerator_query=sum_query, denominator=2.0)

      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [0.5, 0.5]
      self.assertAllClose(result, expected)


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/metrics.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def accuracy(logits, labels):
  """
  Return accuracy of the array of logits (or label predictions) with respect
  to the labels.
  :param logits: this can either be logits, probabilities, or a single label
  :param labels: the correct labels to match against
  :return: the accuracy as a float
  """
  assert len(logits) == len(labels)

  if len(np.shape(logits)) > 1:
    # Predicted labels are the argmax over axis 1
    predicted_labels = np.argmax(logits, axis=1)
  else:
    # Input was already labels
    assert len(np.shape(logits)) == 1
    predicted_labels = logits

  # Check against correct labels to compute correct guesses
  correct = np.sum(predicted_labels == labels.reshape(len(labels)))

  # Divide by number of labels to obtain accuracy
  acc = float(correct) / len(labels)

  # Return float value
  return acc
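
A tiny worked example (illustrative, not part of the repo) of `accuracy` on a batch of logits; the import assumes you run from `research/pate_2017/`:

```python
import numpy as np

from metrics import accuracy

logits = np.array([[2.0, 0.1],    # argmax -> 0, matches label (correct)
                   [0.3, 1.5],    # argmax -> 1, matches label (correct)
                   [0.9, 0.4]])   # argmax -> 0, label is 1 (wrong)
labels = np.array([0, 1, 1])

print(accuracy(logits, labels))  # 2 correct out of 3 -> 0.666...
```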
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/test_utils.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility methods for testing private queries."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


def run_query(query, records, global_state=None, weights=None):
  """Executes query on the given set of records as a single sample.

  Args:
    query: A PrivateQuery to run.
    records: An iterable containing records to pass to the query.
    global_state: The current global state. If None, an initial global state is
      generated.
    weights: An optional iterable containing the weights of the records.

  Returns:
    A tuple (result, new_global_state) where "result" is the result of the
    query and "new_global_state" is the updated global state.
  """
  if not global_state:
    global_state = query.initial_global_state()
  params = query.derive_sample_params(global_state)
  sample_state = query.initial_sample_state(next(iter(records)))
  if weights is None:
    for record in records:
      sample_state = query.accumulate_record(params, sample_state, record)
  else:
    for weight, record in zip(weights, records):
      sample_state = query.accumulate_record(
          params, sample_state, record, weight)
  return query.get_noised_result(sample_state, global_state)
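
For orientation, a small sketch (assuming this repo's layout and TF 1.x graph mode) of driving a DPQuery through `run_query`; the record values are made up:

```python
import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import test_utils

# Sum two records under an L2 clip of 4.0 (neither record exceeds it here)
# with Gaussian noise of stddev 1.0 added to the sum.
records = [tf.constant([1.0, 2.0]), tf.constant([3.0, -1.0])]
query = gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=1.0)
noised_sum, _ = test_utils.run_query(query, records)

with tf.Session() as sess:
  print(sess.run(noised_sum))  # ~[4.0, 1.0] plus N(0, 1) noise per coordinate
```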
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/README.md:
--------------------------------------------------------------------------------
Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas
Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar
Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908).


### Requirements

* Python, version ≥ 2.7
* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
* matplotlib
* numpy
* scipy
* sympy (for smooth sensitivity analysis)
* write access to the current directory (otherwise, output directories in download.py and *.sh
  scripts must be changed)

## Reproducing Figures 1 and 5, and Table 2

Before running any of the analysis scripts, create the data/ directory and download votes files by running\
`$ python download.py`

To generate Figures 1 and 5 run\
`$ sh generate_figures.sh`\
The output is written to the figures/ directory.

For Table 2 run (may take several hours)\
`$ sh generate_table.sh`\
The output is written to the console.

For data-independent bounds (for comparison with Table 2), run\
`$ sh generate_table_data_independent.sh`\
The output is written to the console.

## Files in this directory

* generate_figures.sh — Master script for generating Figures 1 and 5.

* generate_table.sh — Master script for generating Table 2.

* generate_table_data_independent.sh — Master script for computing data-independent
  bounds.

* rdp_bucketized.py — Script for producing Figure 1 (right) and Figure 5 (right).

* rdp_cumulative.py — Script for producing Figure 1 (middle) and Figure 5 (left).

* smooth_sensitivity_table.py — Script for generating Table 2.

* utility_queries_answered.py — Script for producing Figure 1 (left).

* plot_partition.py — Script for producing partition.pdf, a detailed breakdown of privacy
  costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).

* plots_for_slides.py — Script for producing several plots for the slide deck.

* download.py — Utility script for populating the data/ directory.

* plot_ls_q.py is not used.


All Python files take flags. Run `script_name.py --help` for help on flags.
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


echo "Reproducing Table 2. Takes a couple of hours."
19 | 20 | executable="python smooth_sensitivity_table.py" 21 | data_dir="data" 22 | 23 | echo 24 | echo "######## MNIST ########" 25 | echo 26 | 27 | $executable \ 28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \ 29 | --threshold=200 \ 30 | --sigma1=150 \ 31 | --sigma2=40 \ 32 | --queries=640 \ 33 | --delta=1e-5 34 | 35 | echo 36 | echo "######## SVHN ########" 37 | echo 38 | 39 | $executable \ 40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \ 41 | --threshold=300 \ 42 | --sigma1=200 \ 43 | --sigma2=40 \ 44 | --queries=8500 \ 45 | --delta=1e-6 46 | 47 | echo 48 | echo "######## Adult ########" 49 | echo 50 | 51 | $executable \ 52 | --counts_file=$data_dir"/adult_250_teachers.npy" \ 53 | --threshold=300 \ 54 | --sigma1=200 \ 55 | --sigma2=40 \ 56 | --queries=1500 \ 57 | --delta=1e-5 58 | 59 | echo 60 | echo "######## Glyph (Confident) ########" 61 | echo 62 | 63 | $executable \ 64 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \ 65 | --threshold=1000 \ 66 | --sigma1=500 \ 67 | --sigma2=100 \ 68 | --queries=12000 \ 69 | --delta=1e-8 70 | 71 | echo 72 | echo "######## Glyph (Interactive, Round 1) ########" 73 | echo 74 | 75 | $executable \ 76 | --counts_file=$data_dir"/glyph_round1.npy" \ 77 | --threshold=3500 \ 78 | --sigma1=1500 \ 79 | --sigma2=100 \ 80 | --delta=1e-8 81 | 82 | echo 83 | echo "######## Glyph (Interactive, Round 2) ########" 84 | echo 85 | 86 | $executable \ 87 | --counts_file=$data_dir"/glyph_round2.npy" \ 88 | --baseline_file=$data_dir"/glyph_round2_student.npy" \ 89 | --threshold=3500 \ 90 | --sigma1=2000 \ 91 | --sigma2=200 \ 92 | --teachers=5000 \ 93 | --delta=1e-8 94 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_1e-05_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [1.0244832274550977, 1.0895559994960096, 1.152984324480169, 1.1884359033987033, 1.2238874823172377, 1.259339061235772, 1.2947906401543063, 1.3302422190728407, 1.365693797991375, 1.4011453769099091, 1.4365969558284435, 1.4720485347469778, 1.5075001136655122, 1.5429516925840465, 1.5784032715025806, 1.6138548504211152, 1.6493064293396493, 1.6847580082581837, 1.720209587176718, 1.7556611660952521, 1.7911127450137867, 1.8265643239323208, 1.8620159028508552, 1.8974674817693895, 1.9329190606879239, 1.9683706396064582, 2.0038222185249923, 2.039273797443527, 2.074725376362061, 2.1070971504466094, 2.1385696633235494, 2.1700421762004893, 2.2015146890774293, 2.2329872019543693, 2.2644597148313097, 2.2959322277082497, 2.3273444725641, 2.3584159072093906, 2.389150196384082, 2.4195528187374924, 2.4496306673437216, 2.47939177188419, 2.5088451064044532, 2.5380004577251647, 2.566868337378801, 2.5954599248338712, 2.6237615073107916, 2.651776396750234, 2.6795399578410217, 2.7070652189159037, 2.734365832716886, 2.76142277415431, 2.788203585584366, 2.814789652616146, 2.84119660459472, 2.8673271737581487, 2.8932638632808336, 2.9190555302993633, 2.9445717345610545, 2.969930040191774, 2.9951400019060035, 3.0200849330064745, 3.0449423727552256, 3.0695416941430738, 3.0940278310878373, 3.1183227421228525, 3.1424603930913553, 3.16645149256044, 3.190264545989246, 3.213952427257068, 3.2374659175249123, 3.2608511687943733, 3.284091357435858, 3.3071745619803172, 3.330169025294796, 3.3529507616602854, 3.375728486088109, 3.3982093282729817, 3.4206901704578545, 3.442981331768199, 3.4651618493845584, 3.4872994645999036, 3.509180223018679, 3.531060981437454, 3.5527797253598825, 3.5743612858426586, 
3.5959428463254346, 3.617279590270551, 3.6385625100894483, 3.659845429908345, 3.6808390641874653, 3.701823896735645, 3.7228087292838246, 3.743500323855752, 3.764187618748881, 3.784874913642011, 3.80530789401406, 3.825698197184275, 3.84608850035449, 3.866308840554522] 2 | validation acc: [0.0594, 0.1068, 0.1737, 0.2333, 0.2853, 0.337, 0.3641, 0.3896, 0.4202, 0.4531, 0.5045, 0.5675, 0.6272, 0.6809, 0.7356, 0.7729, 0.7981, 0.8153, 0.8298, 0.8387, 0.849, 0.8564, 0.8637, 0.8688, 0.8737, 0.8804, 0.8856, 0.8913, 0.8965, 0.8998, 0.9034, 0.9064, 0.9102, 0.9129, 0.9151, 0.9179, 0.92, 0.9222, 0.9237, 0.9245, 0.927, 0.9291, 0.9301, 0.9323, 0.933, 0.9337, 0.9363, 0.9348, 0.9372, 0.939, 0.9399, 0.9399, 0.9411, 0.9413, 0.9434, 0.9435, 0.944, 0.9435, 0.9454, 0.9458, 0.9469, 0.9472, 0.9476, 0.9484, 0.9482, 0.949, 0.9497, 0.9502, 0.9516, 0.9515, 0.952, 0.9516, 0.9526, 0.9526, 0.9522, 0.9533, 0.9523, 0.9533, 0.9551, 0.9536, 0.9547, 0.9554, 0.9554, 0.955, 0.957, 0.9563, 0.9566, 0.9572, 0.9567, 0.9581, 0.9574, 0.9583, 0.9587, 0.9579, 0.9582, 0.9606, 0.9602, 0.9609, 0.9602, 0.9611] 3 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.0001_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.8326011363722605, 0.8976739084131724, 0.9436584069352558, 0.9791099858537902, 1.0145615647723245, 1.0500131436908586, 1.085464722609393, 1.1209163015279273, 1.1563678804464617, 1.191819459364996, 1.2272710382835303, 1.2627226172020647, 1.2981741961205988, 1.3336257750391332, 1.3690773539576675, 1.4045289328762018, 1.4399805117947362, 1.4754320907132703, 1.5108836696318049, 1.546335248550339, 1.5817868274688733, 1.6172384063874077, 1.6526899853059418, 1.6856777203572872, 1.7171502332342274, 1.7486227461111676, 1.7800952589881076, 1.8115677718650476, 1.8430402847419878, 1.8743356446438426, 1.9052181718400023, 1.9356422549899939, 1.96567941857039, 1.9953366822421725, 2.0246147124836638, 2.0534946699030403, 2.082028256362481, 2.110225372299399, 2.1380963940615825, 2.165652175132723, 2.1929040544609935, 2.219863870575365, 2.2465439806151197, 2.272957283675776, 2.2990878528392047, 2.3249403189786673, 2.350554146796505, 2.375944032153419, 2.4011253446527228, 2.4260296809068174, 2.4507134910368893, 2.47522226487027, 2.499517175905285, 2.5235647542607182, 2.547474060537832, 2.5711619418056544, 2.594649784273822, 2.618035035543283, 2.6411230086753616, 2.664135220235837, 2.6869169566013262, 2.709575125744436, 2.732055967929309, 2.754385065414559, 2.7765655830309184, 2.798591875047433, 2.820472633466208, 2.842223780287906, 2.863805340770682, 2.8853105042888822, 2.906593424107779, 2.927876343926676, 2.9488682178596077, 2.969853050407788, 2.9906628001059756, 3.0113500949991048, 3.032012133309119, 3.0524024364793334, 3.0727927396495485, 3.093047072443617, 3.113140926228243, 3.1332347800128684, 3.1531211515642337, 3.172919094784786, 3.1927170380053385, 3.212277580271345, 3.231780148313306, 3.2512827163552673, 3.270562153959215, 3.2897698788428245, 3.308977603726434, 3.3280234805794278, 3.3469368910280495, 3.365850301476671, 3.3847132309541426, 3.4033328524582283, 3.421952473962314, 3.4405720954663996, 3.459012770868646, 3.477339125747593] 2 | validation acc: [0.0623, 0.1518, 0.2339, 0.2726, 0.2902, 0.3211, 0.3532, 0.4021, 0.4795, 0.5877, 0.6763, 0.7647, 0.8091, 0.8336, 0.8494, 0.8648, 0.8742, 0.8808, 0.8885, 0.893, 0.8974, 0.9032, 0.9063, 0.91, 0.9137, 0.9162, 0.9186, 0.921, 0.9246, 0.9264, 0.9289, 0.9309, 0.9311, 0.9326, 
0.9339, 0.9365, 0.9359, 0.9389, 0.94, 0.9398, 0.9403, 0.9413, 0.9423, 0.9437, 0.9441, 0.9456, 0.9463, 0.9469, 0.9472, 0.9482, 0.9484, 0.9501, 0.9503, 0.9514, 0.9519, 0.9517, 0.9526, 0.9531, 0.9533, 0.9537, 0.9544, 0.9547, 0.9554, 0.956, 0.9564, 0.9565, 0.9567, 0.9576, 0.958, 0.9577, 0.9581, 0.9591, 0.9585, 0.9595, 0.9604, 0.9601, 0.9608, 0.9608, 0.9618, 0.9608, 0.9612, 0.9622, 0.9628, 0.9624, 0.9629, 0.9633, 0.9632, 0.9635, 0.9635, 0.9641, 0.9644, 0.9647, 0.9652, 0.9645, 0.9654, 0.9654, 0.9658, 0.9661, 0.9659, 0.9658] 3 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.001_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.6407190452894234, 0.6988809104718083, 0.7343324893903427, 0.769784068308877, 0.8052356472274113, 0.8406872261459456, 0.8761388050644799, 0.9115903839830142, 0.9470419629015485, 0.9824935418200829, 1.0179451207386172, 1.0533966996571515, 1.0888482785756857, 1.12429985749422, 1.1597514364127544, 1.1952030153312887, 1.230654594249823, 1.2642582902679655, 1.2957308031449055, 1.3272033160218457, 1.358675828898786, 1.3901483417757259, 1.4212154199821514, 1.4517316912424953, 1.4817234307631033, 1.5111556227476015, 1.5401210024272802, 1.5685965878910908, 1.5966528302821439, 1.6242391313495421, 1.651442276205101, 1.6782716502966837, 1.704717962756832, 1.7307937037915002, 1.756536559031956, 1.7819580241150645, 1.8070700925530656, 1.8318852879307048, 1.8564166986527024, 1.8806780159609964, 1.9046691154300808, 1.928371456354241, 1.9518336510227319, 1.9750714553704065, 1.998101415176878, 2.0208831515423666, 2.0434217654007636, 2.065788799060919, 2.0879693166772784, 2.109884285494962, 2.1316678352159295, 2.1532493956987055, 2.17462433812611, 2.1959072579450067, 2.2168973715317506, 2.2378252763561997, 2.258512571249329, 2.2791066757743925, 2.2994969789446076, 2.3197853043327132, 2.3398791581173386, 2.3598913778680375, 2.3796893210885894, 2.3994569011825284, 2.4189594692244896, 2.4384620372664503, 2.457723546690314, 2.476931271573923, 2.4960176104345706, 2.5149310208831928, 2.533844431331814, 2.552499639343671, 2.5711192608477567, 2.589677989431748, 2.608004344310695, 2.6263306991896416, 2.644543279614852, 2.6625768870738824, 2.6806104945329134, 2.6985240355045024, 2.7162654116914053, 2.7340067878783083, 2.751671671352242, 2.769121329409039, 2.786570987465835, 2.8040206455226313, 2.821199636692215, 2.8383580868067537, 2.855516536921293, 2.872559270264915, 2.8894270197188785, 2.906294769172841, 2.9231625186268038, 2.9398410578754635, 2.9564186110890702, 2.972996164302677, 2.9895737175162838, 3.0059561161040556, 3.0222439746836636, 3.0385318332632716] 2 | validation acc: [0.1552, 0.201, 0.269, 0.3583, 0.4291, 0.4754, 0.5168, 0.5515, 0.6042, 0.6536, 0.6972, 0.7508, 0.7861, 0.8116, 0.8305, 0.8419, 0.8523, 0.8625, 0.8687, 0.8793, 0.8832, 0.8886, 0.8923, 0.8974, 0.8993, 0.9025, 0.9062, 0.9072, 0.9088, 0.9121, 0.9126, 0.9139, 0.9173, 0.9184, 0.9179, 0.921, 0.9215, 0.9238, 0.924, 0.9251, 0.9272, 0.9278, 0.9285, 0.93, 0.9303, 0.9322, 0.932, 0.9352, 0.9359, 0.9358, 0.9377, 0.9383, 0.9389, 0.9398, 0.9402, 0.9414, 0.9414, 0.9424, 0.9434, 0.9454, 0.944, 0.9457, 0.9471, 0.947, 0.9472, 0.9484, 0.9489, 0.9493, 0.9502, 0.9503, 0.951, 0.9518, 0.9522, 0.953, 0.9526, 0.9535, 0.9542, 0.9536, 0.9537, 0.9546, 0.955, 0.9558, 0.9562, 0.9546, 0.9559, 0.9565, 0.9561, 0.9571, 0.957, 0.9575, 0.9576, 0.958, 0.9576, 0.9582, 0.9584, 0.9588, 0.9588, 0.959, 0.9593, 0.9592] 3 | 
-------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.01_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.4488369542065862, 0.4895549929268951, 0.5250065718454293, 0.5604581507639637, 0.595909729682498, 0.6313613086010323, 0.6668128875195666, 0.7022644664381009, 0.7377160453566352, 0.7731676242751695, 0.8086192031937038, 0.8428388601786436, 0.8743113730555838, 0.9057838859325238, 0.9371678223219213, 0.9678211274949969, 0.9976683411210863, 1.0267473349515202, 1.0551126861496996, 1.0828260875663616, 1.1099319352876824, 1.136478641837888, 1.1624701594893336, 1.1879720160767095, 1.2130148404534087, 1.237611132435135, 1.2617823771303591, 1.2855809709028272, 1.3090175177716414, 1.3320676101179185, 1.354787562872218, 1.3771925327072796, 1.3992959375237164, 1.421111890143953, 1.4426552521444411, 1.463938171963338, 1.484926525203894, 1.5056750474995524, 1.5262012182396667, 1.5465235362218086, 1.5666173900064342, 1.586459547392393, 1.6061387901356725, 1.6256413581776337, 1.6448849394214122, 1.6640117402897139, 1.6829251507383356, 1.7016664262291141, 1.7202860477331998, 1.7386695628737965, 1.7569959177527434, 1.775051258049255, 1.7930848655082858, 1.8108436077942702, 1.8285849839811732, 1.8460808862726925, 1.863530544329489, 1.8807997577948097, 1.8979582079093487, 1.9150395135099436, 1.9319072629639062, 1.948775012417869, 1.9654198896548445, 1.9819974428684513, 1.9985414578761678, 2.0148293164557756, 2.0311171750353836, 2.047319524470186, 2.063318187250421, 2.079316850030656, 2.095230822970841, 2.110940786057371, 2.1266507491439004, 2.142333957071471, 2.1577557138768193, 2.173177470682168, 2.188599227487517, 2.203826372709379, 2.218960413994861, 2.234094455280343, 2.2492215146115266, 2.2640683285237113, 2.2789151424358964, 2.2937619563480807, 2.308575939755106, 2.3231360118616964, 2.3376960839682868, 2.3522561560748776, 2.3668162281814684, 2.381112291486219, 2.395386104814934, 2.4096599181436478, 2.4239337314723626, 2.438087536786136, 2.4520755718531704, 2.466063606920205, 2.480051641987239, 2.4940396770542734, 2.5078632962244485, 2.52156603107018] 2 | validation acc: [0.1643, 0.1843, 0.2231, 0.2901, 0.3378, 0.3651, 0.3892, 0.4121, 0.4509, 0.4894, 0.55, 0.6317, 0.7042, 0.7651, 0.7938, 0.816, 0.8333, 0.8453, 0.8603, 0.8713, 0.8776, 0.882, 0.887, 0.8912, 0.8959, 0.9004, 0.9038, 0.907, 0.9092, 0.9123, 0.9154, 0.9176, 0.9196, 0.9214, 0.9244, 0.9251, 0.9279, 0.9287, 0.9308, 0.9316, 0.9326, 0.9333, 0.9352, 0.936, 0.9369, 0.938, 0.9388, 0.9404, 0.9409, 0.9412, 0.942, 0.943, 0.9447, 0.9447, 0.9461, 0.9462, 0.9467, 0.9481, 0.9481, 0.9478, 0.948, 0.95, 0.9501, 0.9507, 0.9507, 0.9513, 0.9516, 0.9524, 0.9532, 0.9542, 0.9541, 0.9543, 0.9549, 0.9557, 0.9561, 0.9562, 0.9577, 0.9565, 0.9576, 0.9584, 0.9585, 0.9592, 0.9593, 0.9598, 0.9594, 0.9603, 0.9609, 0.9611, 0.9612, 0.9619, 0.9623, 0.9625, 0.963, 0.9628, 0.9634, 0.9632, 0.9635, 0.9637, 0.9635, 0.9634] 3 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/no_privacy_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements DPQuery interface for no privacy average queries."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from distutils.version import LooseVersion
import tensorflow as tf

from privacy.dp_query import dp_query

if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
  nest = tf.contrib.framework.nest
else:
  nest = tf.nest


class NoPrivacySumQuery(dp_query.SumAggregationDPQuery):
  """Implements DPQuery interface for a sum query with no privacy.

  Accumulates vectors without clipping or adding noise.
  """

  def get_noised_result(self, sample_state, global_state):
    """See base class."""
    return sample_state, global_state


class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
  """Implements DPQuery interface for an average query with no privacy.

  Accumulates vectors and normalizes by the total weight of the accumulated
  vectors (the number of vectors, when all weights are 1).
  """

  def initial_sample_state(self, template):
    """See base class."""
    return (super(NoPrivacyAverageQuery, self).initial_sample_state(template),
            tf.constant(0.0))

  def preprocess_record(self, params, record, weight=1):
    """Multiplies record by weight."""
    weighted_record = nest.map_structure(lambda t: weight * t, record)
    return (weighted_record, tf.cast(weight, tf.float32))

  def accumulate_record(self, params, sample_state, record, weight=1):
    """Accumulates record, multiplying by weight."""
    weighted_record = nest.map_structure(lambda t: weight * t, record)
    return self.accumulate_preprocessed_record(
        sample_state, (weighted_record, tf.cast(weight, tf.float32)))

  def get_noised_result(self, sample_state, global_state):
    """See base class."""
    sum_state, denominator = sample_state

    return (
        nest.map_structure(lambda t: t / denominator, sum_state),
        global_state)
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Privacy Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Privacy library."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

# pylint: disable=g-import-not-at-top

if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
  pass
else:
  from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
  from privacy.analysis.privacy_ledger import PrivacyLedger
  from privacy.analysis.privacy_ledger import QueryWithLedger
  from privacy.analysis.privacy_ledger import SampleEntry

  from privacy.dp_query.dp_query import DPQuery
  from privacy.dp_query.gaussian_query import GaussianAverageQuery
  from privacy.dp_query.gaussian_query import GaussianSumQuery
  from privacy.dp_query.nested_query import NestedQuery
  from privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
  from privacy.dp_query.no_privacy_query import NoPrivacySumQuery
  from privacy.dp_query.normalized_query import NormalizedQuery
  from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipSumQuery
  from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipAverageQuery

  from privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPAdagradOptimizer
  from privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPAdamOptimizer
  from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer

  try:
    from privacy.bolt_on.models import BoltOnModel
    from privacy.bolt_on.optimizers import BoltOn
    from privacy.bolt_on.losses import StrongConvexMixin
    from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
    from privacy.bolt_on.losses import StrongConvexHuber
  except ImportError:
    # module `bolt_on` not yet available in this version of TF Privacy
    pass
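
Given these re-exports, here is a rough sketch of wiring the DP optimizer into TF 1.x-style training; the constructor arguments follow the repo's DP-SGD tutorials, and `vector_loss` is a hypothetical per-example loss tensor you must define yourself:

```python
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

optimizer = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,      # clip each microbatch gradient to this L2 norm
    noise_multiplier=1.1,  # noise stddev = noise_multiplier * l2_norm_clip
    num_microbatches=256,  # how many microbatches each minibatch is split into
    learning_rate=0.15)

# DP optimizers need the per-example (vector) loss rather than a scalar mean,
# so that gradients can be clipped per microbatch before averaging.
train_op = optimizer.minimize(loss=vector_loss)
```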
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


echo "Table 2 with data-independent analysis."

executable="python smooth_sensitivity_table.py"
data_dir="data"

echo
echo "######## MNIST ########"
echo

$executable \
  --counts_file=$data_dir"/mnist_250_teachers.npy" \
  --threshold=200 \
  --sigma1=150 \
  --sigma2=40 \
  --queries=640 \
  --delta=1e-5 \
  --data_independent
echo
echo "######## SVHN ########"
echo

$executable \
  --counts_file=$data_dir"/svhn_250_teachers.npy" \
  --threshold=300 \
  --sigma1=200 \
  --sigma2=40 \
  --queries=8500 \
  --delta=1e-6 \
  --data_independent

echo
echo "######## Adult ########"
echo

$executable \
  --counts_file=$data_dir"/adult_250_teachers.npy" \
  --threshold=300 \
  --sigma1=200 \
  --sigma2=40 \
  --queries=1500 \
  --delta=1e-5 \
  --data_independent

echo
echo "######## Glyph (Confident) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_5000_teachers.npy" \
  --threshold=1000 \
  --sigma1=500 \
  --sigma2=100 \
  --queries=12000 \
  --delta=1e-8 \
  --data_independent

echo
echo "######## Glyph (Interactive, Round 1) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_round1.npy" \
  --threshold=3500 \
  --sigma1=1500 \
  --sigma2=100 \
  --delta=1e-8 \
  --data_independent

echo
echo "######## Glyph (Interactive, Round 2) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_round2.npy" \
  --baseline_file=$data_dir"/glyph_round2_student.npy" \
  --threshold=3500 \
  --sigma1=2000 \
  --sigma2=200 \
  --teachers=5000 \
  --delta=1e-8 \
  --order=8.5 \
  --data_independent
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/README.md:
--------------------------------------------------------------------------------
# BoltOn Subpackage

This package contains source code for the BoltOn method, a particular
differential-privacy (DP) technique that uses output perturbations and
leverages additional assumptions to provide a new way of approaching the
privacy guarantees.

## BoltOn Description

This method uses four key steps to achieve privacy guarantees:

1. Adds noise to weights after training (output perturbation).
2. Projects weights to R, the radius of the hypothesis space,
   after each batch. This value is configurable by the user.
3. Limits the learning rate.
4. Uses a strongly convex loss function (see compile).

For more details on the strong convexity requirements, see:
Bolt-on Differential Privacy for Scalable Stochastic Gradient
Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf

## Why BoltOn?

The major difference of the BoltOn method is that it injects noise after model
convergence, rather than noising gradients or weights during training. This
approach requires the additional constraints listed in the Description.
Should the use case and model satisfy these constraints, this is another
approach that can be trained to maximize utility while maintaining privacy.
The paper describes in detail the advantages and disadvantages of this
approach and its results compared to some other methods, namely noising at
each iteration and no noising.

## Tutorials

This package has a tutorial that can be found in the root tutorials directory,
under `bolton_tutorial.py`.

## Contribution

This package was initially contributed by Georgian Partners with the hope of
growing the tensorflow/privacy library. There are several rich use cases for
(epsilon, delta)-differential privacy in machine learning, some of which can
be explored here:
https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e
https://arxiv.org/pdf/1811.04911.pdf

## Stability

As this package is pegged to TensorFlow 2.0, it may encounter stability
issues in the ongoing development of TensorFlow 2.0.

This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0b1. If you
would like to use this subpackage, please do use one of these versions as we
cannot guarantee it will work for all latest releases. If you do find issues,
feel free to raise an issue to the contributors listed below.

## Contacts

In addition to the maintainers of tensorflow/privacy listed in the root
README.md, please feel free to contact members of Georgian Partners. In
particular,

* Georgian Partners (@georgianpartners)
* Ji Chao Zhang (@Jichaogp)
* Christopher Choquette (@cchoquette)

## Copyright

Copyright 2019 - Google LLC
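
Following the Tutorials pointer above, a condensed sketch in the spirit of `bolton_tutorial.py`; the argument values are illustrative and `x`, `y` are hypothetical training data, so consult the tutorial for the exact API:

```python
import tensorflow as tf

from privacy.bolt_on import losses
from privacy.bolt_on import models

# Binary classification with a strongly convex loss, as BoltOn requires.
bolt = models.BoltOnModel(n_outputs=1)
loss = losses.StrongConvexBinaryCrossentropy(
    1.0, 1.0, 1.0)  # reg_lambda, C, radius_constant (illustrative values)
bolt.compile(optimizer=tf.optimizers.SGD(), loss=loss)

# Noise is sampled and applied at the end of fit (output perturbation).
bolt.fit(x, y, epsilon=2, noise_distribution='laplace', epochs=2)
```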
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for NoPrivacyAverageQuery."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import tensorflow as tf

from privacy.dp_query import no_privacy_query
from privacy.dp_query import test_utils


class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase):

  def test_sum(self):
    with self.cached_session() as sess:
      record1 = tf.constant([2.0, 0.0])
      record2 = tf.constant([-1.0, 1.0])

      query = no_privacy_query.NoPrivacySumQuery()
      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [1.0, 1.0]
      self.assertAllClose(result, expected)

  def test_no_privacy_average(self):
    with self.cached_session() as sess:
      record1 = tf.constant([5.0, 0.0])
      record2 = tf.constant([-1.0, 2.0])

      query = no_privacy_query.NoPrivacyAverageQuery()
      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [2.0, 1.0]
      self.assertAllClose(result, expected)

  def test_no_privacy_weighted_average(self):
    with self.cached_session() as sess:
      record1 = tf.constant([4.0, 0.0])
      record2 = tf.constant([-1.0, 1.0])

      weights = [1, 3]

      query = no_privacy_query.NoPrivacyAverageQuery()
      query_result, _ = test_utils.run_query(
          query, [record1, record2], weights=weights)
      result = sess.run(query_result)
      expected = [0.25, 0.75]
      self.assertAllClose(result, expected)

  @parameterized.named_parameters(
      ('type_mismatch', [1.0], (1.0,), TypeError),
      ('too_few_on_left', [1.0], [1.0, 1.0], ValueError),
      ('too_few_on_right', [1.0, 1.0], [1.0], ValueError))
  def test_incompatible_records(self, record1, record2, error_type):
    query = no_privacy_query.NoPrivacySumQuery()
    with self.assertRaises(error_type):
      test_utils.run_query(query, [record1, record2])


if __name__ == '__main__':
  tf.test.main()
14 | """Tests for tensor_buffer in graph mode.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import tensor_buffer 23 | 24 | 25 | class TensorBufferTest(tf.test.TestCase): 26 | """Tests for TensorBuffer in graph mode.""" 27 | 28 | def test_noresize(self): 29 | """Test buffer does not resize if capacity is not exceeded.""" 30 | with self.cached_session() as sess: 31 | size, shape = 2, [2, 3] 32 | 33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 34 | value1 = [[1, 2, 3], [4, 5, 6]] 35 | with tf.control_dependencies([my_buffer.append(value1)]): 36 | value2 = [[7, 8, 9], [10, 11, 12]] 37 | with tf.control_dependencies([my_buffer.append(value2)]): 38 | values = my_buffer.values 39 | current_size = my_buffer.current_size 40 | capacity = my_buffer.capacity 41 | self.evaluate(tf.global_variables_initializer()) 42 | 43 | v, cs, cap = sess.run([values, current_size, capacity]) 44 | self.assertAllEqual(v, [value1, value2]) 45 | self.assertEqual(cs, 2) 46 | self.assertEqual(cap, 2) 47 | 48 | def test_resize(self): 49 | """Test buffer resizes if capacity is exceeded.""" 50 | with self.cached_session() as sess: 51 | size, shape = 2, [2, 3] 52 | 53 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 54 | value1 = [[1, 2, 3], [4, 5, 6]] 55 | with tf.control_dependencies([my_buffer.append(value1)]): 56 | value2 = [[7, 8, 9], [10, 11, 12]] 57 | with tf.control_dependencies([my_buffer.append(value2)]): 58 | value3 = [[13, 14, 15], [16, 17, 18]] 59 | with tf.control_dependencies([my_buffer.append(value3)]): 60 | values = my_buffer.values 61 | current_size = my_buffer.current_size 62 | capacity = my_buffer.capacity 63 | self.evaluate(tf.global_variables_initializer()) 64 | 65 | v, cs, cap = sess.run([values, current_size, capacity]) 66 | self.assertAllEqual(v, [value1, value2, value3]) 67 | self.assertEqual(cs, 3) 68 | self.assertEqual(cap, 4) 69 | 70 | 71 | if __name__ == '__main__': 72 | tf.test.main() 73 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for tensor_buffer in eager mode.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import tensor_buffer 23 | 24 | tf.enable_eager_execution() 25 | 26 | 27 | class TensorBufferTest(tf.test.TestCase): 28 | """Tests for TensorBuffer in eager mode.""" 29 | 30 | def test_basic(self): 31 | size, shape = 2, [2, 3] 32 | 33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 34 | 35 | value1 = [[1, 2, 3], [4, 5, 6]] 36 | my_buffer.append(value1) 37 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 38 | 39 | value2 = [[4, 5, 6], [7, 8, 9]] 40 | my_buffer.append(value2) 41 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 42 | 43 | def test_fail_on_scalar(self): 44 | with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'): 45 | tensor_buffer.TensorBuffer(1, ()) 46 | 47 | def test_fail_on_inconsistent_shape(self): 48 | size, shape = 1, [2, 3] 49 | 50 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 51 | 52 | with self.assertRaisesRegexp( 53 | tf.errors.InvalidArgumentError, 54 | 'Appending value of inconsistent shape.'): 55 | my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) 56 | 57 | def test_resize(self): 58 | size, shape = 2, [2, 3] 59 | 60 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 61 | 62 | # Append three buffers. Third one should succeed after resizing. 63 | value1 = [[1, 2, 3], [4, 5, 6]] 64 | my_buffer.append(value1) 65 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 66 | self.assertAllEqual(my_buffer.current_size.numpy(), 1) 67 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 68 | 69 | value2 = [[4, 5, 6], [7, 8, 9]] 70 | my_buffer.append(value2) 71 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 72 | self.assertAllEqual(my_buffer.current_size.numpy(), 2) 73 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 74 | 75 | value3 = [[7, 8, 9], [10, 11, 12]] 76 | my_buffer.append(value3) 77 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3]) 78 | self.assertAllEqual(my_buffer.current_size.numpy(), 3) 79 | # Capacity should have doubled. 80 | self.assertAllEqual(my_buffer.capacity.numpy(), 4) 81 | 82 | 83 | if __name__ == '__main__': 84 | tf.test.main() 85 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/README.md: -------------------------------------------------------------------------------- 1 | Implementation of an RDP privacy accountant and smooth sensitivity analysis for 2 | the PATE framework. The underlying theory and supporting experiments appear in 3 | "Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya 4 | Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018, 5 | https://arxiv.org/abs/1802.08908). 6 | 7 | ## Overview 8 | 9 | The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 10 | by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from 11 | Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 12 | framework enables model-agnostic training that provably provides [differential 13 | privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 14 | dataset. 15 | 16 | The framework consists of _teachers_, the _student_ model, and the _aggregator_. 
The 17 | teachers are models trained on disjoint subsets of the training data. The student 18 | model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by 19 | interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 20 | outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or 21 | refuses to answer. 22 | 23 | Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_, 24 | which means that they are solely a function of the aggregator's parameters. Alternatively, privacy 25 | analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on 26 | the input distribution, the final privacy guarantees can be improved relative to the data-independent 27 | analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 28 | therefore publishing these guarantees requires its own sanitization procedure. In our case, 29 | sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis. 30 | 31 | The common machinery used for all privacy analyses in this repository is 32 | Rényi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 33 | 34 | This repository contains implementations of privacy accountants and smooth 35 | sensitivity analysis for several data-independent and data-dependent mechanisms that together 36 | comprise the PATE framework. 37 | 38 | 39 | ### Requirements 40 | 41 | * Python, version ≥ 2.7 42 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`) 43 | * numpy 44 | * scipy 45 | * sympy (for smooth sensitivity analysis) 46 | * unittest (for testing) 47 | 48 | 49 | ### Self-testing 50 | 51 | To verify the installation, run 52 | ```bash 53 | $ python core_test.py 54 | $ python smooth_sensitivity_test.py 55 | ``` 56 | 57 | 58 | ## Files in this directory 59 | 60 | * core.py — RDP privacy accountant for several vote aggregators (GNMax, 61 | Threshold, Laplace). 62 | 63 | * smooth_sensitivity.py — Smooth sensitivity analysis for GNMax and 64 | Threshold mechanisms. 65 | 66 | * core_test.py and smooth_sensitivity_test.py — Unit tests for the 67 | files above. 68 | 69 | ## Contact information 70 | 71 | You may direct comments to mironov@google.com and pull requests to @ilyamironov. 72 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Plots LS(q). 17 | 18 | A script in support of the PATE2 paper. NOT PRESENTLY USED.
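(Here LS(q) is read as the local sensitivity of the data-dependent RDP bound
for the GNMax aggregator, viewed as a function of the outcome probability q;
this reading is an assumption inferred from the smooth_sensitivity helpers
used below, not a statement from the original authors.)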
19 |
20 | The output is written to a specified directory as a pdf file.
21 | """
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 |
26 | import math
27 | import os
28 | import sys
29 |
30 | sys.path.append('..')  # Main modules reside in the parent directory.
31 |
32 |
33 | from absl import app
34 | from absl import flags
35 | import matplotlib
36 | matplotlib.use('TkAgg')
37 | import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
38 | import numpy as np
39 | import smooth_sensitivity as pate_ss
40 |
41 | plt.style.use('ggplot')
42 |
43 | FLAGS = flags.FLAGS
44 |
45 | flags.DEFINE_string('figures_dir', '', 'Path where the output is written to.')
46 |
47 |
48 | def compute_ls_q(sigma, order, num_classes):
49 |
50 |   def beta(q):
51 |     return pate_ss._compute_rdp_gnmax(sigma, math.log(q), order)
52 |
53 |   def bu(q):
54 |     return pate_ss._compute_bu_gnmax(q, sigma, order)
55 |
56 |   def bl(q):
57 |     return pate_ss._compute_bl_gnmax(q, sigma, order)
58 |
59 |   def delta_beta(q):
60 |     if q == 0 or q > .8:
61 |       return 0
62 |     beta_q = beta(q)
63 |     beta_bu_q = beta(bu(q))
64 |     beta_bl_q = beta(bl(q))
65 |     assert beta_bl_q <= beta_q <= beta_bu_q
66 |     return beta_bu_q - beta_q  # max(beta_bu_q - beta_q, beta_q - beta_bl_q)
67 |
68 |   logq0 = pate_ss.compute_logq0_gnmax(sigma, order)
69 |   logq1 = pate_ss._compute_logq1(sigma, order, num_classes)
70 |   print(math.exp(logq1), math.exp(logq0))
71 |   xs = np.linspace(0, .1, num=1000, endpoint=True)
72 |   ys = [delta_beta(x) for x in xs]
73 |   return xs, ys
74 |
75 |
76 | def main(argv):
77 |   del argv  # Unused.
78 |
79 |   sigma = 20
80 |   order = 20.
81 |   num_classes = 10
82 |
83 |   # sigma = 20
84 |   # order = 25.
85 |   # num_classes = 10
86 |
87 |   x_axis, ys = compute_ls_q(sigma, order, num_classes)
88 |
89 |   fig, ax = plt.subplots()
90 |   fig.set_figheight(4.5)
91 |   fig.set_figwidth(4.7)
92 |
93 |   ax.plot(x_axis, ys, alpha=.8, linewidth=5)
94 |   plt.xlabel('Number of queries answered', fontsize=16)
95 |   plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
96 |   ax.tick_params(labelsize=14)
97 |   fout_name = os.path.join(FLAGS.figures_dir, 'ls_of_q.pdf')
98 |   print('Saving the graph to ' + fout_name)
99 |   plt.savefig(fout_name, bbox_inches='tight')  # Actually write the pdf promised in the docstring.
100 |   plt.show()
101 |
102 |   plt.close('all')
103 |
104 |
105 | if __name__ == '__main__':
106 |   app.run(main)
107 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from absl import app
21 | from absl import flags
22 | import matplotlib
23 | import os
24 |
25 | matplotlib.use('TkAgg')
26 | import matplotlib.pyplot as plt
27 |
28 | plt.style.use('ggplot')
29 |
30 | FLAGS = flags.FLAGS
31 | flags.DEFINE_string('plot_file', '', 'Output file name.')
32 |
33 | qa_lnmax = [500, 750] + list(range(1000, 12500, 500))  # list() keeps this valid under Python 3.
34 |
35 | acc_lnmax = [43.3, 52.3, 59.8, 66.7, 68.8, 70.5, 71.6, 72.3, 72.6, 72.9, 73.4,
36 |              73.4, 73.7, 73.9, 74.2, 74.4, 74.5, 74.7, 74.8, 75, 75.1, 75.1,
37 |              75.4, 75.4, 75.4]
38 |
39 | qa_gnmax = [456, 683, 908, 1353, 1818, 2260, 2702, 3153, 3602, 4055, 4511, 4964,
40 |             5422, 5875, 6332, 6792, 7244, 7696, 8146, 8599, 9041, 9496, 9945,
41 |             10390, 10842]
42 |
43 | acc_gnmax = [39.6, 52.2, 59.6, 66.6, 69.6, 70.5, 71.8, 72, 72.7, 72.9, 73.3,
44 |              73.4, 73.4, 73.8, 74, 74.2, 74.4, 74.5, 74.5, 74.7, 74.8, 75, 75.1,
45 |              75.1, 75.4]
46 |
47 | qa_gnmax_aggressive = [167, 258, 322, 485, 647, 800, 967, 1133, 1282, 1430,
48 |                        1573, 1728, 1889, 2028, 2190, 2348, 2510, 2668, 2950,
49 |                        3098, 3265, 3413, 3581, 3730]
50 |
51 | acc_gnmax_aggressive = [17.8, 26.8, 39.3, 48, 55.7, 61, 62.8, 64.8, 65.4, 66.7,
52 |                         66.2, 68.3, 68.3, 68.7, 69.1, 70, 70.2, 70.5, 70.9,
53 |                         70.7, 71.3, 71.3, 71.3, 71.8]
54 |
55 |
56 | def main(argv):
57 |   del argv  # Unused.
58 |
59 |   plt.close('all')
60 |   fig, ax = plt.subplots()
61 |   fig.set_figheight(4.7)
62 |   fig.set_figwidth(5)
63 |   ax.plot(qa_lnmax, acc_lnmax, color='r', ls='--', linewidth=5., marker='o',
64 |           alpha=.5, label='LNMax')
65 |   ax.plot(qa_gnmax, acc_gnmax, color='g', ls='-', linewidth=5., marker='o',
66 |           alpha=.5, label='Confident-GNMax')
67 |   # ax.plot(qa_gnmax_aggressive, acc_gnmax_aggressive, color='b', ls='-', marker='o', alpha=.5, label='Confident-GNMax (aggressive)')
68 |   plt.xticks([0, 2000, 4000, 6000])
69 |   plt.xlim([0, 6000])
70 |   # ax.set_yscale('log')
71 |   plt.ylim([65, 76])
72 |   ax.tick_params(labelsize=14)
73 |   plt.xlabel('Number of queries answered', fontsize=16)
74 |   plt.ylabel('Student test accuracy (%)', fontsize=16)
75 |   plt.legend(loc=2, prop={'size': 16})
76 |
77 |   x = [400, 2116, 4600, 4680]
78 |   y = [69.5, 68.5, 74, 72.5]
79 |   annotations = [0.76, 2.89, 1.42, 5.76]
80 |   color_annotations = ['g', 'r', 'g', 'r']
81 |   for i, txt in enumerate(annotations):
82 |     ax.annotate(r'${\varepsilon=}$' + str(txt), (x[i], y[i]), fontsize=16,
83 |                 color=color_annotations[i])
84 |
85 |   plot_filename = os.path.expanduser(FLAGS.plot_file)
86 |   plt.savefig(plot_filename, bbox_inches='tight')
87 |   plt.show()
88 |
89 | if __name__ == '__main__':
90 |   app.run(main)
91 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/normalized_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Implements DPQuery interface for normalized queries.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import collections
23 |
24 | from distutils.version import LooseVersion
25 | import tensorflow as tf
26 |
27 | from privacy.dp_query import dp_query
28 |
29 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
30 |   nest = tf.contrib.framework.nest
31 | else:
32 |   nest = tf.nest
33 |
34 |
35 | class NormalizedQuery(dp_query.DPQuery):
36 |   """DPQuery for queries with a DPQuery numerator and fixed denominator."""
37 |
38 |   # pylint: disable=invalid-name
39 |   _GlobalState = collections.namedtuple(
40 |       '_GlobalState', ['numerator_state', 'denominator'])
41 |
42 |   def __init__(self, numerator_query, denominator):
43 |     """Initializer for NormalizedQuery.
44 |
45 |     Args:
46 |       numerator_query: A DPQuery for the numerator.
47 |       denominator: A value for the denominator. May be None if it will be
48 |         supplied later (e.g., by a subclass that fills in the global state's
49 |         denominator) before get_noised_result is called.
50 |     """
51 |     self._numerator = numerator_query
52 |     self._denominator = denominator
53 |
54 |   def set_ledger(self, ledger):
55 |     """See base class."""
56 |     self._numerator.set_ledger(ledger)
57 |
58 |   def initial_global_state(self):
59 |     """See base class."""
60 |     if self._denominator is not None:
61 |       denominator = tf.cast(self._denominator, tf.float32)
62 |     else:
63 |       denominator = None
64 |     return self._GlobalState(
65 |         self._numerator.initial_global_state(), denominator)
66 |
67 |   def derive_sample_params(self, global_state):
68 |     """See base class."""
69 |     return self._numerator.derive_sample_params(global_state.numerator_state)
70 |
71 |   def initial_sample_state(self, template):
72 |     """See base class."""
73 |     # NormalizedQuery has no sample state beyond the numerator state.
74 | return self._numerator.initial_sample_state(template) 75 | 76 | def preprocess_record(self, params, record): 77 | return self._numerator.preprocess_record(params, record) 78 | 79 | def accumulate_preprocessed_record( 80 | self, sample_state, preprocessed_record): 81 | """See base class.""" 82 | return self._numerator.accumulate_preprocessed_record( 83 | sample_state, preprocessed_record) 84 | 85 | def get_noised_result(self, sample_state, global_state): 86 | """See base class.""" 87 | noised_sum, new_sum_global_state = self._numerator.get_noised_result( 88 | sample_state, global_state.numerator_state) 89 | def normalize(v): 90 | return tf.truediv(v, global_state.denominator) 91 | 92 | return (nest.map_structure(normalize, noised_sum), 93 | self._GlobalState(new_sum_global_state, global_state.denominator)) 94 | 95 | def merge_sample_states(self, sample_state_1, sample_state_2): 96 | """See base class.""" 97 | return self._numerator.merge_sample_states(sample_state_1, sample_state_2) 98 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | py_library( 6 | name = "dp_query", 7 | srcs = ["dp_query.py"], 8 | deps = [ 9 | "//third_party/py/distutils", 10 | "//third_party/py/tensorflow", 11 | ], 12 | ) 13 | 14 | py_library( 15 | name = "gaussian_query", 16 | srcs = ["gaussian_query.py"], 17 | deps = [ 18 | ":dp_query", 19 | ":normalized_query", 20 | "//third_party/py/distutils", 21 | "//third_party/py/tensorflow", 22 | ], 23 | ) 24 | 25 | py_test( 26 | name = "gaussian_query_test", 27 | size = "small", 28 | srcs = ["gaussian_query_test.py"], 29 | python_version = "PY2", 30 | deps = [ 31 | ":gaussian_query", 32 | ":test_utils", 33 | "//third_party/py/absl/testing:parameterized", 34 | "//third_party/py/numpy", 35 | "//third_party/py/six", 36 | "//third_party/py/tensorflow", 37 | ], 38 | ) 39 | 40 | py_library( 41 | name = "no_privacy_query", 42 | srcs = ["no_privacy_query.py"], 43 | deps = [ 44 | ":dp_query", 45 | "//third_party/py/distutils", 46 | "//third_party/py/tensorflow", 47 | ], 48 | ) 49 | 50 | py_test( 51 | name = "no_privacy_query_test", 52 | size = "small", 53 | srcs = ["no_privacy_query_test.py"], 54 | python_version = "PY2", 55 | deps = [ 56 | ":no_privacy_query", 57 | ":test_utils", 58 | "//third_party/py/absl/testing:parameterized", 59 | "//third_party/py/tensorflow", 60 | ], 61 | ) 62 | 63 | py_library( 64 | name = "normalized_query", 65 | srcs = ["normalized_query.py"], 66 | deps = [ 67 | ":dp_query", 68 | "//third_party/py/distutils", 69 | "//third_party/py/tensorflow", 70 | ], 71 | ) 72 | 73 | py_test( 74 | name = "normalized_query_test", 75 | size = "small", 76 | srcs = ["normalized_query_test.py"], 77 | python_version = "PY2", 78 | deps = [ 79 | ":gaussian_query", 80 | ":normalized_query", 81 | ":test_utils", 82 | "//third_party/py/tensorflow", 83 | ], 84 | ) 85 | 86 | py_library( 87 | name = "nested_query", 88 | srcs = ["nested_query.py"], 89 | deps = [ 90 | ":dp_query", 91 | "//third_party/py/distutils", 92 | "//third_party/py/tensorflow", 93 | ], 94 | ) 95 | 96 | py_test( 97 | name = "nested_query_test", 98 | size = "small", 99 | srcs = ["nested_query_test.py"], 100 | python_version = "PY2", 101 | deps = [ 102 | ":gaussian_query", 103 | ":nested_query", 104 | ":test_utils", 105 | 
"//third_party/py/absl/testing:parameterized", 106 | "//third_party/py/distutils", 107 | "//third_party/py/numpy", 108 | "//third_party/py/tensorflow", 109 | ], 110 | ) 111 | 112 | py_library( 113 | name = "quantile_adaptive_clip_sum_query", 114 | srcs = ["quantile_adaptive_clip_sum_query.py"], 115 | deps = [ 116 | ":dp_query", 117 | ":gaussian_query", 118 | ":normalized_query", 119 | "//third_party/py/tensorflow", 120 | ], 121 | ) 122 | 123 | py_test( 124 | name = "quantile_adaptive_clip_sum_query_test", 125 | srcs = ["quantile_adaptive_clip_sum_query_test.py"], 126 | python_version = "PY2", 127 | deps = [ 128 | ":quantile_adaptive_clip_sum_query", 129 | ":test_utils", 130 | "//third_party/py/numpy", 131 | "//third_party/py/tensorflow", 132 | "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger", 133 | ], 134 | ) 135 | 136 | py_library( 137 | name = "test_utils", 138 | srcs = ["test_utils.py"], 139 | deps = [], 140 | ) 141 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Command-line script for computing privacy of a model trained with DP-SGD. 16 | 17 | The script applies the RDP accountant to estimate privacy budget of an iterated 18 | Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags. 19 | 20 | Example: 21 | compute_dp_sgd_privacy 22 | --N=60000 \ 23 | --batch_size=256 \ 24 | --noise_multiplier=1.12 \ 25 | --epochs=60 \ 26 | --delta=1e-5 27 | 28 | The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP. 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import math 36 | import sys 37 | 38 | from absl import app 39 | from absl import flags 40 | 41 | # Opting out of loading all sibling packages and their dependencies. 
42 | sys.skip_tf_privacy_import = True 43 | 44 | from privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top 45 | from privacy.analysis.rdp_accountant import get_privacy_spent 46 | 47 | FLAGS = flags.FLAGS 48 | 49 | flags.DEFINE_integer('N', None, 'Total number of examples') 50 | flags.DEFINE_integer('batch_size', None, 'Batch size') 51 | flags.DEFINE_float('noise_multiplier', None, 'Noise multiplier for DP-SGD') 52 | flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)') 53 | flags.DEFINE_float('delta', 1e-6, 'Target delta') 54 | 55 | flags.mark_flag_as_required('N') 56 | flags.mark_flag_as_required('batch_size') 57 | flags.mark_flag_as_required('noise_multiplier') 58 | flags.mark_flag_as_required('epochs') 59 | 60 | 61 | def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): 62 | """Compute and print results of DP-SGD analysis.""" 63 | 64 | # compute_rdp requires that sigma be the ratio of the standard deviation of 65 | # the Gaussian noise to the l2-sensitivity of the function to which it is 66 | # added. Hence, sigma here corresponds to the `noise_multiplier` parameter 67 | # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer 68 | rdp = compute_rdp(q, sigma, steps, orders) 69 | 70 | eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta) 71 | 72 | print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' 73 | ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ') 74 | print('differential privacy with eps = {:.3g} and delta = {}.'.format( 75 | eps, delta)) 76 | print('The optimal RDP order is {}.'.format(opt_order)) 77 | 78 | if opt_order == max(orders) or opt_order == min(orders): 79 | print('The privacy estimate is likely to be improved by expanding ' 80 | 'the set of orders.') 81 | 82 | 83 | def main(argv): 84 | del argv # argv is not used. 85 | 86 | q = FLAGS.batch_size / FLAGS.N # q - the sampling ratio. 87 | if q > 1: 88 | raise app.UsageError('N must be larger than the batch size.') 89 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] + 90 | list(range(5, 64)) + [128, 256, 512]) 91 | steps = int(math.ceil(FLAGS.epochs * FLAGS.N / FLAGS.batch_size)) 92 | 93 | apply_dp_sgd_analysis(q, FLAGS.noise_multiplier, steps, orders, FLAGS.delta) 94 | 95 | 96 | if __name__ == '__main__': 97 | app.run(main) 98 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2017/train_teachers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
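An aside before the next file: the accountant wrapped by `compute_dp_sgd_privacy.py` above can also be called as a library. A minimal sketch, assuming this repository's import layout, with the flag values taken from that script's docstring:

```python
import math

from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

q = 256 / 60000.0                          # sampling ratio: batch_size / N
steps = int(math.ceil(60 * 60000 / 256.))  # 60 epochs of training
orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
          list(range(5, 64)) + [128, 256, 512])
rdp = compute_rdp(q=q, noise_multiplier=1.12, steps=steps, orders=orders)
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=1e-5)
print(eps, opt_order)  # should roughly match the (2.92, 1e-5)-DP figure quoted above
```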
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import deep_cnn 20 | import input # pylint: disable=redefined-builtin 21 | import metrics 22 | import tensorflow as tf 23 | 24 | 25 | tf.flags.DEFINE_string('dataset', 'svhn', 'The name of the dataset to use') 26 | tf.flags.DEFINE_integer('nb_labels', 10, 'Number of output classes') 27 | 28 | tf.flags.DEFINE_string('data_dir','/tmp','Temporary storage') 29 | tf.flags.DEFINE_string('train_dir','/tmp/train_dir', 30 | 'Where model ckpt are saved') 31 | 32 | tf.flags.DEFINE_integer('max_steps', 3000, 'Number of training steps to run.') 33 | tf.flags.DEFINE_integer('nb_teachers', 50, 'Teachers in the ensemble.') 34 | tf.flags.DEFINE_integer('teacher_id', 0, 'ID of teacher being trained.') 35 | 36 | tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model') 37 | 38 | FLAGS = tf.flags.FLAGS 39 | 40 | 41 | def train_teacher(dataset, nb_teachers, teacher_id): 42 | """ 43 | This function trains a teacher (teacher id) among an ensemble of nb_teachers 44 | models for the dataset specified. 45 | :param dataset: string corresponding to dataset (svhn, cifar10) 46 | :param nb_teachers: total number of teachers in the ensemble 47 | :param teacher_id: id of the teacher being trained 48 | :return: True if everything went well 49 | """ 50 | # If working directories do not exist, create them 51 | assert input.create_dir_if_needed(FLAGS.data_dir) 52 | assert input.create_dir_if_needed(FLAGS.train_dir) 53 | 54 | # Load the dataset 55 | if dataset == 'svhn': 56 | train_data,train_labels,test_data,test_labels = input.ld_svhn(extended=True) 57 | elif dataset == 'cifar10': 58 | train_data, train_labels, test_data, test_labels = input.ld_cifar10() 59 | elif dataset == 'mnist': 60 | train_data, train_labels, test_data, test_labels = input.ld_mnist() 61 | else: 62 | print("Check value of dataset flag") 63 | return False 64 | 65 | # Retrieve subset of data for this teacher 66 | data, labels = input.partition_dataset(train_data, 67 | train_labels, 68 | nb_teachers, 69 | teacher_id) 70 | 71 | print("Length of training data: " + str(len(labels))) 72 | 73 | # Define teacher checkpoint filename and full path 74 | if FLAGS.deeper: 75 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt' 76 | else: 77 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt' 78 | ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename 79 | 80 | # Perform teacher training 81 | assert deep_cnn.train(data, labels, ckpt_path) 82 | 83 | # Append final step value to checkpoint for evaluation 84 | ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) 85 | 86 | # Retrieve teacher probability estimates on the test data 87 | teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final) 88 | 89 | # Compute teacher accuracy 90 | precision = metrics.accuracy(teacher_preds, test_labels) 91 | print('Precision of teacher after training: ' + str(precision)) 92 | 93 | return True 94 | 95 | 96 | def main(argv=None): # pylint: disable=unused-argument 97 | # Make a call to train_teachers with values specified in flags 98 | assert train_teacher(FLAGS.dataset, FLAGS.nb_teachers, FLAGS.teacher_id) 99 | 100 | if __name__ == '__main__': 101 | tf.app.run() 102 | -------------------------------------------------------------------------------- /tensorflow_privacy/README.md: 
-------------------------------------------------------------------------------- 1 | # TensorFlow Privacy 2 | 3 | This repository contains the source code for TensorFlow Privacy, a Python 4 | library that includes implementations of TensorFlow optimizers for training 5 | machine learning models with differential privacy. The library comes with 6 | tutorials and analysis tools for computing the privacy guarantees provided. 7 | 8 | The TensorFlow Privacy library is under continual development, always welcoming 9 | contributions. In particular, we welcome help with resolving the 10 | currently open issues. 11 | 12 | ## Setting up TensorFlow Privacy 13 | 14 | ### Dependencies 15 | 16 | This library uses [TensorFlow](https://www.tensorflow.org/) to define machine 17 | learning models. Therefore, installing TensorFlow (>= 1.14) is a prerequisite. 18 | You can find instructions [here](https://www.tensorflow.org/install/). For 19 | better performance, it is also recommended to install TensorFlow with GPU 20 | support (detailed instructions on how to do this are available in the TensorFlow 21 | installation documentation). 22 | 23 | In addition to TensorFlow and its dependencies, other prerequisites are: 24 | 25 | * `scipy` >= 0.17 26 | 27 | * `mpmath` (for testing) 28 | 29 | * `tensorflow_datasets` (for the RNN tutorial `lm_dpsgd_tutorial.py` only) 30 | 31 | ### Installing TensorFlow Privacy 32 | 33 | First, clone this GitHub repository into a directory of your choice: 34 | 35 | ``` 36 | git clone https://github.com/tensorflow/privacy 37 | ``` 38 | 39 | You can then install the local package in "editable" mode in order to add it to 40 | your `PYTHONPATH`: 41 | 42 | ``` 43 | cd privacy 44 | pip install -e . 45 | ``` 46 | 47 | If you'd like to make contributions, we recommend first forking the repository 48 | and then cloning your fork rather than cloning this repository directly. 49 | 50 | ## Contributing 51 | 52 | Contributions are welcome! Bug fixes and new features can be initiated through 53 | GitHub pull requests. To speed the code review process, we ask that: 54 | 55 | * When making code contributions to TensorFlow Privacy, you follow the `PEP8 56 | with two spaces` coding style (the same as the one used by TensorFlow) in 57 | your pull requests. In most cases this can be done by running `autopep8 -i 58 | --indent-size 2 <file>` on the files you have edited. 59 | 60 | * You should also check your code with pylint and TensorFlow's pylint 61 | [configuration file](https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc) 62 | by running `pylint --rcfile=/path/to/the/tf/rcfile <file>`. 63 | 64 | * When making your first pull request, you must 65 | [sign the Google CLA](https://cla.developers.google.com/clas) 66 | 67 | * We do not accept pull requests that add git submodules because of 68 | [the problems that arise when maintaining git submodules](https://medium.com/@porteneuve/mastering-git-submodules-34c65e940407) 69 | 70 | ## Tutorials directory 71 | 72 | To help you get started with the functionalities provided by this library, we 73 | provide a detailed walkthrough [here](tutorials/walkthrough/walkthrough.md) that 74 | will teach you how to wrap existing optimizers 75 | (e.g., SGD, Adam, ...) into their differentially private counterparts using 76 | TensorFlow (TF) Privacy.
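As a taste of the wrapping step the walkthrough covers, here is a minimal sketch using this repository's `privacy/optimizers` module; the hyperparameter values are illustrative only, not recommendations:

```python
from privacy.optimizers import dp_optimizer

optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,       # clip each microbatch gradient to this L2 norm
    noise_multiplier=1.1,   # noise stddev = noise_multiplier * l2_norm_clip
    num_microbatches=256,   # split the minibatch for per-microbatch clipping
    learning_rate=0.15)
# Use it like any TF optimizer, but supply a *vector* loss (one entry per
# example) so gradients can be clipped per microbatch before averaging and
# noising.
```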
You will also learn how to tune the parameters 77 | introduced by differentially private optimization and how to 78 | measure the privacy guarantees provided using analysis tools included in TF 79 | Privacy. 80 | 81 | In addition, the 82 | `tutorials/` folder comes with scripts demonstrating how to use the library 83 | features. The list of tutorials is described in the README included in the 84 | tutorials directory. 85 | 86 | NOTE: the tutorials are maintained carefully. However, they are not considered 87 | part of the API and they can change at any time without warning. You should not 88 | write 3rd party code that imports the tutorials and expect that the interface 89 | will not break. 90 | 91 | ## Research directory 92 | 93 | This folder contains code to reproduce results from research papers related to 94 | privacy in machine learning. It is not maintained as carefully as the tutorials 95 | directory, but rather intended as a convenient archive. 96 | 97 | ## Remarks 98 | 99 | The content of this repository supersedes the following existing folder in the 100 | tensorflow/models [repository](https://github.com/tensorflow/models/tree/master/research/differential_privacy) 101 | 102 | ## Contacts 103 | 104 | If you have any questions that cannot be addressed by raising an issue, feel 105 | free to contact: 106 | 107 | * Galen Andrew (@galenmandrew) 108 | * Steve Chien (@schien1729) 109 | * Nicolas Papernot (@npapernot) 110 | 111 | ## Copyright 112 | 113 | Copyright 2019 - Google LLC 114 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/nested_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implements DPQuery interface for queries over nested structures. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from distutils.version import LooseVersion 23 | import tensorflow as tf 24 | 25 | from privacy.dp_query import dp_query 26 | 27 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 28 | nest = tf.contrib.framework.nest 29 | else: 30 | nest = tf.nest 31 | 32 | 33 | class NestedQuery(dp_query.DPQuery): 34 | """Implements DPQuery interface for structured queries. 35 | 36 | NestedQuery evaluates arbitrary nested structures of queries. Records must be 37 | nested structures of tensors that are compatible (in type and arity) with the 38 | query structure, but are allowed to have deeper structure within each leaf of 39 | the query structure. For example, the nested query [q1, q2] is compatible with 40 | the record [t1, t2] or [t1, (t2, t3)], but not with (t1, t2), [t1] or 41 | [t1, t2, t3]. The entire substructure of each record corresponding to a leaf 42 | node of the query structure is routed to the corresponding query. 
If the same 43 | tensor should be consumed by multiple sub-queries, it can be replicated in the 44 | record, for example [t1, t1]. 45 | 46 | NestedQuery is intended to allow privacy mechanisms for groups as described in 47 | [McMahan & Andrew, 2018: "A General Approach to Adding Differential Privacy to 48 | Iterative Training Procedures" (https://arxiv.org/abs/1812.06210)]. 49 | """ 50 | 51 | def __init__(self, queries): 52 | """Initializes the NestedQuery. 53 | 54 | Args: 55 | queries: A nested structure of queries. 56 | """ 57 | self._queries = queries 58 | 59 | def _map_to_queries(self, fn, *inputs, **kwargs): 60 | def caller(query, *args): 61 | return getattr(query, fn)(*args, **kwargs) 62 | return nest.map_structure_up_to( 63 | self._queries, caller, self._queries, *inputs) 64 | 65 | def set_ledger(self, ledger): 66 | self._map_to_queries('set_ledger', ledger=ledger) 67 | 68 | def initial_global_state(self): 69 | """See base class.""" 70 | return self._map_to_queries('initial_global_state') 71 | 72 | def derive_sample_params(self, global_state): 73 | """See base class.""" 74 | return self._map_to_queries('derive_sample_params', global_state) 75 | 76 | def initial_sample_state(self, template): 77 | """See base class.""" 78 | return self._map_to_queries('initial_sample_state', template) 79 | 80 | def preprocess_record(self, params, record): 81 | """See base class.""" 82 | return self._map_to_queries('preprocess_record', params, record) 83 | 84 | def accumulate_preprocessed_record( 85 | self, sample_state, preprocessed_record): 86 | """See base class.""" 87 | return self._map_to_queries( 88 | 'accumulate_preprocessed_record', 89 | sample_state, 90 | preprocessed_record) 91 | 92 | def merge_sample_states(self, sample_state_1, sample_state_2): 93 | return self._map_to_queries( 94 | 'merge_sample_states', sample_state_1, sample_state_2) 95 | 96 | def get_noised_result(self, sample_state, global_state): 97 | """Gets query result after all records of sample have been accumulated. 98 | 99 | Args: 100 | sample_state: The sample state after all records have been accumulated. 101 | global_state: The global state. 102 | 103 | Returns: 104 | A tuple (result, new_global_state) where "result" is a structure matching 105 | the query structure containing the results of the subqueries and 106 | "new_global_state" is a structure containing the updated global states 107 | for the subqueries. 108 | """ 109 | estimates_and_new_global_states = self._map_to_queries( 110 | 'get_noised_result', sample_state, global_state) 111 | 112 | flat_estimates, flat_new_global_states = zip( 113 | *nest.flatten_up_to(self._queries, estimates_and_new_global_states)) 114 | return ( 115 | nest.pack_sequence_as(self._queries, flat_estimates), 116 | nest.pack_sequence_as(self._queries, flat_new_global_states)) 117 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/core_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for pate.core.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import sys 23 | import unittest 24 | import numpy as np 25 | 26 | import core as pate 27 | 28 | 29 | class PateTest(unittest.TestCase): 30 | 31 | def _test_rdp_gaussian_value_errors(self): 32 | # Test for ValueErrors. 33 | with self.assertRaises(ValueError): 34 | pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4])) 35 | with self.assertRaises(ValueError): 36 | pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4])) 37 | with self.assertRaises(ValueError): 38 | pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4])) 39 | 40 | def _test_rdp_gaussian_as_function_of_q(self): 41 | # Test for data-independent and data-dependent ranges over q. 42 | # The following corresponds to orders 1.1, 2.5, 32, 250 43 | # sigmas 1.5, 15, 1500, 15000. 44 | # Hand calculated -log(q0)s arranged in a 'sigma major' ordering. 45 | neglogq0s = [ 46 | 2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2, 47 | 8.6, 6.4 48 | ] 49 | idx_neglogq0s = 0 # To iterate through neglogq0s. 50 | orders = [1.1, 2.5, 32, 250] 51 | sigmas = [1.5, 15, 1500, 15000] 52 | for sigma in sigmas: 53 | for order in orders: 54 | curr_neglogq0 = neglogq0s[idx_neglogq0s] 55 | idx_neglogq0s += 1 56 | if curr_neglogq0 is None: # sigma == 1.5 and order == 250: 57 | continue 58 | 59 | rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order) 60 | 61 | # Data-dependent range. (Successively halve the value of q.) 62 | logq_dds = (-curr_neglogq0 - np.array( 63 | [0, np.log(2), np.log(4), np.log(8)])) 64 | # Check that in q_dds, rdp is decreasing. 65 | for idx in range(len(logq_dds) - 1): 66 | self.assertGreater( 67 | pate.rdp_gaussian(logq_dds[idx], sigma, order), 68 | pate.rdp_gaussian(logq_dds[idx + 1], sigma, order)) 69 | 70 | # Data-independent range. 71 | q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4]) 72 | # Check that in q_dids, rdp is constant. 73 | for q in q_dids: 74 | self.assertEqual(rdp_at_q0, pate.rdp_gaussian( 75 | np.log(q), sigma, order)) 76 | 77 | def _test_compute_eps_from_delta_value_error(self): 78 | # Test for ValueError. 79 | with self.assertRaises(ValueError): 80 | pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001) 81 | 82 | def _test_compute_eps_from_delta_monotonicity(self): 83 | # Test for monotonicity with respect to delta. 84 | orders = [1.1, 2.5, 250.0] 85 | sigmas = [1e-3, 1.0, 1e5] 86 | deltas = [1e-60, 1e-6, 0.1, 0.999] 87 | for sigma in sigmas: 88 | list_of_eps = [] 89 | rdps_for_gaussian = np.array(orders) / (2 * sigma**2) 90 | for delta in deltas: 91 | list_of_eps.append( 92 | pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0]) 93 | 94 | # Check that in list_of_eps, epsilons are decreasing (as delta increases). 
95 | sorted_list_of_eps = list(list_of_eps) 96 | sorted_list_of_eps.sort(reverse=True) 97 | self.assertEqual(list_of_eps, sorted_list_of_eps) 98 | 99 | def _test_compute_q0(self): 100 | # Stub code to search a logq space and figure out logq0 by eyeballing 101 | # results. This code does not run with the tests. Remove underscore to run. 102 | sigma = 15 103 | order = 250 104 | logqs = np.arange(-290, -270, 1) 105 | count = 0 106 | for logq in logqs: 107 | count += 1 108 | sys.stdout.write("\t%0.5g: %0.10g" % 109 | (logq, pate.rdp_gaussian(logq, sigma, order))) 110 | sys.stdout.flush() 111 | if count % 5 == 0: 112 | print("") 113 | 114 | def test_rdp_gaussian(self): 115 | self._test_rdp_gaussian_value_errors() 116 | self._test_rdp_gaussian_as_function_of_q() 117 | 118 | def test_compute_eps_from_delta(self): 119 | self._test_compute_eps_from_delta_value_error() 120 | self._test_compute_eps_from_delta_monotonicity() 121 | 122 | 123 | if __name__ == "__main__": 124 | unittest.main() 125 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for pate.smooth_sensitivity.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import unittest 23 | import numpy as np 24 | 25 | import smooth_sensitivity as pate_ss 26 | 27 | 28 | class PateSmoothSensitivityTest(unittest.TestCase): 29 | 30 | def test_check_conditions(self): 31 | self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False)) 32 | self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True)) 33 | 34 | def _assert_all_close(self, x, y): 35 | """Asserts that two numpy arrays are close.""" 36 | self.assertEqual(len(x), len(y)) 37 | self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0)) 38 | 39 | def test_compute_local_sensitivity_bounds_gnmax(self): 40 | counts1 = np.array([10, 0, 0]) 41 | sigma1 = .5 42 | order1 = 1.5 43 | 44 | answer1 = np.array( 45 | [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7) 46 | 47 | # Test for "going right" in the smooth sensitivity computation. 48 | out1 = pate_ss.compute_local_sensitivity_bounds_gnmax( 49 | counts1, 10, sigma1, order1) 50 | 51 | self._assert_all_close(out1, answer1) 52 | 53 | counts2 = np.array([1000, 500, 300, 200, 0]) 54 | sigma2 = 250. 55 | order2 = 10. 56 | 57 | # Test for "going left" in the smooth sensitivity computation. 58 | out2 = pate_ss.compute_local_sensitivity_bounds_gnmax( 59 | counts2, 2000, sigma2, order2) 60 | 61 | answer2 = np.array([0.] 
* 298 + [2.77693450548e-7, 2.10853979548e-6] + 62 | [2.73113623988e-6] * 1700) 63 | self._assert_all_close(out2, answer2) 64 | 65 | def test_compute_local_sensitivity_bounds_threshold(self): 66 | counts1_3 = np.array([20, 10, 0]) 67 | num_teachers = sum(counts1_3) 68 | t1 = 16 # high threshold 69 | sigma = 2 70 | order = 10 71 | 72 | out1 = pate_ss.compute_local_sensitivity_bounds_threshold( 73 | counts1_3, num_teachers, t1, sigma, order) 74 | answer1 = np.array([0] * 3 + [ 75 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, 76 | 9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01, 77 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, 78 | 9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01, 79 | 1.93127860e-01 80 | ] + [0] * 10) 81 | self._assert_all_close(out1, answer1) 82 | 83 | t2 = 2 # low threshold 84 | 85 | out2 = pate_ss.compute_local_sensitivity_bounds_threshold( 86 | counts1_3, num_teachers, t2, sigma, order) 87 | answer2 = np.array([ 88 | 1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01, 89 | 1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02, 90 | 6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04 91 | ] + [0] * 18) 92 | self._assert_all_close(out2, answer2) 93 | 94 | t3 = 50 # very high threshold (larger than the number of teachers). 95 | 96 | out3 = pate_ss.compute_local_sensitivity_bounds_threshold( 97 | counts1_3, num_teachers, t3, sigma, order) 98 | 99 | answer3 = np.array([ 100 | 1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15, 101 | 1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10, 102 | 2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05, 103 | 0.000360973025976, 0.000360973025976, 2.76377015215e-50, 104 | 1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61, 105 | 1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72, 106 | 1.28328011060e-76, 8.20047697109e-81 107 | ] + [0] * 10) 108 | 109 | self._assert_all_close(out3, answer3) 110 | 111 | # Fractional values. 
112 | counts4 = np.array([19.5, -5.1, 0]) 113 | t4 = 10.1 114 | out4 = pate_ss.compute_local_sensitivity_bounds_threshold( 115 | counts4, num_teachers, t4, sigma, order) 116 | 117 | answer4 = np.array([ 118 | 0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530, 119 | 0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142, 120 | 0.00678695871 121 | ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10) 122 | self._assert_all_close(out4, answer4) 123 | 124 | 125 | if __name__ == "__main__": 126 | unittest.main() 127 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_0.0001_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.5150317674746007, 0.5183779587283024, 0.521724149982004, 0.5250703412357057, 0.5284165324894075, 0.5317627237431092, 0.5351089149968108, 0.5384551062505125, 0.5418012975042142, 0.545147488757916, 0.5484936800116176, 0.5518398712653193, 0.555186062519021, 0.5585322537727228, 0.5618784450264244, 0.5652246362801261, 0.5685708275338278, 0.5719170187875295, 0.5752632100412312, 0.5786094012949329, 0.5819555925486346, 0.5853017838023362, 0.5886479750560379, 0.5919941663097397, 0.5953403575634414, 0.5986865488171431, 0.6020327400708447, 0.6053789313245465, 0.6087251225782482, 0.6120713138319499, 0.6154175050856515, 0.6187636963393532, 0.622109887593055, 0.6254560788467567, 0.6288022701004583, 0.63214846135416, 0.6354946526078618, 0.6388408438615634, 0.6421870351152651, 0.6455332263689668, 0.6488794176226685, 0.6522256088763703, 0.6555718001300719, 0.6589179913837736, 0.6622641826374753, 0.665610373891177, 0.6689565651448788, 0.6723027563985804, 0.6756489476522821, 0.6789951389059838, 0.6823413301596855, 0.6856875214133872, 0.6890337126670889, 0.6923799039207906, 0.6957260951744924, 0.699072286428194, 0.7024184776818957, 0.7057646689355974, 0.709110860189299, 0.7124570514430008, 0.7158032426967025, 0.7191494339504042, 0.7224956252041059, 0.7258418164578075, 0.7291880077115093, 0.732534198965211, 0.7358803902189126, 0.7392265814726143, 0.742572772726316, 0.7459189639800178, 0.7492651552337195, 0.7526113464874211, 0.7559575377411228, 0.7593037289948246, 0.7626499202485262, 0.7659961115022279, 0.7693423027559296, 0.7726884940096312, 0.7760346852633331, 0.7793808765170347, 0.7827270677707364, 0.7860732590244381, 0.7894194502781398, 0.7927656415318415, 0.7961118327855432, 0.7994580240392448, 0.8028042152929467, 0.8061504065466483, 0.80949659780035, 0.8128427890540517, 0.8161889803077533, 0.8195351715614552, 0.8228813628151568, 0.8262275540688585, 0.8295737453225602, 0.8329199365762618, 0.8362661278299636, 0.8396123190836653, 0.8429585103373669, 0.8463047015910687, 0.8496508928447704, 0.8529970840984721, 0.8563432753521738, 0.8596894666058754, 0.8630356578595771, 0.8663818491132789, 0.8697280403669805, 0.8730742316206823, 0.8764204228743839, 0.8797666141280857, 0.8831128053817874, 0.886458996635489, 0.8898051878891907, 0.8931513791428924, 0.8964975703965941, 0.8998437616502959, 0.9031899529039975, 0.9065361441576992, 0.909882335411401, 0.9132285266651026, 0.9165747179188043, 0.919920909172506, 0.9232671004262076, 0.9266132916799095, 0.9299594829336111, 0.9333056741873128, 0.9366321354866116, 0.9397411701930944, 0.942850204899577, 0.9459592396060599, 0.9490682743125425, 0.9521773090190253, 0.955286343725508, 0.9583953784319907, 0.9615044131384735, 0.9646134478449562, 0.967722482551439, 0.9708315172579216, 0.9739405519644044, 
0.9770495866708871, 0.9801586213773699, 0.9832676560838526, 0.9863766907903353, 0.9894857254968181, 0.9925947602033007, 0.9957037949097836, 0.9988128296162663, 1.001921864322749, 1.0050308990292316, 1.0081399337357144, 1.0112489684421972, 1.0143580031486799, 1.0174670378551627, 1.0205760725616453, 1.0236851072681281, 1.026794141974611, 1.0299031766810935, 1.0330122113875764, 1.036121246094059, 1.0392302808005418, 1.0423393155070246, 1.0454483502135072, 1.0485573849199898, 1.0516664196264727, 1.0547754543329555, 1.057884489039438, 1.060993523745921, 1.0641025584524035, 1.0672115931588864, 1.0703206278653692, 1.0734296625718518, 1.0765386972783344, 1.0796477319848172, 1.0827567666913, 1.0858658013977829, 1.0889748361042655, 1.092083870810748, 1.095192905517231, 1.0983019402237137, 1.1014109749301964, 1.104520009636679, 1.1076290443431618, 1.1107380790496446, 1.1138471137561274, 1.11695614846261, 1.1200651831690926, 1.1231742178755755, 1.1262832525820583, 1.1293922872885411, 1.1325013219950237, 1.1356103567015063, 1.1387193914079892, 1.141828426114472, 1.1449374608209546, 1.1479242666081464, 1.1508590255484523, 1.1537937844887582, 1.156728543429064, 1.1596633023693697, 1.1625980613096756] 2 | validation acc: [0.3689, 0.4393, 0.4015, 0.4714, 0.5035, 0.5211, 0.5439, 0.5236, 0.5269, 0.5212, 0.5728, 0.5894, 0.6, 0.5682, 0.595, 0.6018, 0.5995, 0.5808, 0.6293, 0.5536, 0.6295, 0.6174, 0.6154, 0.6357, 0.6283, 0.6392, 0.6269, 0.6295, 0.6244, 0.6356, 0.6583, 0.6559, 0.6231, 0.6565, 0.6261, 0.648, 0.6631, 0.6505, 0.6463, 0.6132, 0.6627, 0.67, 0.6711, 0.6797, 0.6806, 0.6739, 0.666, 0.6837, 0.6636, 0.6594, 0.6692, 0.6836, 0.6531, 0.6807, 0.6928, 0.6751, 0.6717, 0.6842, 0.6873, 0.6875, 0.6964, 0.6807, 0.6873, 0.6865, 0.6725, 0.6951, 0.7011, 0.7006, 0.6831, 0.6934, 0.6845, 0.695, 0.681, 0.6752, 0.7018, 0.689, 0.6966, 0.6856, 0.7055, 0.6915, 0.6999, 0.7041, 0.7046, 0.7065, 0.6895, 0.7009, 0.6926, 0.7011, 0.6886, 0.687, 0.6958, 0.6659, 0.6842, 0.7042, 0.6952, 0.6946, 0.7093, 0.6639, 0.7009, 0.696, 0.6996, 0.6833, 0.6999, 0.6965, 0.6988, 0.6989, 0.7109, 0.708, 0.7098, 0.6979, 0.7056, 0.7033, 0.7145, 0.7079, 0.7074, 0.6999, 0.7051, 0.7149, 0.708, 0.6967, 0.708, 0.6877, 0.7099, 0.7002, 0.6944, 0.7035, 0.7081, 0.7052, 0.7149, 0.7072, 0.7155, 0.7131, 0.7038, 0.7051, 0.7151, 0.7064, 0.7047, 0.7083, 0.7178, 0.6983, 0.7147, 0.7072, 0.6974, 0.7154, 0.7125, 0.7122, 0.684, 0.7159, 0.7176, 0.7013, 0.7085, 0.6975, 0.7118, 0.717, 0.7073, 0.712, 0.7118, 0.7154, 0.6985, 0.7013, 0.7042, 0.7186, 0.7075, 0.7138, 0.7114, 0.7143, 0.7176, 0.7153, 0.7182, 0.7053, 0.7027, 0.6978, 0.7036, 0.7201, 0.7076, 0.7116, 0.7174, 0.7194, 0.7077, 0.7169, 0.7126, 0.7196, 0.709, 0.7195, 0.7159, 0.7108, 0.7153, 0.7122, 0.7089, 0.7093, 0.7101, 0.7123, 0.7181, 0.7225, 0.7054, 0.7108, 0.7106, 0.7147, 0.717, 0.7107] 3 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_1e-06_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.7708745555850502, 0.774220746838752, 0.7775669380924536, 0.7809131293461553, 0.784259320599857, 0.7876055118535588, 0.7909517031072604, 0.7942978943609621, 0.7976440856146638, 0.8009902768683654, 0.8043364681220672, 0.8076826593757689, 0.8110288506294706, 0.8143750418831723, 0.817721233136874, 0.8210674243905757, 0.8244136156442774, 0.8277598068979791, 0.8311059981516807, 0.8344521894053825, 0.8377983806590842, 0.8411445719127859, 0.8444907631664875, 0.8478369544201892, 0.851183145673891, 0.8545293369275927, 
0.8578755281812943, 0.861221719434996, 0.8645679106886978, 0.8679141019423995, 0.8712602931961011, 0.8746064844498028, 0.8779526757035045, 0.8812988669572063, 0.8846450582109079, 0.8879912494646096, 0.8913374407183113, 0.8946836319720131, 0.8980298232257147, 0.9013760144794164, 0.9047222057331181, 0.9080683969868197, 0.9114145882405215, 0.9147607794942232, 0.9181069707479249, 0.9214531620016266, 0.9247993532553282, 0.92814554450903, 0.9314917357627317, 0.9348379270164334, 0.9381841182701351, 0.9415303095238368, 0.9448765007775385, 0.9482226920312402, 0.9515688832849418, 0.9549150745386435, 0.9582612657923453, 0.961607457046047, 0.9649536482997487, 0.9682998395534503, 0.9716460308071521, 0.9749922220608538, 0.9783384133145554, 0.9816846045682571, 0.9850307958219588, 0.9883769870756606, 0.9917231783293623, 0.9950693695830639, 0.9984155608367656, 1.0017617520904674, 1.005107943344169, 1.0084541345978708, 1.0118003258515724, 1.015146517105274, 1.0184927083589759, 1.0218388996126775, 1.0251850908663793, 1.028531282120081, 1.0318774733737825, 1.0352236646274844, 1.038569855881186, 1.0419160471348876, 1.0452622383885894, 1.048608429642291, 1.0519546208959927, 1.0553008121496945, 1.0586470034033961, 1.0619931946570977, 1.0653393859107996, 1.0686855771645012, 1.072031768418203, 1.0753779596719046, 1.0787241509256065, 1.082070342179308, 1.0854165334330097, 1.0887627246867115, 1.0921089159404131, 1.0954551071941148, 1.0988012984478166, 1.1021474897015182, 1.1054936809552198, 1.1088398722089217, 1.1121860634626233, 1.1155322547163251, 1.1188784459700267, 1.1222246372237286, 1.1255708284774302, 1.1289170197311318, 1.1322632109848336, 1.1356094022385352, 1.1389555934922369, 1.1423017847459387, 1.1456479759996403, 1.148994167253342, 1.1523403585070437, 1.1556865497607454, 1.159032741014447, 1.1623789322681488, 1.1657251235218504, 1.1690713147755523, 1.1724175060292539, 1.1757636972829557, 1.1791098885366573, 1.182456079790359, 1.1858022710440608, 1.1891484622977624, 1.192494653551464, 1.1958408448051658, 1.1991870360588675, 1.202533227312569, 1.205879418566271, 1.2092256098199725, 1.2125718010736741, 1.215917992327376, 1.2192641835810776, 1.2226103748347794, 1.225956566088481, 1.2293027573421829, 1.2326489485958845, 1.235995139849586, 1.239341331103288, 1.2426875223569895, 1.2460337136106912, 1.249379904864393, 1.2527260961180946, 1.2560722873717962, 1.259418478625498, 1.2627646698791997, 1.2661108611329013, 1.2694570523866031, 1.272803243640305, 1.2761494348940063, 1.2794956261477082, 1.28284181740141, 1.2861880086551116, 1.2895341999088132, 1.292880391162515, 1.2962265824162167, 1.2995727736699183, 1.3029189649236201, 1.3062651561773218, 1.3096113474310234, 1.3129575386847252, 1.3163037299384268, 1.3196499211921284, 1.3229961124458303, 1.326342303699532, 1.3296884949532335, 1.3330346862069353, 1.3363808774606372, 1.3397270687143388, 1.3430732599680404, 1.3464194512217422, 1.3497656424754438, 1.3531118337291455, 1.3564580249828473, 1.359804216236549, 1.3631504074902505, 1.3664965987439524, 1.369842789997654, 1.3731889812513556, 1.3765351725050574, 1.3798813637587592, 1.3832275550124609, 1.3865737462661625, 1.3899199375198643, 1.393266128773566, 1.3966123200272675, 1.3999585112809694, 1.403304702534671, 1.406502720583159, 1.4096117552896417, 1.4127207899961243, 1.415829824702607, 1.41893885940909, 1.4220478941155725, 1.4251569288220554, 1.428265963528538, 1.4313749982350208, 1.4344840329415036] 2 | validation acc: [0.3756, 0.4218, 0.4576, 0.4572, 0.4651, 0.4857, 0.5136, 0.5337, 0.5265, 0.5388, 0.5286, 
0.5851, 0.5796, 0.5462, 0.5908, 0.5742, 0.6136, 0.6284, 0.62, 0.6343, 0.6097, 0.5827, 0.6341, 0.6228, 0.6097, 0.6276, 0.6345, 0.6337, 0.6002, 0.6019, 0.6545, 0.6631, 0.657, 0.6518, 0.5948, 0.6656, 0.6811, 0.6565, 0.656, 0.6623, 0.6691, 0.6784, 0.6245, 0.6595, 0.6673, 0.6807, 0.6832, 0.6674, 0.6831, 0.6682, 0.6918, 0.6821, 0.6889, 0.6752, 0.6908, 0.6817, 0.6664, 0.6706, 0.6626, 0.6724, 0.6938, 0.6982, 0.6643, 0.6889, 0.6762, 0.685, 0.6905, 0.6932, 0.6746, 0.6929, 0.6961, 0.6895, 0.7006, 0.6895, 0.6689, 0.6777, 0.6898, 0.691, 0.6958, 0.6895, 0.6984, 0.6855, 0.7083, 0.6827, 0.6995, 0.6969, 0.6915, 0.6919, 0.7012, 0.6939, 0.6988, 0.7037, 0.6882, 0.7033, 0.7001, 0.6896, 0.7084, 0.6972, 0.6925, 0.6818, 0.7096, 0.6979, 0.6871, 0.7024, 0.6981, 0.705, 0.7077, 0.6933, 0.6958, 0.7041, 0.7097, 0.6911, 0.711, 0.6803, 0.7049, 0.696, 0.6952, 0.7037, 0.6988, 0.7038, 0.7027, 0.709, 0.7009, 0.7011, 0.6897, 0.6994, 0.7066, 0.7003, 0.7055, 0.7056, 0.7117, 0.6883, 0.6999, 0.6967, 0.6828, 0.6997, 0.6998, 0.7012, 0.7046, 0.7142, 0.7072, 0.7101, 0.7134, 0.7073, 0.7083, 0.6998, 0.7108, 0.7114, 0.7121, 0.6971, 0.6996, 0.708, 0.7059, 0.7061, 0.7115, 0.7101, 0.7076, 0.713, 0.7002, 0.706, 0.7077, 0.7046, 0.7088, 0.7055, 0.7149, 0.6985, 0.7109, 0.7013, 0.6881, 0.7105, 0.7124, 0.6967, 0.7129, 0.7077, 0.7158, 0.7088, 0.7125, 0.7061, 0.7141, 0.7113, 0.7113, 0.701, 0.7078, 0.7119, 0.7149, 0.7165, 0.7112, 0.7059, 0.7105, 0.711, 0.7144, 0.7158, 0.6987, 0.7047, 0.7062, 0.7107, 0.7102, 0.713, 0.7108, 0.7112] 3 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_1e-05_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.6429531615298255, 0.6462993527835272, 0.6496455440372289, 0.6529917352909306, 0.6563379265446323, 0.659684117798334, 0.6630303090520356, 0.6663765003057374, 0.6697226915594391, 0.6730688828131408, 0.6764150740668424, 0.6797612653205442, 0.6831074565742459, 0.6864536478279476, 0.6897998390816492, 0.6931460303353509, 0.6964922215890527, 0.6998384128427544, 0.703184604096456, 0.7065307953501577, 0.7098769866038595, 0.7132231778575611, 0.7165693691112628, 0.7199155603649645, 0.7232617516186662, 0.726607942872368, 0.7299541341260696, 0.7333003253797713, 0.736646516633473, 0.7399927078871747, 0.7433388991408764, 0.7466850903945781, 0.7500312816482798, 0.7533774729019815, 0.7567236641556832, 0.7600698554093849, 0.7634160466630866, 0.7667622379167882, 0.7701084291704899, 0.7734546204241917, 0.7768008116778934, 0.7801470029315951, 0.7834931941852967, 0.7868393854389985, 0.7901855766927002, 0.7935317679464019, 0.7968779592001036, 0.8002241504538052, 0.803570341707507, 0.8069165329612087, 0.8102627242149103, 0.813608915468612, 0.8169551067223138, 0.8203012979760155, 0.8236474892297172, 0.8269936804834188, 0.8303398717371205, 0.8336860629908223, 0.8370322542445239, 0.8403784454982256, 0.8437246367519273, 0.847070828005629, 0.8504170192593308, 0.8537632105130324, 0.8571094017667341, 0.8604555930204358, 0.8638017842741375, 0.8671479755278392, 0.8704941667815409, 0.8738403580352426, 0.8771865492889444, 0.880532740542646, 0.8838789317963477, 0.8872251230500494, 0.890571314303751, 0.8939175055574528, 0.8972636968111545, 0.9006098880648561, 0.9039560793185579, 0.9073022705722595, 0.9106484618259613, 0.913994653079663, 0.9173408443333646, 0.9206870355870663, 0.924033226840768, 0.9273794180944697, 0.9307256093481715, 0.9340718006018731, 0.9374179918555748, 0.9407641831092766, 0.9441103743629782, 0.94745656561668, 
0.9508027568703816, 0.9541489481240834, 0.9574951393777851, 0.9608413306314867, 0.9641875218851884, 0.9675337131388901, 0.9708799043925918, 0.9742260956462936, 0.9775722868999952, 0.9809184781536969, 0.9842646694073987, 0.9876108606611003, 0.990957051914802, 0.9943032431685037, 0.9976494344222053, 1.0009956256759072, 1.0043418169296088, 1.0076880081833104, 1.0110341994370122, 1.0143803906907138, 1.0177265819444155, 1.0210727731981173, 1.024418964451819, 1.0277651557055207, 1.0311113469592224, 1.0344575382129242, 1.0378037294666258, 1.0411499207203274, 1.0444961119740293, 1.0478423032277309, 1.0511884944814325, 1.0545346857351343, 1.057880876988836, 1.0612270682425375, 1.0645732594962394, 1.067919450749941, 1.0712656420036426, 1.0746118332573444, 1.077958024511046, 1.081304215764748, 1.0846504070184495, 1.0879965982721513, 1.091342789525853, 1.0946889807795546, 1.0980351720332564, 1.101381363286958, 1.1047275545406596, 1.1080737457943615, 1.111419937048063, 1.1147661283017647, 1.1181123195554665, 1.1214585108091681, 1.1248047020628698, 1.1281508933165716, 1.1314970845702732, 1.134843275823975, 1.1381894670776767, 1.1415356583313785, 1.14488184958508, 1.1482280408387817, 1.1515742320924836, 1.1549204233461852, 1.1582666145998868, 1.1616128058535886, 1.1649589971072902, 1.1683051883609918, 1.1715674280348853, 1.1746764627413682, 1.177785497447851, 1.1808945321543336, 1.1840035668608162, 1.187112601567299, 1.1902216362737819, 1.1933306709802645, 1.1964397056867473, 1.19954874039323, 1.2026577750997127, 1.2057668098061955, 1.2088758445126782, 1.2119848792191608, 1.2150939139256436, 1.2182029486321264, 1.2213119833386092, 1.2244210180450918, 1.2275300527515745, 1.2306390874580573, 1.23374812216454, 1.2368571568710227, 1.2399661915775053, 1.2430752262839881, 1.246184260990471, 1.2492932956969538, 1.2524023304034364, 1.255511365109919, 1.2586203998164018, 1.2617294345228847, 1.2648384692293675, 1.26794750393585, 1.2710565386423327, 1.2741655733488155, 1.2772746080552984, 1.280383642761781, 1.2834926774682636, 1.2866017121747464, 1.2897107468812292, 1.292819781587712, 1.2959288162941947, 1.2990378510006773] 2 | validation acc: [0.3862, 0.4004, 0.472, 0.4846, 0.4885, 0.5227, 0.5376, 0.5356, 0.5393, 0.5635, 0.5725, 0.5661, 0.5843, 0.5438, 0.5578, 0.5831, 0.5552, 0.6103, 0.6269, 0.6374, 0.6368, 0.6487, 0.6304, 0.6503, 0.6575, 0.6605, 0.6447, 0.6517, 0.6564, 0.6473, 0.6612, 0.6177, 0.6586, 0.6693, 0.6787, 0.6607, 0.6713, 0.6521, 0.6856, 0.6873, 0.6751, 0.6718, 0.6855, 0.6855, 0.6903, 0.6754, 0.6879, 0.6705, 0.6817, 0.6988, 0.6854, 0.6942, 0.6741, 0.6766, 0.6513, 0.6887, 0.6859, 0.6742, 0.6927, 0.6952, 0.6957, 0.6914, 0.6613, 0.6897, 0.6996, 0.6947, 0.6949, 0.7076, 0.7055, 0.6975, 0.6965, 0.6952, 0.7058, 0.6886, 0.7069, 0.6995, 0.7004, 0.6913, 0.6775, 0.6852, 0.7103, 0.6981, 0.7069, 0.6831, 0.686, 0.6965, 0.7016, 0.7051, 0.7053, 0.71, 0.6983, 0.7085, 0.6941, 0.7142, 0.7117, 0.7053, 0.703, 0.6986, 0.7028, 0.687, 0.7078, 0.7027, 0.7033, 0.7014, 0.7109, 0.7074, 0.6972, 0.7062, 0.7017, 0.6891, 0.7045, 0.7116, 0.6902, 0.7089, 0.7077, 0.7067, 0.6935, 0.7092, 0.7053, 0.7038, 0.7144, 0.7054, 0.6911, 0.7094, 0.7109, 0.7051, 0.7122, 0.7066, 0.7152, 0.6971, 0.7127, 0.7078, 0.6832, 0.7162, 0.7108, 0.7104, 0.7095, 0.6962, 0.709, 0.7153, 0.7138, 0.7133, 0.7069, 0.7129, 0.718, 0.6988, 0.7103, 0.7074, 0.7074, 0.7025, 0.7119, 0.7105, 0.7086, 0.7141, 0.7092, 0.7044, 0.7129, 0.7082, 0.7058, 0.7043, 0.7112, 0.712, 0.7115, 0.7156, 0.7187, 0.7084, 0.7186, 0.7017, 0.7182, 0.7187, 0.709, 0.7035, 0.7123, 0.7159, 0.7169, 0.7134, 
0.7068, 0.7085, 0.7115, 0.7112, 0.7163, 0.7135, 0.7191, 0.721, 0.7158, 0.7156, 0.7113, 0.7226, 0.7208, 0.7148, 0.7143, 0.7124, 0.7221, 0.7156, 0.7205, 0.7194, 0.7078, 0.7183, 0.7229, 0.7208] 3 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A lightweight buffer for maintaining tensors.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | 23 | class TensorBuffer(object): 24 | """A lightweight buffer for maintaining lists. 25 | 26 | The TensorBuffer accumulates tensors of the given shape into a tensor (whose 27 | rank is one more than that of the given shape) via calls to `append`. The 28 | current value of the accumulated tensor can be extracted via the property 29 | `values`. 30 | """ 31 | 32 | def __init__(self, capacity, shape, dtype=tf.int32, name=None): 33 | """Initializes the TensorBuffer. 34 | 35 | Args: 36 | capacity: Initial capacity. Buffer will double in capacity each time it is 37 | filled to capacity. 38 | shape: The shape (as tuple or list) of the tensors to accumulate. 39 | dtype: The type of the tensors. 40 | name: A string name for the variable_scope used. 41 | 42 | Raises: 43 | ValueError: If the shape is empty (specifies scalar shape). 44 | """ 45 | shape = list(shape) 46 | self._rank = len(shape) 47 | self._name = name 48 | self._dtype = dtype 49 | if not self._rank: 50 | raise ValueError('Shape cannot be scalar.') 51 | shape = [capacity] + shape 52 | 53 | with tf.variable_scope(self._name): 54 | # We need to use a placeholder as the initial value to allow resizing. 55 | self._buffer = tf.Variable( 56 | initial_value=tf.placeholder_with_default( 57 | tf.zeros(shape, dtype), shape=None), 58 | trainable=False, 59 | name='buffer', 60 | use_resource=True) 61 | self._current_size = tf.Variable( 62 | initial_value=0, dtype=tf.int32, trainable=False, name='current_size') 63 | self._capacity = tf.Variable( 64 | initial_value=capacity, 65 | dtype=tf.int32, 66 | trainable=False, 67 | name='capacity') 68 | 69 | def append(self, value): 70 | """Appends a new tensor to the end of the buffer. 71 | 72 | Args: 73 | value: The tensor to append. Must match the shape specified in the 74 | initializer. 75 | 76 | Returns: 77 | An op appending the new tensor to the end of the buffer. 
78 | """ 79 | 80 | def _double_capacity(): 81 | """Doubles the capacity of the current tensor buffer.""" 82 | padding = tf.zeros_like(self._buffer, self._buffer.dtype) 83 | new_buffer = tf.concat([self._buffer, padding], axis=0) 84 | if tf.executing_eagerly(): 85 | with tf.variable_scope(self._name, reuse=True): 86 | self._buffer = tf.get_variable( 87 | name='buffer', 88 | dtype=self._dtype, 89 | initializer=new_buffer, 90 | trainable=False) 91 | return self._buffer, tf.assign(self._capacity, 92 | tf.multiply(self._capacity, 2)) 93 | else: 94 | return tf.assign( 95 | self._buffer, new_buffer, 96 | validate_shape=False), tf.assign(self._capacity, 97 | tf.multiply(self._capacity, 2)) 98 | 99 | update_buffer, update_capacity = tf.cond( 100 | tf.equal(self._current_size, self._capacity), 101 | _double_capacity, lambda: (self._buffer, self._capacity)) 102 | 103 | with tf.control_dependencies([update_buffer, update_capacity]): 104 | with tf.control_dependencies([ 105 | tf.assert_less( 106 | self._current_size, 107 | self._capacity, 108 | message='Appending past end of TensorBuffer.'), 109 | tf.assert_equal( 110 | tf.shape(value), 111 | tf.shape(self._buffer)[1:], 112 | message='Appending value of inconsistent shape.') 113 | ]): 114 | with tf.control_dependencies( 115 | [tf.assign(self._buffer[self._current_size, :], value)]): 116 | return tf.assign_add(self._current_size, 1) 117 | 118 | @property 119 | def values(self): 120 | """Returns the accumulated tensor.""" 121 | begin_value = tf.zeros([self._rank + 1], dtype=tf.int32) 122 | value_size = tf.concat([[self._current_size], 123 | tf.constant(-1, tf.int32, [self._rank])], 0) 124 | return tf.slice(self._buffer, begin_value, value_size) 125 | 126 | @property 127 | def current_size(self): 128 | """Returns the current number of tensors in the buffer.""" 129 | return self._current_size 130 | 131 | @property 132 | def capacity(self): 133 | """Returns the current capacity of the buffer.""" 134 | return self._capacity 135 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """Scratchpad for training a CNN on MNIST with DPSGD.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | tf.flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') 25 | tf.flags.DEFINE_integer('batch_size', 256, 'Batch size') 26 | tf.flags.DEFINE_integer('epochs', 15, 'Number of epochs') 27 | 28 | FLAGS = tf.flags.FLAGS 29 | 30 | 31 | def cnn_model_fn(features, labels, mode): 32 | """Model function for a CNN.""" 33 | 34 | # Define CNN architecture using tf.keras.layers. 
35 | input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) 36 | y = tf.keras.layers.Conv2D(16, 8, 37 | strides=2, 38 | padding='same', 39 | activation='relu').apply(input_layer) 40 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y) 41 | y = tf.keras.layers.Conv2D(32, 4, 42 | strides=2, 43 | padding='valid', 44 | activation='relu').apply(y) 45 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y) 46 | y = tf.keras.layers.Flatten().apply(y) 47 | y = tf.keras.layers.Dense(32, activation='relu').apply(y) 48 | logits = tf.keras.layers.Dense(10).apply(y) 49 | 50 | # Calculate loss as a vector and as its average across minibatch. 51 | vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, 52 | logits=logits) 53 | scalar_loss = tf.reduce_mean(vector_loss) 54 | 55 | # Configure the training op (for TRAIN mode). 56 | if mode == tf.estimator.ModeKeys.TRAIN: 57 | optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) 58 | opt_loss = scalar_loss 59 | global_step = tf.train.get_global_step() 60 | train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) 61 | return tf.estimator.EstimatorSpec(mode=mode, 62 | loss=scalar_loss, 63 | train_op=train_op) 64 | 65 | # Add evaluation metrics (for EVAL mode). 66 | elif mode == tf.estimator.ModeKeys.EVAL: 67 | eval_metric_ops = { 68 | 'accuracy': 69 | tf.metrics.accuracy( 70 | labels=labels, 71 | predictions=tf.argmax(input=logits, axis=1)) 72 | } 73 | return tf.estimator.EstimatorSpec(mode=mode, 74 | loss=scalar_loss, 75 | eval_metric_ops=eval_metric_ops) 76 | 77 | 78 | def load_mnist(): 79 | """Loads MNIST and preprocesses to combine training and validation data.""" 80 | train, test = tf.keras.datasets.mnist.load_data() 81 | train_data, train_labels = train 82 | test_data, test_labels = test 83 | 84 | train_data = np.array(train_data, dtype=np.float32) / 255 85 | test_data = np.array(test_data, dtype=np.float32) / 255 86 | 87 | train_labels = np.array(train_labels, dtype=np.int32) 88 | test_labels = np.array(test_labels, dtype=np.int32) 89 | 90 | assert train_data.min() == 0. 91 | assert train_data.max() == 1. 92 | assert test_data.min() == 0. 93 | assert test_data.max() == 1. 94 | assert train_labels.ndim == 1 95 | assert test_labels.ndim == 1 96 | 97 | return train_data, train_labels, test_data, test_labels 98 | 99 | 100 | def main(unused_argv): 101 | tf.logging.set_verbosity(tf.logging.INFO) 102 | 103 | # Load training and test data. 104 | train_data, train_labels, test_data, test_labels = load_mnist() 105 | 106 | # Instantiate the tf.Estimator. 107 | mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn) 108 | 109 | # Create tf.Estimator input functions for the training and test data. 110 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 111 | x={'x': train_data}, 112 | y=train_labels, 113 | batch_size=FLAGS.batch_size, 114 | num_epochs=FLAGS.epochs, 115 | shuffle=True) 116 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 117 | x={'x': test_data}, 118 | y=test_labels, 119 | num_epochs=1, 120 | shuffle=False) 121 | 122 | # Training loop. 123 | steps_per_epoch = 60000 // FLAGS.batch_size 124 | for epoch in range(1, FLAGS.epochs + 1): 125 | # Train the model for one epoch. 
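    # (train_input_fn is built with num_epochs=FLAGS.epochs, so its input
    # pipeline can serve every iteration of this loop; each train() call
    # below advances exactly steps_per_epoch steps, i.e. one epoch.)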
126 | mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) 127 | 128 | # Evaluate the model and print results 129 | eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) 130 | test_accuracy = eval_results['accuracy'] 131 | print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) 132 | 133 | if __name__ == '__main__': 134 | tf.app.run() 135 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/privacy_ledger_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tests for PrivacyLedger.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from privacy.analysis import privacy_ledger 24 | from privacy.dp_query import gaussian_query 25 | from privacy.dp_query import nested_query 26 | from privacy.dp_query import test_utils 27 | 28 | tf.enable_eager_execution() 29 | 30 | 31 | class PrivacyLedgerTest(tf.test.TestCase): 32 | 33 | def test_fail_on_probability_zero(self): 34 | with self.assertRaisesRegexp(ValueError, 35 | 'Selection probability cannot be 0.'): 36 | privacy_ledger.PrivacyLedger(10, 0) 37 | 38 | def test_basic(self): 39 | ledger = privacy_ledger.PrivacyLedger(10, 0.1) 40 | ledger.record_sum_query(5.0, 1.0) 41 | ledger.record_sum_query(2.0, 0.5) 42 | 43 | ledger.finalize_sample() 44 | 45 | expected_queries = [[5.0, 1.0], [2.0, 0.5]] 46 | formatted = ledger.get_formatted_ledger_eager() 47 | 48 | sample = formatted[0] 49 | self.assertAllClose(sample.population_size, 10.0) 50 | self.assertAllClose(sample.selection_probability, 0.1) 51 | self.assertAllClose(sorted(sample.queries), sorted(expected_queries)) 52 | 53 | def test_sum_query(self): 54 | record1 = tf.constant([2.0, 0.0]) 55 | record2 = tf.constant([-1.0, 1.0]) 56 | 57 | population_size = tf.Variable(0) 58 | selection_probability = tf.Variable(1.0) 59 | 60 | query = gaussian_query.GaussianSumQuery( 61 | l2_norm_clip=10.0, stddev=0.0) 62 | query = privacy_ledger.QueryWithLedger( 63 | query, population_size, selection_probability) 64 | 65 | # First sample. 66 | tf.assign(population_size, 10) 67 | tf.assign(selection_probability, 0.1) 68 | test_utils.run_query(query, [record1, record2]) 69 | 70 | expected_queries = [[10.0, 0.0]] 71 | formatted = query.ledger.get_formatted_ledger_eager() 72 | sample_1 = formatted[0] 73 | self.assertAllClose(sample_1.population_size, 10.0) 74 | self.assertAllClose(sample_1.selection_probability, 0.1) 75 | self.assertAllClose(sample_1.queries, expected_queries) 76 | 77 | # Second sample. 
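    # (Recording a second sample with an updated population size and
    # selection probability; the formatted ledger should then hold two
    # samples, with the first one unchanged.)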
78 | tf.assign(population_size, 20) 79 | tf.assign(selection_probability, 0.2) 80 | test_utils.run_query(query, [record1, record2]) 81 | 82 | formatted = query.ledger.get_formatted_ledger_eager() 83 | sample_1, sample_2 = formatted 84 | self.assertAllClose(sample_1.population_size, 10.0) 85 | self.assertAllClose(sample_1.selection_probability, 0.1) 86 | self.assertAllClose(sample_1.queries, expected_queries) 87 | 88 | self.assertAllClose(sample_2.population_size, 20.0) 89 | self.assertAllClose(sample_2.selection_probability, 0.2) 90 | self.assertAllClose(sample_2.queries, expected_queries) 91 | 92 | def test_nested_query(self): 93 | population_size = tf.Variable(0) 94 | selection_probability = tf.Variable(1.0) 95 | 96 | query1 = gaussian_query.GaussianAverageQuery( 97 | l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0) 98 | query2 = gaussian_query.GaussianAverageQuery( 99 | l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0) 100 | 101 | query = nested_query.NestedQuery([query1, query2]) 102 | query = privacy_ledger.QueryWithLedger( 103 | query, population_size, selection_probability) 104 | 105 | record1 = [1.0, [12.0, 9.0]] 106 | record2 = [5.0, [1.0, 2.0]] 107 | 108 | # First sample. 109 | tf.assign(population_size, 10) 110 | tf.assign(selection_probability, 0.1) 111 | test_utils.run_query(query, [record1, record2]) 112 | 113 | expected_queries = [[4.0, 2.0], [5.0, 1.0]] 114 | formatted = query.ledger.get_formatted_ledger_eager() 115 | sample_1 = formatted[0] 116 | self.assertAllClose(sample_1.population_size, 10.0) 117 | self.assertAllClose(sample_1.selection_probability, 0.1) 118 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries)) 119 | 120 | # Second sample. 121 | tf.assign(population_size, 20) 122 | tf.assign(selection_probability, 0.2) 123 | test_utils.run_query(query, [record1, record2]) 124 | 125 | formatted = query.ledger.get_formatted_ledger_eager() 126 | sample_1, sample_2 = formatted 127 | self.assertAllClose(sample_1.population_size, 10.0) 128 | self.assertAllClose(sample_1.selection_probability, 0.1) 129 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries)) 130 | 131 | self.assertAllClose(sample_2.population_size, 20.0) 132 | self.assertAllClose(sample_2.selection_probability, 0.2) 133 | self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries)) 134 | 135 | 136 | if __name__ == '__main__': 137 | tf.test.main() 138 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2017/aggregation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | from six.moves import xrange 23 | 24 | 25 | def labels_from_probs(probs): 26 | """ 27 | Helper function: computes argmax along last dimension of array to obtain 28 | labels (max prob or max logit value) 29 | :param probs: numpy array where probabilities or logits are on last dimension 30 | :return: array with the same shape as the input, with the last dimension 31 | removed, now containing the labels 32 | """ 33 | # Compute last axis index 34 | last_axis = len(np.shape(probs)) - 1 35 | 36 | # Label is argmax over last dimension 37 | labels = np.argmax(probs, axis=last_axis) 38 | 39 | # Return as np.int32 40 | return np.asarray(labels, dtype=np.int32) 41 | 42 | 43 | def noisy_max(logits, lap_scale, return_clean_votes=False): 44 | """ 45 | This aggregation mechanism takes the softmax/logit output of several models 46 | resulting from inference on identical inputs and computes the noisy-max of 47 | the votes for candidate classes to select a label for each sample: it 48 | adds Laplacian noise to label counts and returns the most frequent label. 49 | :param logits: logits or probabilities for each sample 50 | :param lap_scale: scale of the Laplacian noise to be added to counts 51 | :param return_clean_votes: if set to True, also returns clean votes (without 52 | Laplacian noise). This can be used to perform the 53 | privacy analysis of this aggregation mechanism. 54 | :return: the aggregated labels and, if return_clean_votes is True, also the 55 | clean vote counts for each class per sample and the original labels 56 | produced by the teachers.
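  Illustrative example (hypothetical values): with 100 teachers voting over
  10 classes, a sample whose clean counts are [50, 30, 20, 0, ...] gets
  independent Laplacian noise of scale lap_scale added to each of the 10
  counts before the argmax, so nearly tied classes may swap; this randomness
  is what yields the differential privacy guarantee.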
57 | """ 58 | 59 | # Compute labels from logits/probs and reshape array properly 60 | labels = labels_from_probs(logits) 61 | labels_shape = np.shape(labels) 62 | labels = labels.reshape((labels_shape[0], labels_shape[1])) 63 | 64 | # Initialize array to hold final labels 65 | result = np.zeros(int(labels_shape[1])) 66 | 67 | if return_clean_votes: 68 | # Initialize array to hold clean votes for each sample 69 | clean_votes = np.zeros((int(labels_shape[1]), 10)) 70 | 71 | # Parse each sample 72 | for i in xrange(int(labels_shape[1])): 73 | # Count number of votes assigned to each class 74 | label_counts = np.bincount(labels[:, i], minlength=10) 75 | 76 | if return_clean_votes: 77 | # Store vote counts for export 78 | clean_votes[i] = label_counts 79 | 80 | # Cast in float32 to prepare before addition of Laplacian noise 81 | label_counts = np.asarray(label_counts, dtype=np.float32) 82 | 83 | # Sample independent Laplacian noise for each class 84 | for item in xrange(10): 85 | label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale)) 86 | 87 | # Result is the most frequent label 88 | result[i] = np.argmax(label_counts) 89 | 90 | # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries 91 | result = np.asarray(result, dtype=np.int32) 92 | 93 | if return_clean_votes: 94 | # Returns several array, which are later saved: 95 | # result: labels obtained from the noisy aggregation 96 | # clean_votes: the number of teacher votes assigned to each sample and class 97 | # labels: the labels assigned by teachers (before the noisy aggregation) 98 | return result, clean_votes, labels 99 | else: 100 | # Only return labels resulting from noisy aggregation 101 | return result 102 | 103 | 104 | def aggregation_most_frequent(logits): 105 | """ 106 | This aggregation mechanism takes the softmax/logit output of several models 107 | resulting from inference on identical inputs and computes the most frequent 108 | label. It is deterministic (no noise injection like noisy_max() above. 109 | :param logits: logits or probabilities for each sample 110 | :return: 111 | """ 112 | # Compute labels from logits/probs and reshape array properly 113 | labels = labels_from_probs(logits) 114 | labels_shape = np.shape(labels) 115 | labels = labels.reshape((labels_shape[0], labels_shape[1])) 116 | 117 | # Initialize array to hold final labels 118 | result = np.zeros(int(labels_shape[1])) 119 | 120 | # Parse each sample 121 | for i in xrange(int(labels_shape[1])): 122 | # Count number of votes assigned to each class 123 | label_counts = np.bincount(labels[:, i], minlength=10) 124 | 125 | label_counts = np.asarray(label_counts, dtype=np.int32) 126 | 127 | # Result is the most frequent label 128 | result[i] = np.argmax(label_counts) 129 | 130 | return np.asarray(result, dtype=np.int32) 131 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/gaussian_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implements DPQuery interface for Gaussian average queries. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | 24 | from distutils.version import LooseVersion 25 | import tensorflow as tf 26 | 27 | from privacy.dp_query import dp_query 28 | from privacy.dp_query import normalized_query 29 | 30 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 31 | nest = tf.contrib.framework.nest 32 | else: 33 | nest = tf.nest 34 | 35 | 36 | class GaussianSumQuery(dp_query.SumAggregationDPQuery): 37 | """Implements DPQuery interface for Gaussian sum queries. 38 | 39 | Accumulates clipped vectors, then adds Gaussian noise to the sum. 40 | """ 41 | 42 | # pylint: disable=invalid-name 43 | _GlobalState = collections.namedtuple( 44 | '_GlobalState', ['l2_norm_clip', 'stddev']) 45 | 46 | def __init__(self, l2_norm_clip, stddev): 47 | """Initializes the GaussianSumQuery. 48 | 49 | Args: 50 | l2_norm_clip: The clipping norm to apply to the global norm of each 51 | record. 52 | stddev: The stddev of the noise added to the sum. 53 | """ 54 | self._l2_norm_clip = l2_norm_clip 55 | self._stddev = stddev 56 | self._ledger = None 57 | 58 | def set_ledger(self, ledger): 59 | self._ledger = ledger 60 | 61 | def make_global_state(self, l2_norm_clip, stddev): 62 | """Creates a global state from the given parameters.""" 63 | return self._GlobalState(tf.cast(l2_norm_clip, tf.float32), 64 | tf.cast(stddev, tf.float32)) 65 | 66 | def initial_global_state(self): 67 | return self.make_global_state(self._l2_norm_clip, self._stddev) 68 | 69 | def derive_sample_params(self, global_state): 70 | return global_state.l2_norm_clip 71 | 72 | def initial_sample_state(self, template): 73 | return nest.map_structure( 74 | dp_query.zeros_like, template) 75 | 76 | def preprocess_record_impl(self, params, record): 77 | """Clips the l2 norm, returning the clipped record and the l2 norm. 78 | 79 | Args: 80 | params: The parameters for the sample. 81 | record: The record to be processed. 82 | 83 | Returns: 84 | A tuple (preprocessed_records, l2_norm) where `preprocessed_records` is 85 | the structure of preprocessed tensors, and l2_norm is the total l2 norm 86 | before clipping. 
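      Illustrative note: clipping follows tf.clip_by_global_norm semantics,
      so a record whose global l2 norm is 10.0 with l2_norm_clip=4.0 is
      scaled by 0.4 in every component, while a record whose norm is already
      below the clip is returned unchanged.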
87 | """ 88 | l2_norm_clip = params 89 | record_as_list = nest.flatten(record) 90 | clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip) 91 | return nest.pack_sequence_as(record, clipped_as_list), norm 92 | 93 | def preprocess_record(self, params, record): 94 | preprocessed_record, _ = self.preprocess_record_impl(params, record) 95 | return preprocessed_record 96 | 97 | def get_noised_result(self, sample_state, global_state): 98 | """See base class.""" 99 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 100 | def add_noise(v): 101 | return v + tf.random_normal(tf.shape(v), stddev=global_state.stddev) 102 | else: 103 | random_normal = tf.random_normal_initializer(stddev=global_state.stddev) 104 | def add_noise(v): 105 | return v + random_normal(tf.shape(v)) 106 | 107 | if self._ledger: 108 | dependencies = [ 109 | self._ledger.record_sum_query( 110 | global_state.l2_norm_clip, global_state.stddev) 111 | ] 112 | else: 113 | dependencies = [] 114 | with tf.control_dependencies(dependencies): 115 | return nest.map_structure(add_noise, sample_state), global_state 116 | 117 | 118 | class GaussianAverageQuery(normalized_query.NormalizedQuery): 119 | """Implements DPQuery interface for Gaussian average queries. 120 | 121 | Accumulates clipped vectors, adds Gaussian noise, and normalizes. 122 | 123 | Note that we use "fixed-denominator" estimation: the denominator should be 124 | specified as the expected number of records per sample. Accumulating the 125 | denominator separately would also be possible but would be produce a higher 126 | variance estimator. 127 | """ 128 | 129 | def __init__(self, 130 | l2_norm_clip, 131 | sum_stddev, 132 | denominator): 133 | """Initializes the GaussianAverageQuery. 134 | 135 | Args: 136 | l2_norm_clip: The clipping norm to apply to the global norm of each 137 | record. 138 | sum_stddev: The stddev of the noise added to the sum (before 139 | normalization). 140 | denominator: The normalization constant (applied after noise is added to 141 | the sum). 142 | """ 143 | super(GaussianAverageQuery, self).__init__( 144 | numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev), 145 | denominator=denominator) 146 | -------------------------------------------------------------------------------- /dp_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Differentially private optimizers for TensorFlow.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import privacy_ledger 23 | from privacy.dp_query import gaussian_query 24 | 25 | def make_optimizer_class(cls): 26 | """Constructs a DP optimizer class from an existing one.""" 27 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ 28 | child_code = cls._compute_gradients.__code__ 29 | if child_code is not parent_code: 30 | tf.logging.warning( 31 | 'WARNING: Calling make_optimizer_class() on class %s that overrides ' 32 | 'method compute_gradients(). Check to ensure that ' 33 | 'make_optimizer_class() does not interfere with overridden version.', 34 | cls.__name__) 35 | 36 | class DPOptimizerClass(cls): 37 | """Differentially private subclass of given class cls.""" 38 | 39 | def __init__( 40 | self, 41 | dp_sum_query, 42 | num_microbatches=None, 43 | unroll_microbatches=False, 44 | *args, 45 | **kwargs): 46 | """Initialize the DPOptimizerClass. 47 | 48 | Args: 49 | dp_sum_query: DPQuery object, specifying differential privacy 50 | mechanism to use. 51 | num_microbatches: How many microbatches into which the minibatch is 52 | split. If None, will default to the size of the minibatch, and 53 | per-example gradients will be computed. 54 | unroll_microbatches: If true, processes microbatches within a Python 55 | loop instead of a tf.while_loop. Can be used if using a tf.while_loop 56 | raises an exception. 57 | """ 58 | super(DPOptimizerClass, self).__init__(*args, **kwargs) 59 | ###### accountant + sanitizer ###### 60 | self._dp_sum_query = dp_sum_query 61 | ###### 62 | self._num_microbatches = num_microbatches 63 | self._global_state = self._dp_sum_query.initial_global_state() 64 | self._unroll_microbatches = unroll_microbatches 65 | 66 | def compute_gradients(self, loss, var_list, gate_gradients=None, aggregation_method=None, colocate_gradients_with_ops=False, grad_loss=None, gradient_tape=None): 67 | if not gradient_tape: 68 | raise ValueError('A tape needs to be passed.') 69 | 70 | vector_loss = loss() 71 | if self._num_microbatches is None: 72 | self._num_microbatches = tf.shape(vector_loss)[0] 73 | sample_state = self._dp_sum_query.initial_sample_state(var_list) 74 | microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1]) 75 | sample_params = (self._dp_sum_query.derive_sample_params(self._global_state)) 76 | 77 | for idx in range(self._num_microbatches): 78 | ###### compute gradient ###### 79 | microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [idx])) 80 | grads = gradient_tape.gradient(microbatch_loss, var_list) 81 | ###### 82 | 83 | ###### accountant ###### 84 | sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads) 85 | ###### 86 | 87 | ###### sanitizer ###### 88 | grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state)) 89 | ###### 90 | 91 | def normalize(v): 92 | return v / tf.cast(self._num_microbatches, tf.float32) 93 | 94 | final_grads = tf.nest.map_structure(normalize, grad_sums) 95 | 96 | grads_and_vars = list(zip(final_grads, var_list)) 97 | return grads_and_vars 98 | 99 | return DPOptimizerClass 100 | 101 | 102 | def make_gaussian_optimizer_class(cls): 103 | """Constructs a DP optimizer with Gaussian averaging of updates.""" 104 | 105 | class DPGaussianOptimizerClass(make_optimizer_class(cls)): 
106 | """DP subclass of given class cls using Gaussian averaging.""" 107 | 108 | def __init__(self, l2_norm_clip, noise_multiplier, num_microbatches=None, ledger=None, unroll_microbatches=False, *args, **kwargs): 109 | dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, l2_norm_clip * noise_multiplier) 110 | 111 | if ledger: 112 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, ledger=ledger) 113 | 114 | super(DPGaussianOptimizerClass, self).__init__(dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs) 115 | 116 | @property 117 | def ledger(self): 118 | return self._dp_sum_query.ledger 119 | 120 | return DPGaussianOptimizerClass 121 | 122 | DPAdagradOptimizer = make_optimizer_class(tf.optimizers.Adagrad) 123 | DPAdamOptimizer = make_optimizer_class(tf.optimizers.Adam) 124 | DPGradientDescentOptimizer = make_optimizer_class(tf.optimizers.SGD) 125 | 126 | DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adagrad) 127 | DPAdamGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adam) 128 | DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.SGD) 129 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for differentially private optimizers.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl.testing import parameterized 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from privacy.analysis import privacy_ledger 25 | from privacy.dp_query import gaussian_query 26 | from privacy.optimizers import dp_optimizer 27 | 28 | 29 | class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): 30 | 31 | def setUp(self): 32 | tf.enable_eager_execution() 33 | super(DPOptimizerEagerTest, self).setUp() 34 | 35 | def _loss_fn(self, val0, val1): 36 | return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1) 37 | 38 | @parameterized.named_parameters( 39 | ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1, 40 | [-2.5, -2.5]), 41 | ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2, 42 | [-2.5, -2.5]), 43 | ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4, 44 | [-2.5, -2.5]), 45 | ('DPAdagrad 1', dp_optimizer.DPAdagradOptimizer, 1, [-2.5, -2.5]), 46 | ('DPAdagrad 2', dp_optimizer.DPAdagradOptimizer, 2, [-2.5, -2.5]), 47 | ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]), 48 | ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]), 49 | ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), 50 | ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5])) 51 | def testBaseline(self, cls, num_microbatches, expected_answer): 52 | with tf.GradientTape(persistent=True) as gradient_tape: 53 | var0 = tf.Variable([1.0, 2.0]) 54 | data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) 55 | 56 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) 57 | dp_sum_query = privacy_ledger.QueryWithLedger( 58 | dp_sum_query, 1e6, num_microbatches / 1e6) 59 | 60 | opt = cls( 61 | dp_sum_query, 62 | num_microbatches=num_microbatches, 63 | learning_rate=2.0) 64 | 65 | self.evaluate(tf.global_variables_initializer()) 66 | # Fetch params to validate initial values 67 | self.assertAllClose([1.0, 2.0], self.evaluate(var0)) 68 | 69 | # Expected gradient is sum of differences divided by number of 70 | # microbatches. 71 | grads_and_vars = opt.compute_gradients( 72 | lambda: self._loss_fn(var0, data0), [var0], 73 | gradient_tape=gradient_tape) 74 | self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) 75 | 76 | @parameterized.named_parameters( 77 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), 78 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), 79 | ('DPAdam', dp_optimizer.DPAdamOptimizer)) 80 | def testClippingNorm(self, cls): 81 | with tf.GradientTape(persistent=True) as gradient_tape: 82 | var0 = tf.Variable([0.0, 0.0]) 83 | data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) 84 | 85 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) 86 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) 87 | 88 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) 89 | 90 | self.evaluate(tf.global_variables_initializer()) 91 | # Fetch params to validate initial values 92 | self.assertAllClose([0.0, 0.0], self.evaluate(var0)) 93 | 94 | # Expected gradient is sum of differences. 
95 | grads_and_vars = opt.compute_gradients( 96 | lambda: self._loss_fn(var0, data0), [var0], 97 | gradient_tape=gradient_tape) 98 | self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) 99 | 100 | @parameterized.named_parameters( 101 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), 102 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), 103 | ('DPAdam', dp_optimizer.DPAdamOptimizer)) 104 | def testNoiseMultiplier(self, cls): 105 | with tf.GradientTape(persistent=True) as gradient_tape: 106 | var0 = tf.Variable([0.0]) 107 | data0 = tf.Variable([[0.0]]) 108 | 109 | dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) 110 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) 111 | 112 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) 113 | 114 | self.evaluate(tf.global_variables_initializer()) 115 | # Fetch params to validate initial values 116 | self.assertAllClose([0.0], self.evaluate(var0)) 117 | 118 | grads = [] 119 | for _ in range(1000): 120 | grads_and_vars = opt.compute_gradients( 121 | lambda: self._loss_fn(var0, data0), [var0], 122 | gradient_tape=gradient_tape) 123 | grads.append(grads_and_vars[0][0]) 124 | 125 | # Test standard deviation is close to l2_norm_clip * noise_multiplier. 126 | self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) 127 | 128 | 129 | if __name__ == '__main__': 130 | tf.test.main() 131 | -------------------------------------------------------------------------------- /mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Training a CNN on MNIST with Keras and the DP SGD optimizer.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl import app 21 | from absl import flags 22 | from absl import logging 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.analysis.rdp_accountant import compute_rdp 28 | from privacy.analysis.rdp_accountant import get_privacy_spent 29 | from dp_optimizer import DPGradientDescentGaussianOptimizer 30 | 31 | GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer 32 | 33 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. 
If False, train with vanilla SGD.') 34 | flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate for training') 35 | flags.DEFINE_float('noise_multiplier', 1.1, 'Ratio of the standard deviation to the clipping norm') 36 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 37 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 38 | flags.DEFINE_integer('epochs', 400, 'Number of epochs') 39 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches (must evenly divide batch_size)') 40 | flags.DEFINE_string('model_dir', None, 'Model directory') 41 | 42 | FLAGS = flags.FLAGS 43 | delta = 1e-2  # Target delta of the (epsilon, delta) guarantee. The conventional choice for MNIST's 60000 training points is 1e-5; larger values are swept in the experiments (see results/). 44 | 45 | 46 | def compute_epsilon(steps): 47 | """Computes epsilon value for given hyperparameters.""" 48 | if FLAGS.noise_multiplier == 0.0: 49 | return float('inf') 50 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 51 | sampling_probability = FLAGS.batch_size / 60000 52 | rdp = compute_rdp(q=sampling_probability, noise_multiplier=FLAGS.noise_multiplier, steps=steps, orders=orders) 53 | return get_privacy_spent(orders, rdp, target_delta=delta)[0] 54 | 55 | 56 | class EpsilonPrintingCallback(tf.keras.callbacks.Callback): 57 | """Callback for Keras model to evaluate epsilon after every epoch.""" 58 | def __init__(self): 59 | self.eps_history = [] 60 | 61 | def on_epoch_end(self, epoch, logs=None): 62 | if FLAGS.dpsgd: 63 | eps = compute_epsilon((epoch + 1) * (60000 // FLAGS.batch_size)) 64 | self.eps_history.append(eps) 65 | print(', eps = {}'.format(eps)) 66 | 67 | 68 | def load_mnist(): 69 | """Loads MNIST and preprocesses to combine training and validation data.""" 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_data, train_labels = train 72 | test_data, test_labels = test 73 | 74 | train_data = np.array(train_data, dtype=np.float32) / 255 75 | test_data = np.array(test_data, dtype=np.float32) / 255 76 | 77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1) 78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1) 79 | 80 | train_labels = np.array(train_labels, dtype=np.int32) 81 | test_labels = np.array(test_labels, dtype=np.int32) 82 | 83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10) 84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10) 85 | 86 | return train_data, train_labels, test_data, test_labels 87 | 88 | 89 | def main(unused_argv): 90 | logging.set_verbosity(logging.INFO) 91 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 92 | raise ValueError('Number of microbatches should evenly divide batch_size') 93 | 94 | # Load training and test data.
95 | train_data, train_labels, test_data, test_labels = load_mnist() 96 | 97 | # Define a sequential Keras model 98 | model = tf.keras.Sequential([ 99 | tf.keras.layers.Conv2D(16, 8, strides=2, padding='same', activation='relu', input_shape=(28, 28, 1)), 100 | tf.keras.layers.MaxPool2D(2, 1), 101 | tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid', activation='relu'), 102 | tf.keras.layers.MaxPool2D(2, 1), 103 | tf.keras.layers.Flatten(), 104 | tf.keras.layers.Dense(32, activation='relu'), 105 | tf.keras.layers.Dense(10) 106 | ]) 107 | 108 | if FLAGS.dpsgd: 109 | optimizer = DPGradientDescentGaussianOptimizer( 110 | l2_norm_clip=FLAGS.l2_norm_clip, 111 | noise_multiplier=FLAGS.noise_multiplier, 112 | num_microbatches=FLAGS.microbatches, 113 | learning_rate=FLAGS.learning_rate) 114 | # Compute vector of per-example loss rather than its mean over a minibatch. 115 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction=tf.compat.v1.losses.Reduction.NONE) 116 | else: 117 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 118 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) 119 | 120 | # Compile model with Keras 121 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) 122 | 123 | # Train model with Keras 124 | eps_callback = EpsilonPrintingCallback() 125 | fit_history = model.fit(train_data, train_labels, epochs=FLAGS.epochs, validation_data=(test_data, test_labels), batch_size=FLAGS.batch_size, callbacks=[eps_callback]) 126 | eps_history = eps_callback.eps_history 127 | val_acc_history = fit_history.history['val_accuracy'] 128 | with open('delta_{}_lr_{}.txt'.format(delta, FLAGS.learning_rate), 'w') as f: 129 | f.write('eps: {}\n'.format(eps_history)) 130 | f.write('validation acc: {}\n'.format(val_acc_history)) 131 | 132 | if __name__ == '__main__': 133 | app.run(main) -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/nested_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for NestedQuery.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | from absl.testing import parameterized 23 | from distutils.version import LooseVersion 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.dp_query import gaussian_query 28 | from privacy.dp_query import nested_query 29 | from privacy.dp_query import test_utils 30 | 31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 32 | nest = tf.contrib.framework.nest 33 | else: 34 | nest = tf.nest 35 | 36 | _basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0) 37 | 38 | 39 | class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): 40 | 41 | def test_nested_gaussian_sum_no_clip_no_noise(self): 42 | with self.cached_session() as sess: 43 | query1 = gaussian_query.GaussianSumQuery( 44 | l2_norm_clip=10.0, stddev=0.0) 45 | query2 = gaussian_query.GaussianSumQuery( 46 | l2_norm_clip=10.0, stddev=0.0) 47 | 48 | query = nested_query.NestedQuery([query1, query2]) 49 | 50 | record1 = [1.0, [2.0, 3.0]] 51 | record2 = [4.0, [3.0, 2.0]] 52 | 53 | query_result, _ = test_utils.run_query(query, [record1, record2]) 54 | result = sess.run(query_result) 55 | expected = [5.0, [5.0, 5.0]] 56 | self.assertAllClose(result, expected) 57 | 58 | def test_nested_gaussian_average_no_clip_no_noise(self): 59 | with self.cached_session() as sess: 60 | query1 = gaussian_query.GaussianAverageQuery( 61 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) 62 | query2 = gaussian_query.GaussianAverageQuery( 63 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) 64 | 65 | query = nested_query.NestedQuery([query1, query2]) 66 | 67 | record1 = [1.0, [2.0, 3.0]] 68 | record2 = [4.0, [3.0, 2.0]] 69 | 70 | query_result, _ = test_utils.run_query(query, [record1, record2]) 71 | result = sess.run(query_result) 72 | expected = [1.0, [1.0, 1.0]] 73 | self.assertAllClose(result, expected) 74 | 75 | def test_nested_gaussian_average_with_clip_no_noise(self): 76 | with self.cached_session() as sess: 77 | query1 = gaussian_query.GaussianAverageQuery( 78 | l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0) 79 | query2 = gaussian_query.GaussianAverageQuery( 80 | l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0) 81 | 82 | query = nested_query.NestedQuery([query1, query2]) 83 | 84 | record1 = [1.0, [12.0, 9.0]] # Clipped to [1.0, [4.0, 3.0]] 85 | record2 = [5.0, [1.0, 2.0]] # Clipped to [4.0, [1.0, 2.0]] 86 | 87 | query_result, _ = test_utils.run_query(query, [record1, record2]) 88 | result = sess.run(query_result) 89 | expected = [1.0, [1.0, 1.0]] 90 | self.assertAllClose(result, expected) 91 | 92 | def test_complex_nested_query(self): 93 | with self.cached_session() as sess: 94 | query_ab = gaussian_query.GaussianSumQuery( 95 | l2_norm_clip=1.0, stddev=0.0) 96 | query_c = gaussian_query.GaussianAverageQuery( 97 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0) 98 | query_d = gaussian_query.GaussianSumQuery( 99 | l2_norm_clip=10.0, stddev=0.0) 100 | 101 | query = nested_query.NestedQuery( 102 | [query_ab, {'c': query_c, 'd': [query_d]}]) 103 | 104 | record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}] 105 | record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}] 106 | 107 | query_result, _ = test_utils.run_query(query, [record1, record2]) 108 | result = sess.run(query_result) 109 | expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}] 110 | self.assertAllClose(result, expected) 111 | 
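  # (The test below checks noise calibration empirically: over 1000 runs,
  # the measured stddev of the sum query's output should be close to
  # sum_stddev, and that of the average query's outputs close to
  # sum_stddev / denominator.)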
112 |   def test_nested_query_with_noise(self):
113 |     with self.cached_session() as sess:
114 |       sum_stddev = 2.71828
115 |       denominator = 3.14159
116 | 
117 |       query1 = gaussian_query.GaussianSumQuery(
118 |           l2_norm_clip=1.5, stddev=sum_stddev)
119 |       query2 = gaussian_query.GaussianAverageQuery(
120 |           l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
121 |       query = nested_query.NestedQuery((query1, query2))
122 | 
123 |       record1 = (3.0, [2.0, 1.5])
124 |       record2 = (0.0, [-1.0, -3.5])
125 | 
126 |       query_result, _ = test_utils.run_query(query, [record1, record2])
127 | 
128 |       noised_averages = []
129 |       for _ in range(1000):
130 |         noised_averages.append(nest.flatten(sess.run(query_result)))
131 | 
132 |       result_stddev = np.std(noised_averages, 0)
133 |       avg_stddev = sum_stddev / denominator
134 |       expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
135 |       self.assertArrayNear(result_stddev, expected_stddev, 0.1)
136 | 
137 |   @parameterized.named_parameters(
138 |       ('type_mismatch', [_basic_query], (1.0,), TypeError),
139 |       ('too_many_queries', [_basic_query, _basic_query], [1.0], ValueError),
140 |       ('query_too_deep', [_basic_query, [_basic_query]], [1.0, 1.0], TypeError))
141 |   def test_record_incompatible_with_query(
142 |       self, queries, record, error_type):
143 |     with self.assertRaises(error_type):
144 |       test_utils.run_query(nested_query.NestedQuery(queries), [record])
145 | 
146 | 
147 | if __name__ == '__main__':
148 |   tf.test.main()
149 | 
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/README.md:
--------------------------------------------------------------------------------
1 | # Learning private models with multiple teachers
2 | 
3 | This repository contains code to create a setup for learning privacy-preserving
4 | student models by transferring knowledge from an ensemble of teachers trained
5 | on disjoint subsets of the data for which privacy guarantees are to be provided.
6 | 
7 | Knowledge acquired by teachers is transferred to the student in a differentially
8 | private manner by noisily aggregating the teacher decisions before feeding them
9 | to the student during training.
10 | 
11 | The paper describing the approach is [arXiv:1610.05755](https://arxiv.org/abs/1610.05755).
12 | 
13 | ## Dependencies
14 | 
15 | This model uses `TensorFlow` to perform numerical computations associated with
16 | machine learning models, as well as common Python libraries like `numpy`,
17 | `scipy`, and `six`. Instructions to install these can be found in their
18 | respective documentation.
19 | 
20 | ## How to run
21 | 
22 | This repository supports the MNIST and SVHN datasets. The following
23 | instructions are given for MNIST but can easily be adapted by replacing the
24 | flag `--dataset=mnist` with `--dataset=svhn`.
25 | There are two steps: teacher training and student training. Data will be
26 | downloaded automatically when you start the teacher training.
27 | 
28 | In the first step, we train an ensemble of teacher models; in the second,
29 | we train a student using predictions made by this ensemble.
30 | 
31 | **Training the teachers:** first run the `train_teachers.py` file with at least
32 | three flags specifying (1) the number of teachers, (2) the ID of the teacher
33 | you are training among these teachers, and (3) the dataset on which to train.
34 | For instance, to train teacher number 10 among an ensemble of 100 teachers for
35 | MNIST, you use the following command:
36 | 
37 | ```
38 | python train_teachers.py --nb_teachers=100 --teacher_id=10 --dataset=mnist
39 | ```
40 | 
41 | Optional flags like `train_dir` and `data_dir` can be set to point to the
42 | directories where model checkpoints and temporary data (like the dataset)
43 | should be saved, respectively. The flag `max_steps` (default: 3000)
44 | controls the length of training. See `train_teachers.py` and `deep_cnn.py`
45 | for the available flags and their descriptions.
46 | 
47 | **Training the student:** once the teachers are all trained, e.g., teachers
48 | with IDs `0` to `99` are trained for `nb_teachers=100`, we are ready to train
49 | the student. The student is trained by labeling some of the test data with
50 | predictions from the teachers. The predictions are aggregated by counting the
51 | votes assigned to each class among the ensemble of teachers, adding Laplacian
52 | noise to these votes, and assigning the label with the maximum noisy vote count
53 | to the sample. This is detailed in the function `noisy_max` in the file
54 | `aggregation.py`. To train the student, use the following command:
55 | 
56 | ```
57 | python train_student.py --nb_teachers=100 --dataset=mnist --stdnt_share=5000
58 | ```
59 | 
60 | The flag `--stdnt_share=5000` indicates that the student should be able to
61 | use the first `5000` samples of the dataset's test subset as unlabeled
62 | training points (they will be labeled using the teacher predictions). The
63 | remaining samples are used for evaluation of the student's accuracy, which
64 | is displayed upon completion of training.
65 | 
66 | ## Using semi-supervised GANs to train the student
67 | 
68 | In the paper, we describe how to train the student in a semi-supervised
69 | fashion using Generative Adversarial Networks. This can be reproduced for MNIST
70 | by cloning the [improved-gan](https://github.com/openai/improved-gan)
71 | repository, adding it to your `PATH` variable, and then running the shell
72 | script `train_student_mnist_250_lap_20_count_50_epochs_600.sh`.
73 | 
74 | ```
75 | export PATH="/path/to/improved-gan/mnist_svhn_cifar10":$PATH
76 | sh train_student_mnist_250_lap_20_count_50_epochs_600.sh
77 | ```
78 | 
79 | 
80 | ## Alternative deeper convolutional architecture
81 | 
82 | Note that a deeper convolutional model is available. Both the default and
83 | deeper model graphs are defined in `deep_cnn.py`, by the functions
84 | `inference` and `inference_deeper` respectively. Use the flag `--deeper=true`
85 | to switch to that model when launching `train_teachers.py` and
86 | `train_student.py`.
87 | 
88 | ## Privacy analysis
89 | 
90 | In the paper, we detail how data-dependent differential privacy bounds can be
91 | computed to estimate the cost of training the student. To reproduce
92 | the bounds given in the paper, we include the labels predicted by our two
93 | teacher ensembles, for MNIST and SVHN. You can run the privacy analysis for each
94 | dataset with the following commands:
95 | 
96 | ```
97 | python analysis.py --counts_file=mnist_250_teachers_labels.npy --indices_file=mnist_250_teachers_100_indices_used_by_student.npy
98 | 
99 | python analysis.py --counts_file=svhn_250_teachers_labels.npy --max_examples=1000 --delta=1e-6
100 | ```
101 | 
102 | To expedite experimentation with the privacy analysis of student training,
103 | `analysis.py` is configured to download the labels produced by 250
104 | teacher models for both MNIST and SVHN when the two commands above are run.
105 | These 250 teacher models were trained using the following commands,
106 | where `XXX` takes values between `0` and `249`:
107 | 
108 | ```
109 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=mnist
110 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=svhn
111 | ```
112 | 
113 | Note that these labels may also be used in lieu of the function `ensemble_preds`
114 | in `train_student.py` to compare the performance of alternative student model
115 | architectures and learning techniques. This facilitates future work by
116 | removing the need to train the MNIST and SVHN teacher ensembles when
117 | proposing new student training approaches.
118 | 
119 | ## Contact
120 | 
121 | To ask questions, please email `nicolas@papernot.fr` or open an issue on
122 | the `tensorflow/models` issues tracker. Please assign issues to
123 | [@npapernot](https://github.com/npapernot).
124 | 
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Training a CNN on MNIST with Keras and the DP-SGD optimizer."""
15 | 
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | 
20 | from absl import app
21 | from absl import flags
22 | 
23 | from distutils.version import LooseVersion
24 | 
25 | import numpy as np
26 | import tensorflow as tf
27 | 
28 | from privacy.analysis.rdp_accountant import compute_rdp
29 | from privacy.analysis.rdp_accountant import get_privacy_spent
30 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
31 | 
32 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
33 |   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
34 | else:
35 |   GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name
36 | 
37 | flags.DEFINE_boolean(
38 |     'dpsgd', True, 'If True, train with DP-SGD.
If False, ' 39 | 'train with vanilla SGD.') 40 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') 41 | flags.DEFINE_float('noise_multiplier', 1.1, 42 | 'Ratio of the standard deviation to the clipping norm') 43 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 44 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 45 | flags.DEFINE_integer('epochs', 60, 'Number of epochs') 46 | flags.DEFINE_integer( 47 | 'microbatches', 250, 'Number of microbatches ' 48 | '(must evenly divide batch_size)') 49 | flags.DEFINE_string('model_dir', None, 'Model directory') 50 | 51 | FLAGS = flags.FLAGS 52 | 53 | 54 | def compute_epsilon(steps): 55 | """Computes epsilon value for given hyperparameters.""" 56 | if FLAGS.noise_multiplier == 0.0: 57 | return float('inf') 58 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 59 | sampling_probability = FLAGS.batch_size / 60000 60 | rdp = compute_rdp(q=sampling_probability, 61 | noise_multiplier=FLAGS.noise_multiplier, 62 | steps=steps, 63 | orders=orders) 64 | # Delta is set to 1e-5 because MNIST has 60000 training points. 65 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] 66 | 67 | 68 | def load_mnist(): 69 | """Loads MNIST and preprocesses to combine training and validation data.""" 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_data, train_labels = train 72 | test_data, test_labels = test 73 | 74 | train_data = np.array(train_data, dtype=np.float32) / 255 75 | test_data = np.array(test_data, dtype=np.float32) / 255 76 | 77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1) 78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1) 79 | 80 | train_labels = np.array(train_labels, dtype=np.int32) 81 | test_labels = np.array(test_labels, dtype=np.int32) 82 | 83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10) 84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10) 85 | 86 | assert train_data.min() == 0. 87 | assert train_data.max() == 1. 88 | assert test_data.min() == 0. 89 | assert test_data.max() == 1. 90 | 91 | return train_data, train_labels, test_data, test_labels 92 | 93 | 94 | def main(unused_argv): 95 | tf.logging.set_verbosity(tf.logging.INFO) 96 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 97 | raise ValueError('Number of microbatches should divide evenly batch_size') 98 | 99 | # Load training and test data. 100 | train_data, train_labels, test_data, test_labels = load_mnist() 101 | 102 | # Define a sequential Keras model 103 | model = tf.keras.Sequential([ 104 | tf.keras.layers.Conv2D(16, 8, 105 | strides=2, 106 | padding='same', 107 | activation='relu', 108 | input_shape=(28, 28, 1)), 109 | tf.keras.layers.MaxPool2D(2, 1), 110 | tf.keras.layers.Conv2D(32, 4, 111 | strides=2, 112 | padding='valid', 113 | activation='relu'), 114 | tf.keras.layers.MaxPool2D(2, 1), 115 | tf.keras.layers.Flatten(), 116 | tf.keras.layers.Dense(32, activation='relu'), 117 | tf.keras.layers.Dense(10) 118 | ]) 119 | 120 | if FLAGS.dpsgd: 121 | optimizer = DPGradientDescentGaussianOptimizer( 122 | l2_norm_clip=FLAGS.l2_norm_clip, 123 | noise_multiplier=FLAGS.noise_multiplier, 124 | num_microbatches=FLAGS.microbatches, 125 | learning_rate=FLAGS.learning_rate) 126 | # Compute vector of per-example loss rather than its mean over a minibatch. 
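    # Reduction.NONE gives one loss value per example; the DP optimizer needs
    # this vector so it can average the loss within each microbatch, clip that
    # microbatch's gradient, and add noise before the final average.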
127 | loss = tf.keras.losses.CategoricalCrossentropy( 128 | from_logits=True, reduction=tf.losses.Reduction.NONE) 129 | else: 130 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 131 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) 132 | 133 | # Compile model with Keras 134 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) 135 | 136 | # Train model with Keras 137 | model.fit(train_data, train_labels, 138 | epochs=FLAGS.epochs, 139 | validation_data=(test_data, test_labels), 140 | batch_size=FLAGS.batch_size) 141 | 142 | # Compute the privacy budget expended. 143 | if FLAGS.dpsgd: 144 | eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size) 145 | print('For delta=1e-5, the current epsilon is: %.2f' % eps) 146 | else: 147 | print('Trained with vanilla non-private SGD optimizer') 148 | 149 | if __name__ == '__main__': 150 | app.run(main) 151 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/gaussian_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tests for GaussianAverageQuery.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from absl.testing import parameterized 22 | import numpy as np 23 | from six.moves import xrange 24 | import tensorflow as tf 25 | 26 | from privacy.dp_query import gaussian_query 27 | from privacy.dp_query import test_utils 28 | 29 | 30 | class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): 31 | 32 | def test_gaussian_sum_no_clip_no_noise(self): 33 | with self.cached_session() as sess: 34 | record1 = tf.constant([2.0, 0.0]) 35 | record2 = tf.constant([-1.0, 1.0]) 36 | 37 | query = gaussian_query.GaussianSumQuery( 38 | l2_norm_clip=10.0, stddev=0.0) 39 | query_result, _ = test_utils.run_query(query, [record1, record2]) 40 | result = sess.run(query_result) 41 | expected = [1.0, 1.0] 42 | self.assertAllClose(result, expected) 43 | 44 | def test_gaussian_sum_with_clip_no_noise(self): 45 | with self.cached_session() as sess: 46 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. 47 | record2 = tf.constant([4.0, -3.0]) # Not clipped. 48 | 49 | query = gaussian_query.GaussianSumQuery( 50 | l2_norm_clip=5.0, stddev=0.0) 51 | query_result, _ = test_utils.run_query(query, [record1, record2]) 52 | result = sess.run(query_result) 53 | expected = [1.0, 1.0] 54 | self.assertAllClose(result, expected) 55 | 56 | def test_gaussian_sum_with_changing_clip_no_noise(self): 57 | with self.cached_session() as sess: 58 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. 59 | record2 = tf.constant([4.0, -3.0]) # Not clipped. 
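      # record1 has L2 norm 10.0, so clipping scales it by 5.0/10.0 down to
      # [-3.0, 4.0]; record2 has norm exactly 5.0 and passes through. The
      # expected sum is therefore [-3.0 + 4.0, 4.0 - 3.0] = [1.0, 1.0].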
60 | 61 | l2_norm_clip = tf.Variable(5.0) 62 | l2_norm_clip_placeholder = tf.placeholder(tf.float32) 63 | assign_l2_norm_clip = tf.assign(l2_norm_clip, l2_norm_clip_placeholder) 64 | query = gaussian_query.GaussianSumQuery( 65 | l2_norm_clip=l2_norm_clip, stddev=0.0) 66 | query_result, _ = test_utils.run_query(query, [record1, record2]) 67 | 68 | self.evaluate(tf.global_variables_initializer()) 69 | result = sess.run(query_result) 70 | expected = [1.0, 1.0] 71 | self.assertAllClose(result, expected) 72 | 73 | sess.run(assign_l2_norm_clip, {l2_norm_clip_placeholder: 0.0}) 74 | result = sess.run(query_result) 75 | expected = [0.0, 0.0] 76 | self.assertAllClose(result, expected) 77 | 78 | def test_gaussian_sum_with_noise(self): 79 | with self.cached_session() as sess: 80 | record1, record2 = 2.71828, 3.14159 81 | stddev = 1.0 82 | 83 | query = gaussian_query.GaussianSumQuery( 84 | l2_norm_clip=5.0, stddev=stddev) 85 | query_result, _ = test_utils.run_query(query, [record1, record2]) 86 | 87 | noised_sums = [] 88 | for _ in xrange(1000): 89 | noised_sums.append(sess.run(query_result)) 90 | 91 | result_stddev = np.std(noised_sums) 92 | self.assertNear(result_stddev, stddev, 0.1) 93 | 94 | def test_gaussian_sum_merge(self): 95 | records1 = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])] 96 | records2 = [tf.constant([3.0, 5.0]), tf.constant([-1.0, 4.0])] 97 | 98 | def get_sample_state(records): 99 | query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0) 100 | global_state = query.initial_global_state() 101 | params = query.derive_sample_params(global_state) 102 | sample_state = query.initial_sample_state(records[0]) 103 | for record in records: 104 | sample_state = query.accumulate_record(params, sample_state, record) 105 | return sample_state 106 | 107 | sample_state_1 = get_sample_state(records1) 108 | sample_state_2 = get_sample_state(records2) 109 | 110 | merged = gaussian_query.GaussianSumQuery(10.0, 1.0).merge_sample_states( 111 | sample_state_1, 112 | sample_state_2) 113 | 114 | with self.cached_session() as sess: 115 | result = sess.run(merged) 116 | 117 | expected = [3.0, 10.0] 118 | self.assertAllClose(result, expected) 119 | 120 | def test_gaussian_average_no_noise(self): 121 | with self.cached_session() as sess: 122 | record1 = tf.constant([5.0, 0.0]) # Clipped to [3.0, 0.0]. 123 | record2 = tf.constant([-1.0, 2.0]) # Not clipped. 
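      # The clipped sum is [3.0, 0.0] + [-1.0, 2.0] = [2.0, 2.0]; dividing by
      # denominator=2.0 yields the expected average of [1.0, 1.0].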
124 | 125 | query = gaussian_query.GaussianAverageQuery( 126 | l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0) 127 | query_result, _ = test_utils.run_query(query, [record1, record2]) 128 | result = sess.run(query_result) 129 | expected_average = [1.0, 1.0] 130 | self.assertAllClose(result, expected_average) 131 | 132 | def test_gaussian_average_with_noise(self): 133 | with self.cached_session() as sess: 134 | record1, record2 = 2.71828, 3.14159 135 | sum_stddev = 1.0 136 | denominator = 2.0 137 | 138 | query = gaussian_query.GaussianAverageQuery( 139 | l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator) 140 | query_result, _ = test_utils.run_query(query, [record1, record2]) 141 | 142 | noised_averages = [] 143 | for _ in range(1000): 144 | noised_averages.append(sess.run(query_result)) 145 | 146 | result_stddev = np.std(noised_averages) 147 | avg_stddev = sum_stddev / denominator 148 | self.assertNear(result_stddev, avg_stddev, 0.1) 149 | 150 | @parameterized.named_parameters( 151 | ('type_mismatch', [1.0], (1.0,), TypeError), 152 | ('too_few_on_left', [1.0], [1.0, 1.0], ValueError), 153 | ('too_few_on_right', [1.0, 1.0], [1.0], ValueError)) 154 | def test_incompatible_records(self, record1, record2, error_type): 155 | query = gaussian_query.GaussianSumQuery(1.0, 0.0) 156 | with self.assertRaises(error_type): 157 | test_utils.run_query(query, [record1, record2]) 158 | 159 | 160 | if __name__ == '__main__': 161 | tf.test.main() 162 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Training a CNN on MNIST in TF Eager mode with DP-SGD optimizer.""" 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | from absl import app 20 | from absl import flags 21 | 22 | from distutils.version import LooseVersion 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.analysis.rdp_accountant import compute_rdp 28 | from privacy.analysis.rdp_accountant import get_privacy_spent 29 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer 30 | 31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 32 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer 33 | tf.enable_eager_execution() 34 | else: 35 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name 36 | 37 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. 
If False, ' 38 | 'train with vanilla SGD.') 39 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') 40 | flags.DEFINE_float('noise_multiplier', 1.1, 41 | 'Ratio of the standard deviation to the clipping norm') 42 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 43 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 44 | flags.DEFINE_integer('epochs', 60, 'Number of epochs') 45 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches ' 46 | '(must evenly divide batch_size)') 47 | 48 | FLAGS = flags.FLAGS 49 | 50 | 51 | def compute_epsilon(steps): 52 | """Computes epsilon value for given hyperparameters.""" 53 | if FLAGS.noise_multiplier == 0.0: 54 | return float('inf') 55 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 56 | sampling_probability = FLAGS.batch_size / 60000 57 | rdp = compute_rdp(q=sampling_probability, 58 | noise_multiplier=FLAGS.noise_multiplier, 59 | steps=steps, 60 | orders=orders) 61 | # Delta is set to 1e-5 because MNIST has 60000 training points. 62 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] 63 | 64 | 65 | def main(_): 66 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 67 | raise ValueError('Number of microbatches should divide evenly batch_size') 68 | 69 | # Fetch the mnist data 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_images, train_labels = train 72 | test_images, test_labels = test 73 | 74 | # Create a dataset object and batch for the training data 75 | dataset = tf.data.Dataset.from_tensor_slices( 76 | (tf.cast(train_images[..., tf.newaxis]/255, tf.float32), 77 | tf.cast(train_labels, tf.int64))) 78 | dataset = dataset.shuffle(1000).batch(FLAGS.batch_size) 79 | 80 | # Create a dataset object and batch for the test data 81 | eval_dataset = tf.data.Dataset.from_tensor_slices( 82 | (tf.cast(test_images[..., tf.newaxis]/255, tf.float32), 83 | tf.cast(test_labels, tf.int64))) 84 | eval_dataset = eval_dataset.batch(10000) 85 | 86 | # Define the model using tf.keras.layers 87 | mnist_model = tf.keras.Sequential([ 88 | tf.keras.layers.Conv2D(16, 8, 89 | strides=2, 90 | padding='same', 91 | activation='relu'), 92 | tf.keras.layers.MaxPool2D(2, 1), 93 | tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'), 94 | tf.keras.layers.MaxPool2D(2, 1), 95 | tf.keras.layers.Flatten(), 96 | tf.keras.layers.Dense(32, activation='relu'), 97 | tf.keras.layers.Dense(10) 98 | ]) 99 | 100 | # Instantiate the optimizer 101 | if FLAGS.dpsgd: 102 | opt = DPGradientDescentGaussianOptimizer( 103 | l2_norm_clip=FLAGS.l2_norm_clip, 104 | noise_multiplier=FLAGS.noise_multiplier, 105 | num_microbatches=FLAGS.microbatches, 106 | learning_rate=FLAGS.learning_rate) 107 | else: 108 | opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 109 | 110 | # Training loop. 111 | steps_per_epoch = 60000 // FLAGS.batch_size 112 | for epoch in range(FLAGS.epochs): 113 | # Train the model for one epoch. 114 | for (_, (images, labels)) in enumerate(dataset.take(-1)): 115 | with tf.GradientTape(persistent=True) as gradient_tape: 116 | # This dummy call is needed to obtain the var list. 117 | logits = mnist_model(images, training=True) 118 | var_list = mnist_model.trainable_variables 119 | 120 | # In Eager mode, the optimizer takes a function that returns the loss. 
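        # The DP optimizer re-evaluates this function under the gradient tape
        # so it can derive per-microbatch gradients, clip each one, and add
        # noise before applying the averaged update.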
121 | def loss_fn(): 122 | logits = mnist_model(images, training=True) # pylint: disable=undefined-loop-variable,cell-var-from-loop 123 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 124 | labels=labels, logits=logits) # pylint: disable=undefined-loop-variable,cell-var-from-loop 125 | # If training without privacy, the loss is a scalar not a vector. 126 | if not FLAGS.dpsgd: 127 | loss = tf.reduce_mean(loss) 128 | return loss 129 | 130 | if FLAGS.dpsgd: 131 | grads_and_vars = opt.compute_gradients(loss_fn, var_list, 132 | gradient_tape=gradient_tape) 133 | else: 134 | grads_and_vars = opt.compute_gradients(loss_fn, var_list) 135 | 136 | opt.apply_gradients(grads_and_vars) 137 | 138 | # Evaluate the model and print results 139 | for (_, (images, labels)) in enumerate(eval_dataset.take(-1)): 140 | logits = mnist_model(images, training=False) 141 | correct_preds = tf.equal(tf.argmax(logits, axis=1), labels) 142 | test_accuracy = np.mean(correct_preds.numpy()) 143 | print('Test accuracy after epoch %d is: %.3f' % (epoch, test_accuracy)) 144 | 145 | # Compute the privacy budget expended so far. 146 | if FLAGS.dpsgd: 147 | eps = compute_epsilon((epoch + 1) * steps_per_epoch) 148 | print('For delta=1e-5, the current epsilon is: %.2f' % eps) 149 | else: 150 | print('Trained with vanilla non-private SGD optimizer') 151 | 152 | if __name__ == '__main__': 153 | app.run(main) 154 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | This folder contains a set of tutorials that demonstrate the features of this 4 | library. 5 | As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest way to use 6 | a differentially private optimizer is to modify an existing TF training loop 7 | to replace an existing vanilla optimizer with its differentially private 8 | counterpart implemented in the library. 9 | 10 | Here is a list of all the tutorials included: 11 | 12 | * `lm_dpsgd_tutorial.py`: learn a language model with differential privacy. 13 | 14 | * `mnist_dpsgd_tutorial.py`: learn a convolutional neural network on MNIST with 15 | differential privacy. 16 | 17 | * `mnist_dpsgd_tutorial_eager.py`: learn a convolutional neural network on MNIST 18 | with differential privacy using Eager mode. 19 | 20 | * `mnist_dpsgd_tutorial_keras.py`: learn a convolutional neural network on MNIST 21 | with differential privacy using tf.Keras. 22 | 23 | * `mnist_lr_tutorial.py`: learn a differentially private logistic regression 24 | model on MNIST. The model illustrates application of the 25 | "amplification-by-iteration" analysis (https://arxiv.org/abs/1808.06651). 26 | 27 | The rest of this README describes the different parameters used to configure 28 | DP-SGD as well as expected outputs for the `mnist_dpsgd_tutorial.py` tutorial. 29 | 30 | ## Parameters 31 | 32 | All of the optimizers share some privacy-specific parameters that need to 33 | be tuned in addition to any existing hyperparameter. There are currently four: 34 | 35 | * `learning_rate` (float): The learning rate of the SGD training algorithm. The 36 | higher the learning rate, the more each update matters. If the updates are noisy 37 | (such as when the additive noise is large compared to the clipping 38 | threshold), the learning rate must be kept low for the training procedure to converge. 
39 | * `num_microbatches` (int): The input data for each step (i.e., batch) of your
40 | original training algorithm is split into this many microbatches. Generally,
41 | increasing this will improve your utility but slow down your training in terms
42 | of wall-clock time. The total number of examples consumed in one global step
43 | remains the same. This number should evenly divide your input batch size.
44 | * `l2_norm_clip` (float): The cumulative gradient across all network parameters
45 | from each microbatch will be clipped so that its L2 norm is at most this
46 | value. You should set this to something close to some percentile of the
47 | gradient norms you expect to see from each microbatch. In previous experiments,
48 | we've found numbers from 0.5 to 1.0 to work reasonably well.
49 | * `noise_multiplier` (float): This governs the amount of noise added during
50 | training. Generally, more noise results in better privacy and lower utility.
51 | This generally has to be at least 0.3 to obtain rigorous privacy guarantees,
52 | but smaller values may still be acceptable for practical purposes.
53 | 
54 | ## Measuring Privacy
55 | 
56 | Differential privacy can be expressed using two values, epsilon and delta.
57 | Roughly speaking, they mean the following:
58 | 
59 | * epsilon gives a ceiling on how much the probability of a particular output
60 | can increase by including (or removing) a single training example. We usually
61 | want it to be a small constant (less than 10, or, for more stringent privacy
62 | guarantees, less than 1). However, this is only an upper bound, and a large
63 | value of epsilon may still mean good practical privacy.
64 | * delta bounds the probability of an arbitrary change in model behavior.
65 | We can usually set this to a very small number (1e-7 or so) without
66 | compromising utility. A rule of thumb is to set it to be less than the inverse
67 | of the training data size.
68 | 
69 | To find the epsilon for a fixed delta value for your model, follow the
70 | approach demonstrated in the `compute_epsilon` function of `mnist_dpsgd_tutorial.py`,
71 | where the arguments used to call the RDP accountant (i.e., the tool used to
72 | compute the privacy guarantee) are:
73 | 
74 | * `q`: The sampling ratio, defined as (number of examples consumed in one
75 | step) / (total training examples).
76 | * `noise_multiplier`: The noise_multiplier from your parameters above.
77 | * `steps`: The number of global steps taken.
78 | 
79 | A detailed writeup of the theory behind the computation of epsilon and delta
80 | is available at https://arxiv.org/abs/1908.10530.
81 | 
82 | ## Expected Output
83 | 
84 | When the `mnist_dpsgd_tutorial.py` script is run with the default parameters,
85 | the output will contain the following lines (leaving out a lot of diagnostic
86 | info):
87 | ```
88 | ...
89 | Test accuracy after 1 epochs is: 0.774
90 | For delta=1e-5, the current epsilon is: 1.03
91 | ...
92 | Test accuracy after 2 epochs is: 0.877
93 | For delta=1e-5, the current epsilon is: 1.11
94 | ...
95 | Test accuracy after 60 epochs is: 0.966
96 | For delta=1e-5, the current epsilon is: 3.01
97 | ```
98 | 
99 | ## Using Command-Line Interface for Privacy Budgeting
100 | 
101 | Before launching a (possibly quite lengthy) training procedure, it is possible
102 | to compute, quickly and accurately, the privacy loss at any point of training.
103 | To do so, run the script `privacy/analysis/compute_dp_sgd_privacy.py`, which
104 | does not have any TensorFlow dependencies.
For example, executing 105 | ``` 106 | compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.1 --epochs=60 --delta=1e-5 107 | ``` 108 | allows us to conclude, in a matter of seconds, that DP-SGD run with default 109 | parameters satisfies differential privacy with eps = 3.01 and delta = 1e-05. 110 | Note that the flags provided in the command above correspond to the tutorial in 111 | `mnist_dpsgd_tutorial.py`. The command is applicable to other datasets but the 112 | values passed must be adapted (e.g., N the number of training points). 113 | 114 | 115 | ## Select Parameters 116 | 117 | The table below has a few sample parameters illustrating various 118 | accuracy/privacy tradeoffs achieved by the MNIST tutorial in 119 | `mnist_dpsgd_tutorial.py` (default parameters are in __bold__; privacy epsilon 120 | is reported at delta=1e-5; accuracy is averaged over 10 runs, its standard 121 | deviation is less than .3% in all cases). 122 | 123 | | Learning rate | Noise multiplier | Clipping threshold | Number of microbatches | Number of epochs | Privacy eps | Accuracy | 124 | | ------------- | ---------------- | ----------------- | ---------------------- | ---------------- | ----------- | -------- | 125 | | 0.1 | | | __256__ | 20 | no privacy | 99.0% | 126 | | 0.25 | 1.3 | 1.5 | __256__ | 15 | 1.19 | 95.0% | 127 | | __0.15__ | __1.1__ | __1.0__ | __256__ |__60__ | 3.01 | 96.6% | 128 | | 0.25 | 0.7 | 1.5 | __256__ | 45 | 7.10 | 97.0% | 129 | 130 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/rdp_accountant_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for rdp_accountant.py.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import sys 22 | 23 | from absl.testing import absltest 24 | from absl.testing import parameterized 25 | from mpmath import exp 26 | from mpmath import inf 27 | from mpmath import log 28 | from mpmath import npdf 29 | from mpmath import quad 30 | import numpy as np 31 | 32 | from privacy.analysis import privacy_ledger 33 | from privacy.analysis import rdp_accountant 34 | 35 | 36 | class TestGaussianMoments(parameterized.TestCase): 37 | ################################# 38 | # HELPER FUNCTIONS: # 39 | # Exact computations using # 40 | # multi-precision arithmetic. # 41 | ################################# 42 | 43 | def _log_float_mp(self, x): 44 | # Convert multi-precision input to float log space. 
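    # Anything below the smallest positive float (including 0) is mapped
    # directly to -inf rather than relying on the conversion of log(x).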
45 |     if x >= sys.float_info.min:
46 |       return float(log(x))
47 |     else:
48 |       return -np.inf
49 | 
50 |   def _integral_mp(self, fn, bounds=(-inf, inf)):
51 |     integral, _ = quad(fn, bounds, error=True, maxdegree=8)
52 |     return integral
53 | 
54 |   def _distributions_mp(self, sigma, q):
55 | 
56 |     def _mu0(x):
57 |       return npdf(x, mu=0, sigma=sigma)
58 | 
59 |     def _mu1(x):
60 |       return npdf(x, mu=1, sigma=sigma)
61 | 
62 |     def _mu(x):
63 |       return (1 - q) * _mu0(x) + q * _mu1(x)
64 | 
65 |     return _mu0, _mu  # Closure over sigma and q.
66 | 
67 |   def _mu1_over_mu0(self, x, sigma):
68 |     # Closed-form expression for N(1, sigma^2) / N(0, sigma^2) at x.
69 |     return exp((2 * x - 1) / (2 * sigma**2))
70 | 
71 |   def _mu_over_mu0(self, x, q, sigma):
72 |     return (1 - q) + q * self._mu1_over_mu0(x, sigma)
73 | 
74 |   def _compute_a_mp(self, sigma, q, alpha):
75 |     """Compute A_alpha for arbitrary alpha by numerical integration."""
76 |     mu0, _ = self._distributions_mp(sigma, q)
77 |     a_alpha_fn = lambda z: mu0(z) * self._mu_over_mu0(z, q, sigma)**alpha
78 |     a_alpha = self._integral_mp(a_alpha_fn)
79 |     return a_alpha
80 | 
81 |   # TEST ROUTINES
82 |   def test_compute_rdp_no_data(self):
83 |     # q = 0: no data is sampled, so no privacy is spent.
84 |     self.assertEqual(rdp_accountant.compute_rdp(0, 10, 1, 20), 0)
85 | 
86 |   def test_compute_rdp_no_sampling(self):
87 |     # q = 1, RDP = alpha / (2 * sigma^2).
88 |     self.assertEqual(rdp_accountant.compute_rdp(1, 10, 1, 20), 0.1)
89 | 
90 |   def test_compute_rdp_scalar(self):
91 |     rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
92 |     self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)
93 | 
94 |   def test_compute_rdp_sequence(self):
95 |     rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50,
96 |                                          [1.5, 2.5, 5, 50, 100, np.inf])
97 |     self.assertSequenceAlmostEqual(
98 |         rdp_vec, [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf],
99 |         delta=1e-5)
100 | 
101 |   params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01},
102 |             {'q': 1e-6, 'sigma': .1, 'order': 256},
103 |             {'q': 1e-5, 'sigma': .1, 'order': 256.1},
104 |             {'q': 1e-6, 'sigma': 1, 'order': 27},
105 |             {'q': 1e-4, 'sigma': 1., 'order': 1.5},
106 |             {'q': 1e-3, 'sigma': 1., 'order': 2},
107 |             {'q': .01, 'sigma': 10, 'order': 20},
108 |             {'q': .1, 'sigma': 100, 'order': 20.5},
109 |             {'q': .99, 'sigma': .1, 'order': 256},
110 |             {'q': .999, 'sigma': 100, 'order': 256.1})
111 | 
112 |   # pylint:disable=undefined-variable
113 |   @parameterized.parameters(p for p in params)
114 |   def test_compute_log_a_equals_mp(self, q, sigma, order):
115 |     # Compare the cheap computation of log(A) with an expensive, multi-precision
116 |     # computation.
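    # A_alpha here is E_{z ~ mu0}[(mu(z) / mu0(z))**alpha], computed by the
    # numerical integration in _compute_a_mp above; close agreement validates
    # the closed-form implementation in rdp_accountant.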
117 | log_a = rdp_accountant._compute_log_a(q, sigma, order) 118 | log_a_mp = self._log_float_mp(self._compute_a_mp(sigma, q, order)) 119 | np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4) 120 | 121 | def test_get_privacy_spent_check_target_delta(self): 122 | orders = range(2, 33) 123 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) 124 | eps, _, opt_order = rdp_accountant.get_privacy_spent( 125 | orders, rdp, target_delta=1e-5) 126 | self.assertAlmostEqual(eps, 1.258575, places=5) 127 | self.assertEqual(opt_order, 20) 128 | 129 | def test_get_privacy_spent_check_target_eps(self): 130 | orders = range(2, 33) 131 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) 132 | _, delta, opt_order = rdp_accountant.get_privacy_spent( 133 | orders, rdp, target_eps=1.258575) 134 | self.assertAlmostEqual(delta, 1e-5) 135 | self.assertEqual(opt_order, 20) 136 | 137 | def test_check_composition(self): 138 | orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14., 139 | 16., 20., 24., 28., 32., 64., 256.) 140 | 141 | rdp = rdp_accountant.compute_rdp(q=1e-4, 142 | noise_multiplier=.4, 143 | steps=40000, 144 | orders=orders) 145 | 146 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, 147 | target_delta=1e-6) 148 | 149 | rdp += rdp_accountant.compute_rdp(q=0.1, 150 | noise_multiplier=2, 151 | steps=100, 152 | orders=orders) 153 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, 154 | target_delta=1e-5) 155 | self.assertAlmostEqual(eps, 8.509656, places=5) 156 | self.assertEqual(opt_order, 2.5) 157 | 158 | def test_compute_rdp_from_ledger(self): 159 | orders = range(2, 33) 160 | q = 0.1 161 | n = 1000 162 | l2_norm_clip = 3.14159 163 | noise_stddev = 2.71828 164 | steps = 3 165 | 166 | query_entry = privacy_ledger.GaussianSumQueryEntry( 167 | l2_norm_clip, noise_stddev) 168 | ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps 169 | 170 | z = noise_stddev / l2_norm_clip 171 | rdp = rdp_accountant.compute_rdp(q, z, steps, orders) 172 | rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders) 173 | self.assertSequenceAlmostEqual(rdp, rdp_from_ledger) 174 | 175 | 176 | if __name__ == '__main__': 177 | absltest.main() 178 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Vectorized differentially private optimizers for TensorFlow.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from distutils.version import LooseVersion 21 | import tensorflow as tf 22 | 23 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 24 | nest = tf.contrib.framework.nest 25 | AdagradOptimizer = tf.train.AdagradOptimizer 26 | AdamOptimizer = tf.train.AdamOptimizer 27 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer 28 | parent_code = tf.train.Optimizer.compute_gradients.__code__ 29 | GATE_OP = tf.train.Optimizer.GATE_OP # pylint: disable=invalid-name 30 | else: 31 | nest = tf.nest 32 | AdagradOptimizer = tf.optimizers.Adagrad 33 | AdamOptimizer = tf.optimizers.Adam 34 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name 35 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ # pylint: disable=protected-access 36 | GATE_OP = None # pylint: disable=invalid-name 37 | 38 | 39 | def make_vectorized_optimizer_class(cls): 40 | """Constructs a vectorized DP optimizer class from an existing one.""" 41 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 42 | child_code = cls.compute_gradients.__code__ 43 | else: 44 | child_code = cls._compute_gradients.__code__ # pylint: disable=protected-access 45 | if child_code is not parent_code: 46 | tf.logging.warning( 47 | 'WARNING: Calling make_optimizer_class() on class %s that overrides ' 48 | 'method compute_gradients(). Check to ensure that ' 49 | 'make_optimizer_class() does not interfere with overridden version.', 50 | cls.__name__) 51 | 52 | class DPOptimizerClass(cls): 53 | """Differentially private subclass of given class cls.""" 54 | 55 | def __init__( 56 | self, 57 | l2_norm_clip, 58 | noise_multiplier, 59 | num_microbatches=None, 60 | *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args 61 | **kwargs): 62 | """Initialize the DPOptimizerClass. 63 | 64 | Args: 65 | l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients) 66 | noise_multiplier: Ratio of the standard deviation to the clipping norm 67 | num_microbatches: How many microbatches into which the minibatch is 68 | split. If None, will default to the size of the minibatch, and 69 | per-example gradients will be computed. 70 | """ 71 | super(DPOptimizerClass, self).__init__(*args, **kwargs) 72 | self._l2_norm_clip = l2_norm_clip 73 | self._noise_multiplier = noise_multiplier 74 | self._num_microbatches = num_microbatches 75 | 76 | def compute_gradients(self, 77 | loss, 78 | var_list, 79 | gate_gradients=GATE_OP, 80 | aggregation_method=None, 81 | colocate_gradients_with_ops=False, 82 | grad_loss=None, 83 | gradient_tape=None): 84 | if callable(loss): 85 | # TF is running in Eager mode 86 | raise NotImplementedError('Vectorized optimizer unavailable for TF2.') 87 | else: 88 | # TF is running in graph mode, check we did not receive a gradient tape. 89 | if gradient_tape: 90 | raise ValueError('When in graph mode, a tape should not be passed.') 91 | 92 | batch_size = tf.shape(loss)[0] 93 | if self._num_microbatches is None: 94 | self._num_microbatches = batch_size 95 | 96 | # Note: it would be closer to the correct i.i.d. sampling of records if 97 | # we sampled each microbatch from the appropriate binomial distribution, 98 | # although that still wouldn't be quite correct because it would be 99 | # sampling from the dataset without replacement. 
100 | microbatch_losses = tf.reshape(loss, [self._num_microbatches, -1]) 101 | 102 | if var_list is None: 103 | var_list = ( 104 | tf.trainable_variables() + tf.get_collection( 105 | tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) 106 | 107 | def process_microbatch(microbatch_loss): 108 | """Compute clipped grads for one microbatch.""" 109 | microbatch_loss = tf.reduce_mean(microbatch_loss) 110 | grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients( 111 | microbatch_loss, 112 | var_list, 113 | gate_gradients, 114 | aggregation_method, 115 | colocate_gradients_with_ops, 116 | grad_loss)) 117 | grads_list = [ 118 | g if g is not None else tf.zeros_like(v) 119 | for (g, v) in zip(list(grads), var_list) 120 | ] 121 | # Clip gradients to have L2 norm of l2_norm_clip. 122 | # Here, we use TF primitives rather than the built-in 123 | # tf.clip_by_global_norm() so that operations can be vectorized 124 | # across microbatches. 125 | grads_flat = nest.flatten(grads_list) 126 | squared_l2_norms = [tf.reduce_sum(tf.square(g)) for g in grads_flat] 127 | global_norm = tf.sqrt(tf.add_n(squared_l2_norms)) 128 | div = tf.maximum(global_norm / self._l2_norm_clip, 1.) 129 | clipped_flat = [g / div for g in grads_flat] 130 | clipped_grads = nest.pack_sequence_as(grads_list, clipped_flat) 131 | return clipped_grads 132 | 133 | clipped_grads = tf.vectorized_map(process_microbatch, microbatch_losses) 134 | 135 | def reduce_noise_normalize_batch(stacked_grads): 136 | summed_grads = tf.reduce_sum(stacked_grads, axis=0) 137 | noise_stddev = self._l2_norm_clip * self._noise_multiplier 138 | noise = tf.random.normal(tf.shape(summed_grads), 139 | stddev=noise_stddev) 140 | noised_grads = summed_grads + noise 141 | return noised_grads / tf.cast(self._num_microbatches, tf.float32) 142 | 143 | final_grads = nest.map_structure(reduce_noise_normalize_batch, 144 | clipped_grads) 145 | 146 | return list(zip(final_grads, var_list)) 147 | 148 | return DPOptimizerClass 149 | 150 | 151 | VectorizedDPAdagrad = make_vectorized_optimizer_class(AdagradOptimizer) 152 | VectorizedDPAdam = make_vectorized_optimizer_class(AdamOptimizer) 153 | VectorizedDPSGD = make_vectorized_optimizer_class(GradientDescentOptimizer) 154 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/bolton_tutorial.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tutorial for bolt_on module, the model and the optimizer.""" 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | import tensorflow as tf # pylint: disable=wrong-import-position 19 | from privacy.bolt_on import losses # pylint: disable=wrong-import-position 20 | from privacy.bolt_on import models # pylint: disable=wrong-import-position 21 | from privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position 22 | # ------- 23 | # First, we will create a binary classification dataset with a single output 24 | # dimension. The samples for each label are repeated data points at different 25 | # points in space. 26 | # ------- 27 | # Parameters for dataset 28 | n_samples = 10 29 | input_dim = 2 30 | n_outputs = 1 31 | # Create binary classification dataset: 32 | x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), 33 | tf.constant(1, tf.float32, (n_samples, input_dim))] 34 | y_stack = [tf.constant(0, tf.float32, (n_samples, 1)), 35 | tf.constant(1, tf.float32, (n_samples, 1))] 36 | x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0) 37 | print(x.shape, y.shape) 38 | generator = tf.data.Dataset.from_tensor_slices((x, y)) 39 | generator = generator.batch(10) 40 | generator = generator.shuffle(10) 41 | # ------- 42 | # First, we will explore using the pre - built BoltOnModel, which is a thin 43 | # wrapper around a Keras Model using a single - layer neural network. 44 | # It automatically uses the BoltOn Optimizer which encompasses all the logic 45 | # required for the BoltOn Differential Privacy method. 46 | # ------- 47 | bolt = models.BoltOnModel(n_outputs) # tell the model how many outputs we have. 48 | # ------- 49 | # Now, we will pick our optimizer and Strongly Convex Loss function. The loss 50 | # must extend from StrongConvexMixin and implement the associated methods.Some 51 | # existing loss functions are pre - implemented in bolt_on.loss 52 | # ------- 53 | optimizer = tf.optimizers.SGD() 54 | reg_lambda = 1 55 | C = 1 56 | radius_constant = 1 57 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) 58 | # ------- 59 | # For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy 60 | # to be 1; these are all tunable and their impact can be read in losses. 61 | # StrongConvexBinaryCrossentropy.We then compile the model with the chosen 62 | # optimizer and loss, which will automatically wrap the chosen optimizer with 63 | # the BoltOn Optimizer, ensuring the required components function as required 64 | # for privacy guarantees. 65 | # ------- 66 | bolt.compile(optimizer, loss) 67 | # ------- 68 | # To fit the model, the optimizer will require additional information about 69 | # the dataset and model.These parameters are: 70 | # 1. the class_weights used 71 | # 2. the number of samples in the dataset 72 | # 3. the batch size which the model will try to infer, if possible. If not, 73 | # you will be required to pass these explicitly to the fit method. 74 | # 75 | # As well, there are two privacy parameters than can be altered: 76 | # 1. epsilon, a float 77 | # 2. noise_distribution, a valid string indicating the distriution to use (must 78 | # be implemented) 79 | # 80 | # The BoltOnModel offers a helper method,.calculate_class_weight to aid in 81 | # class_weight calculation. 
82 | # -------
83 | # required parameters
84 | class_weight = None  # default, use .calculate_class_weight for other values
85 | batch_size = None  # default, if it cannot be inferred, specify this
86 | n_samples = None  # default, if it cannot be inferred, specify this
87 | # privacy parameters
88 | epsilon = 2
89 | noise_distribution = 'laplace'
90 | 
91 | bolt.fit(x,
92 |          y,
93 |          epsilon=epsilon,
94 |          class_weight=class_weight,
95 |          batch_size=batch_size,
96 |          n_samples=n_samples,
97 |          noise_distribution=noise_distribution,
98 |          epochs=2)
99 | # -------
100 | # We may also train on a generator object, or try different optimizers and loss
101 | # functions. Below, we will see that we must pass the number of samples, as the
102 | # fit method is unable to infer it for a generator.
103 | # -------
104 | optimizer2 = tf.optimizers.Adam()
105 | bolt.compile(optimizer2, loss)
106 | # required parameters
107 | class_weight = None  # default, use .calculate_class_weight for other values
108 | batch_size = None  # default, if it cannot be inferred, specify this
109 | n_samples = None  # default, if it cannot be inferred, specify this
110 | # privacy parameters
111 | epsilon = 2
112 | noise_distribution = 'laplace'
113 | try:
114 |   bolt.fit(generator,
115 |            epsilon=epsilon,
116 |            class_weight=class_weight,
117 |            batch_size=batch_size,
118 |            n_samples=n_samples,
119 |            noise_distribution=noise_distribution,
120 |            verbose=0)
121 | except ValueError as e:
122 |   print(e)
123 | # -------
124 | # And now, re-running with the parameter set.
125 | # -------
126 | n_samples = 20
127 | bolt.fit_generator(generator,
128 |                    epsilon=epsilon,
129 |                    class_weight=class_weight,
130 |                    n_samples=n_samples,
131 |                    noise_distribution=noise_distribution,
132 |                    verbose=0)
133 | # -------
134 | # You don't have to use the BoltOn model to use the BoltOn method.
135 | # There are only a few requirements:
136 | # 1. make sure any requirements from the loss are implemented in the model.
137 | # 2. instantiate the optimizer and use it as a context around the fit operation.
138 | # -------
139 | # -------------------- Part 2, using the Optimizer
140 | 
141 | # -------
142 | # Here, we create our own model and set up the BoltOn optimizer.
143 | # -------
144 | 
145 | 
146 | class TestModel(tf.keras.Model):  # pylint: disable=abstract-method
147 | 
148 |   def __init__(self, reg_layer, number_of_outputs=1):
149 |     super(TestModel, self).__init__(name='test')
150 |     self.output_layer = tf.keras.layers.Dense(number_of_outputs,
151 |                                               kernel_regularizer=reg_layer)
152 | 
153 |   def call(self, inputs):  # pylint: disable=arguments-differ
154 |     return self.output_layer(inputs)
155 | 
156 | 
157 | optimizer = tf.optimizers.SGD()
158 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
159 | optimizer = BoltOn(optimizer, loss)
160 | # -------
161 | # Now, we instantiate our model and check requirement 1. Since our loss requires
162 | # L2 regularization over the kernel, we will pass it to the model.
163 | # -------
164 | n_outputs = 1  # parameter for model and optimizer context.
165 | test_model = TestModel(loss.kernel_regularizer(), n_outputs)
166 | test_model.compile(optimizer, loss)
167 | # -------
168 | # We comply with requirement 2 and use the BoltOn Optimizer as a context around
169 | # the fit method.
170 | # -------
171 | # parameters for context
172 | noise_distribution = 'laplace'
173 | epsilon = 2
174 | class_weights = 1  # Previously, the fit method auto-detected the class_weights.
175 | # Here, we need to pass the class_weights explicitly; 1 is the same as None.
176 | n_samples = 20
177 | batch_size = 5
178 | 
179 | with optimizer(
180 |     noise_distribution=noise_distribution,
181 |     epsilon=epsilon,
182 |     layers=test_model.layers,
183 |     class_weights=class_weights,
184 |     n_samples=n_samples,
185 |     batch_size=batch_size
186 | ) as _:
187 |   test_model.fit(x, y, batch_size=batch_size, epochs=2)
188 | 
--------------------------------------------------------------------------------