├── tensorflow_privacy
│   ├── research
│   │   ├── pate_2017
│   │   │   ├── __init__.py
│   │   │   ├── train_student_mnist_250_lap_20_count_50_epochs_600.sh
│   │   │   ├── utils.py
│   │   │   ├── metrics.py
│   │   │   ├── train_teachers.py
│   │   │   ├── aggregation.py
│   │   │   └── README.md
│   │   ├── README.md
│   │   └── pate_2018
│   │       ├── ICLR2018
│   │       │   ├── generate_figures.sh
│   │       │   ├── download.py
│   │       │   ├── README.md
│   │       │   ├── generate_table.sh
│   │       │   ├── generate_table_data_independent.sh
│   │       │   ├── plot_ls_q.py
│   │       │   └── utility_queries_answered.py
│   │       ├── README.md
│   │       ├── core_test.py
│   │       └── smooth_sensitivity_test.py
│   ├── requirements.txt
│   ├── privacy
│   │   ├── BUILD
│   │   ├── bolt_on
│   │   │   ├── __init__.py
│   │   │   └── README.md
│   │   ├── dp_query
│   │   │   ├── normalized_query_test.py
│   │   │   ├── test_utils.py
│   │   │   ├── no_privacy_query.py
│   │   │   ├── no_privacy_query_test.py
│   │   │   ├── normalized_query.py
│   │   │   ├── BUILD
│   │   │   ├── nested_query.py
│   │   │   ├── gaussian_query.py
│   │   │   ├── nested_query_test.py
│   │   │   └── gaussian_query_test.py
│   │   ├── __init__.py
│   │   ├── analysis
│   │   │   ├── tensor_buffer_test_graph.py
│   │   │   ├── tensor_buffer_test_eager.py
│   │   │   ├── compute_dp_sgd_privacy.py
│   │   │   ├── tensor_buffer.py
│   │   │   ├── privacy_ledger_test.py
│   │   │   └── rdp_accountant_test.py
│   │   └── optimizers
│   │       ├── dp_optimizer_eager_test.py
│   │       └── dp_optimizer_vectorized.py
│   ├── CONTRIBUTING.md
│   ├── setup.py
│   ├── README.md
│   └── tutorials
│       ├── walkthrough
│       │   └── mnist_scratch.py
│       ├── mnist_dpsgd_tutorial_keras.py
│       ├── mnist_dpsgd_tutorial_eager.py
│       ├── README.md
│       └── bolton_tutorial.py
├── requirements.txt
├── figures
│   ├── Mnist_epoch-ε_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_3e-06.png
│   ├── Mnist_accuracy-epoch_lr_e-03.png
│   ├── Mnist_accuracy-epsilon_lr_3e-06.png
│   ├── cifar10_accuracy-epoch_lr_e-03.png
│   └── cifar10_accuracy-epsilon_lr_e-03.png
├── README.md
├── results
│   ├── mnist_dpsgd_delta_1e-05_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.0001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.001_lr_3e-06.txt
│   ├── mnist_dpsgd_delta_0.01_lr_3e-06.txt
│   ├── cifar_dpsgd_delta_0.0001_lr_0.001.txt
│   ├── cifar_dpsgd_delta_1e-06_lr_0.001.txt
│   └── cifar_dpsgd_delta_1e-05_lr_0.001.txt
├── dp_optimizer.py
└── mnist.py

--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tensorflow==2.0.0
tensorflow-privacy==0.1.0
--------------------------------------------------------------------------------
/tensorflow_privacy/requirements.txt:
--------------------------------------------------------------------------------
tensorflow>=1.13
mpmath
scipy>=0.17
--------------------------------------------------------------------------------
/figures/Mnist_epoch-ε_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_epoch-ε_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/Mnist_accuracy-epsilon_lr_3e-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/Mnist_accuracy-epsilon_lr_3e-06.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epoch_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epoch_lr_e-03.png
--------------------------------------------------------------------------------
/figures/cifar10_accuracy-epsilon_lr_e-03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingyunhao/Deep-Learning-with-Differential-Privacy/HEAD/figures/cifar10_accuracy-epsilon_lr_e-03.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Learning with Differential Privacy

## Prerequisites
Tested on Windows 10 with CUDA 10, cuDNN 7, and TensorFlow 2.0 under Anaconda 3:
```
conda create -n tf2 python=3.6
activate tf2
conda install tensorflow-gpu==2.0.0
pip install tensorflow-privacy==0.1.0
```
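
To sanity-check the installation, here is a minimal sketch (following this repo's `privacy.*` import layout; the batch size, noise multiplier, and epoch count below are illustrative, not the settings behind the results/ files) that computes the (ε, δ) guarantee of a DP-SGD run with the RDP accountant:

```python
from privacy.analysis.rdp_accountant import compute_rdp, get_privacy_spent

n = 60000                         # MNIST training-set size
batch_size = 250
noise_multiplier = 1.3            # ratio of noise stddev to the clipping norm
epochs = 15
delta = 1e-5

q = batch_size / float(n)         # per-step sampling probability
steps = epochs * n // batch_size  # total number of SGD steps
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))

rdp = compute_rdp(q=q, noise_multiplier=noise_multiplier,
                  steps=steps, orders=orders)
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
print('This DP-SGD configuration satisfies ({:.2f}, {})-DP.'.format(eps, delta))
```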
--------------------------------------------------------------------------------
/tensorflow_privacy/research/README.md:
--------------------------------------------------------------------------------
# Research

This folder contains code to reproduce results from research papers. Currently,
the following papers are included:

* Semi-supervised Knowledge Transfer for Deep Learning from Private Training
  Data (ICLR 2017): `pate_2017`

* Scalable Private Learning with PATE (ICLR 2018): `pate_2018`
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/BUILD:
--------------------------------------------------------------------------------
package(default_visibility = ["//visibility:public"])

licenses(["notice"])  # Apache 2.0

exports_files(["LICENSE"])

py_library(
    name = "privacy",
    srcs = ["__init__.py"],
    deps = [
        "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger",
        "//third_party/py/tensorflow_privacy/privacy/analysis:rdp_accountant",
        "//third_party/py/tensorflow_privacy/privacy/dp_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:gaussian_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:nested_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:no_privacy_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:normalized_query",
        "//third_party/py/tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query",
        "//third_party/py/tensorflow_privacy/privacy/optimizers:dp_optimizer",
    ],
)
--------------------------------------------------------------------------------
/tensorflow_privacy/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

## Community Guidelines

This project follows Google's
[Open Source Community Guidelines](https://opensource.google.com/conduct/).
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


# Be sure to clone https://github.com/openai/improved-gan
# and add improved-gan/mnist_svhn_cifar10 to your PATH variable

# Download the labels used to train the student. Note: fetch the raw file;
# running wget on the GitHub "blob" page would save an HTML page instead of
# the .npy data.
wget https://github.com/npapernot/multiple-teachers-for-privacy/raw/master/mnist_250_student_labels_lap_20.npy

# Train the student using improved-gan
THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600

--------------------------------------------------------------------------------
/tensorflow_privacy/setup.py:
--------------------------------------------------------------------------------
# Copyright 2018, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Privacy library setup file for pip."""
from setuptools import find_packages
from setuptools import setup

setup(name='tensorflow_privacy',
      version='0.1.0',
      url='https://github.com/tensorflow/privacy',
      license='Apache-2.0',
      install_requires=[
          'scipy>=0.17',
          'mpmath',  # used in tests only
      ],
      # Explicit dependence on TensorFlow is not supported.
      # See https://github.com/tensorflow/tensorflow/issues/7166
      extras_require={
          'tf': ['tensorflow>=1.0.0'],
          'tf_gpu': ['tensorflow-gpu>=1.0.0'],
      },
      packages=find_packages())
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/utils.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


def batch_indices(batch_nb, data_length, batch_size):
  """
  Computes a batch's start and end index.
  :param batch_nb: the batch number
  :param data_length: the total length of the data being parsed by batches
  :param batch_size: the number of inputs in each batch
  :return: pair of (start, end) indices
  """
  # Batch start and end index
  start = int(batch_nb * batch_size)
  end = int((batch_nb + 1) * batch_size)

  # When there are not enough inputs left, we reuse some to complete the batch
  if end > data_length:
    shift = end - data_length
    start -= shift
    end -= shift

  return start, end
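
A quick illustration (not part of the repo) of the wrap-around behaviour documented in `batch_indices`; the import assumes you run from `research/pate_2017/`:

```python
from utils import batch_indices

print(batch_indices(0, data_length=10, batch_size=4))  # (0, 4)
print(batch_indices(1, data_length=10, batch_size=4))  # (4, 8)
# The last batch would overrun as (8, 12), so both indices are shifted back
# by 2 and the batch reuses inputs 6 and 7:
print(batch_indices(2, data_length=10, batch_size=4))  # (6, 10)
```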
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Privacy Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BoltOn Method for privacy."""
import sys
from distutils.version import LooseVersion
import tensorflow as tf

if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
  raise ImportError("Please upgrade your version "
                    "of tensorflow from: {0} to at least 2.0.0 to "
                    "use privacy/bolt_on".format(LooseVersion(tf.__version__)))
if hasattr(sys, "skip_tf_privacy_import"):  # Useful for standalone scripts.
  pass
else:
  from privacy.bolt_on.models import BoltOnModel  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.optimizers import BoltOn  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.losses import StrongConvexHuber  # pylint: disable=g-import-not-at-top
  from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy  # pylint: disable=g-import-not-at-top
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


counts_file="data/glyph_5000_teachers.npy"
output_dir="figures/"

mkdir -p $output_dir

if [ ! -d "$output_dir" ]; then
  echo "Directory $output_dir does not exist."
  exit 1
fi

python rdp_bucketized.py \
  --plot=small \
  --counts_file=$counts_file \
  --plot_file=$output_dir"noisy_thresholding_check_perf.pdf"

python rdp_bucketized.py \
  --plot=large \
  --counts_file=$counts_file \
  --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf"

python rdp_cumulative.py \
  --cache=False \
  --counts_file=$counts_file \
  --figures_dir=$output_dir

python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf"
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/download.py:
--------------------------------------------------------------------------------
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to download votes files to the data/ directory.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import urllib
import os
import tarfile

FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz'
DATA_DIR = 'data/'


def download():
  print('Downloading ' + FILE_URI)
  tar_filename, _ = urllib.request.urlretrieve(FILE_URI)
  print('Unpacking ' + tar_filename)
  with tarfile.open(tar_filename, "r:gz") as tar:
    tar.extractall(DATA_DIR)
  print('Done!')


if __name__ == '__main__':
  if not os.path.exists(DATA_DIR):
    print('Data directory does not exist. Creating ' + DATA_DIR)
    os.makedirs(DATA_DIR)
  download()
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/normalized_query_test.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for NormalizedQuery."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import normalized_query
from privacy.dp_query import test_utils


class NormalizedQueryTest(tf.test.TestCase):

  def test_normalization(self):
    with self.cached_session() as sess:
      record1 = tf.constant([-6.0, 8.0])  # L2 norm 10, clipped to [-3.0, 4.0].
      record2 = tf.constant([4.0, -3.0])  # L2 norm 5, not clipped.

      sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip=5.0, stddev=0.0)
      query = normalized_query.NormalizedQuery(
          numerator_query=sum_query, denominator=2.0)

      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [0.5, 0.5]
      self.assertAllClose(result, expected)


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/metrics.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def accuracy(logits, labels):
  """
  Return accuracy of the array of logits (or label predictions) with respect
  to the labels.
  :param logits: this can either be logits, probabilities, or a single label
  :param labels: the correct labels to match against
  :return: the accuracy as a float
  """
  assert len(logits) == len(labels)

  if len(np.shape(logits)) > 1:
    # Predicted labels are the argmax over axis 1
    predicted_labels = np.argmax(logits, axis=1)
  else:
    # Input was already labels
    assert len(np.shape(logits)) == 1
    predicted_labels = logits

  # Check against correct labels to compute correct guesses
  correct = np.sum(predicted_labels == labels.reshape(len(labels)))

  # Divide by number of labels to obtain accuracy
  acc = float(correct) / len(labels)

  # Return float value
  return acc
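
A tiny worked example (illustrative, not part of the repo) of `accuracy` on a batch of logits; the import assumes you run from `research/pate_2017/`:

```python
import numpy as np

from metrics import accuracy

logits = np.array([[2.0, 0.1],    # argmax -> 0, matches label (correct)
                   [0.3, 1.5],    # argmax -> 1, matches label (correct)
                   [0.9, 0.4]])   # argmax -> 0, label is 1 (wrong)
labels = np.array([0, 1, 1])

print(accuracy(logits, labels))  # 2 correct out of 3 -> 0.666...
```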
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/test_utils.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility methods for testing private queries."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


def run_query(query, records, global_state=None, weights=None):
  """Executes query on the given set of records as a single sample.

  Args:
    query: A PrivateQuery to run.
    records: An iterable containing records to pass to the query.
    global_state: The current global state. If None, an initial global state is
      generated.
    weights: An optional iterable containing the weights of the records.

  Returns:
    A tuple (result, new_global_state) where "result" is the result of the
    query and "new_global_state" is the updated global state.
  """
  if not global_state:
    global_state = query.initial_global_state()
  params = query.derive_sample_params(global_state)
  sample_state = query.initial_sample_state(next(iter(records)))
  if weights is None:
    for record in records:
      sample_state = query.accumulate_record(params, sample_state, record)
  else:
    for weight, record in zip(weights, records):
      sample_state = query.accumulate_record(
          params, sample_state, record, weight)
  return query.get_noised_result(sample_state, global_state)
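
For orientation, a small sketch (assuming this repo's layout and TF 1.x graph mode) of driving a DPQuery through `run_query`; the record values are made up:

```python
import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import test_utils

# Sum two records under an L2 clip of 4.0 (neither record exceeds it here)
# with Gaussian noise of stddev 1.0 added to the sum.
records = [tf.constant([1.0, 2.0]), tf.constant([3.0, -1.0])]
query = gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=1.0)
noised_sum, _ = test_utils.run_query(query, records)

with tf.Session() as sess:
  print(sess.run(noised_sum))  # ~[4.0, 1.0] plus N(0, 1) noise per coordinate
```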
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/README.md:
--------------------------------------------------------------------------------
Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas
Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar
Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908).


### Requirements

* Python, version ≥ 2.7
* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
* matplotlib
* numpy
* scipy
* sympy (for smooth sensitivity analysis)
* write access to the current directory (otherwise, output directories in download.py and *.sh
  scripts must be changed)

## Reproducing Figures 1 and 5, and Table 2

Before running any of the analysis scripts, create the data/ directory and download votes files by running\
`$ python download.py`

To generate Figures 1 and 5 run\
`$ sh generate_figures.sh`\
The output is written to the figures/ directory.

For Table 2 run (may take several hours)\
`$ sh generate_table.sh`\
The output is written to the console.

For data-independent bounds (for comparison with Table 2), run\
`$ sh generate_table_data_independent.sh`\
The output is written to the console.

## Files in this directory

* generate_figures.sh — Master script for generating Figures 1 and 5.

* generate_table.sh — Master script for generating Table 2.

* generate_table_data_independent.sh — Master script for computing data-independent
  bounds.

* rdp_bucketized.py — Script for producing Figure 1 (right) and Figure 5 (right).

* rdp_cumulative.py — Script for producing Figure 1 (middle) and Figure 5 (left).

* smooth_sensitivity_table.py — Script for generating Table 2.

* utility_queries_answered.py — Script for producing Figure 1 (left).

* plot_partition.py — Script for producing partition.pdf, a detailed breakdown of privacy
  costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).

* plots_for_slides.py — Script for producing several plots for the slide deck.

* download.py — Utility script for populating the data/ directory.

* plot_ls_q.py is not used.


All Python files take flags. Run `script_name.py --help` for help on flags.
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


echo "Reproducing Table 2. Takes a couple of hours."
19 | 20 | executable="python smooth_sensitivity_table.py" 21 | data_dir="data" 22 | 23 | echo 24 | echo "######## MNIST ########" 25 | echo 26 | 27 | $executable \ 28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \ 29 | --threshold=200 \ 30 | --sigma1=150 \ 31 | --sigma2=40 \ 32 | --queries=640 \ 33 | --delta=1e-5 34 | 35 | echo 36 | echo "######## SVHN ########" 37 | echo 38 | 39 | $executable \ 40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \ 41 | --threshold=300 \ 42 | --sigma1=200 \ 43 | --sigma2=40 \ 44 | --queries=8500 \ 45 | --delta=1e-6 46 | 47 | echo 48 | echo "######## Adult ########" 49 | echo 50 | 51 | $executable \ 52 | --counts_file=$data_dir"/adult_250_teachers.npy" \ 53 | --threshold=300 \ 54 | --sigma1=200 \ 55 | --sigma2=40 \ 56 | --queries=1500 \ 57 | --delta=1e-5 58 | 59 | echo 60 | echo "######## Glyph (Confident) ########" 61 | echo 62 | 63 | $executable \ 64 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \ 65 | --threshold=1000 \ 66 | --sigma1=500 \ 67 | --sigma2=100 \ 68 | --queries=12000 \ 69 | --delta=1e-8 70 | 71 | echo 72 | echo "######## Glyph (Interactive, Round 1) ########" 73 | echo 74 | 75 | $executable \ 76 | --counts_file=$data_dir"/glyph_round1.npy" \ 77 | --threshold=3500 \ 78 | --sigma1=1500 \ 79 | --sigma2=100 \ 80 | --delta=1e-8 81 | 82 | echo 83 | echo "######## Glyph (Interactive, Round 2) ########" 84 | echo 85 | 86 | $executable \ 87 | --counts_file=$data_dir"/glyph_round2.npy" \ 88 | --baseline_file=$data_dir"/glyph_round2_student.npy" \ 89 | --threshold=3500 \ 90 | --sigma1=2000 \ 91 | --sigma2=200 \ 92 | --teachers=5000 \ 93 | --delta=1e-8 94 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_1e-05_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [1.0244832274550977, 1.0895559994960096, 1.152984324480169, 1.1884359033987033, 1.2238874823172377, 1.259339061235772, 1.2947906401543063, 1.3302422190728407, 1.365693797991375, 1.4011453769099091, 1.4365969558284435, 1.4720485347469778, 1.5075001136655122, 1.5429516925840465, 1.5784032715025806, 1.6138548504211152, 1.6493064293396493, 1.6847580082581837, 1.720209587176718, 1.7556611660952521, 1.7911127450137867, 1.8265643239323208, 1.8620159028508552, 1.8974674817693895, 1.9329190606879239, 1.9683706396064582, 2.0038222185249923, 2.039273797443527, 2.074725376362061, 2.1070971504466094, 2.1385696633235494, 2.1700421762004893, 2.2015146890774293, 2.2329872019543693, 2.2644597148313097, 2.2959322277082497, 2.3273444725641, 2.3584159072093906, 2.389150196384082, 2.4195528187374924, 2.4496306673437216, 2.47939177188419, 2.5088451064044532, 2.5380004577251647, 2.566868337378801, 2.5954599248338712, 2.6237615073107916, 2.651776396750234, 2.6795399578410217, 2.7070652189159037, 2.734365832716886, 2.76142277415431, 2.788203585584366, 2.814789652616146, 2.84119660459472, 2.8673271737581487, 2.8932638632808336, 2.9190555302993633, 2.9445717345610545, 2.969930040191774, 2.9951400019060035, 3.0200849330064745, 3.0449423727552256, 3.0695416941430738, 3.0940278310878373, 3.1183227421228525, 3.1424603930913553, 3.16645149256044, 3.190264545989246, 3.213952427257068, 3.2374659175249123, 3.2608511687943733, 3.284091357435858, 3.3071745619803172, 3.330169025294796, 3.3529507616602854, 3.375728486088109, 3.3982093282729817, 3.4206901704578545, 3.442981331768199, 3.4651618493845584, 3.4872994645999036, 3.509180223018679, 3.531060981437454, 3.5527797253598825, 3.5743612858426586, 
3.5959428463254346, 3.617279590270551, 3.6385625100894483, 3.659845429908345, 3.6808390641874653, 3.701823896735645, 3.7228087292838246, 3.743500323855752, 3.764187618748881, 3.784874913642011, 3.80530789401406, 3.825698197184275, 3.84608850035449, 3.866308840554522] 2 | validation acc: [0.0594, 0.1068, 0.1737, 0.2333, 0.2853, 0.337, 0.3641, 0.3896, 0.4202, 0.4531, 0.5045, 0.5675, 0.6272, 0.6809, 0.7356, 0.7729, 0.7981, 0.8153, 0.8298, 0.8387, 0.849, 0.8564, 0.8637, 0.8688, 0.8737, 0.8804, 0.8856, 0.8913, 0.8965, 0.8998, 0.9034, 0.9064, 0.9102, 0.9129, 0.9151, 0.9179, 0.92, 0.9222, 0.9237, 0.9245, 0.927, 0.9291, 0.9301, 0.9323, 0.933, 0.9337, 0.9363, 0.9348, 0.9372, 0.939, 0.9399, 0.9399, 0.9411, 0.9413, 0.9434, 0.9435, 0.944, 0.9435, 0.9454, 0.9458, 0.9469, 0.9472, 0.9476, 0.9484, 0.9482, 0.949, 0.9497, 0.9502, 0.9516, 0.9515, 0.952, 0.9516, 0.9526, 0.9526, 0.9522, 0.9533, 0.9523, 0.9533, 0.9551, 0.9536, 0.9547, 0.9554, 0.9554, 0.955, 0.957, 0.9563, 0.9566, 0.9572, 0.9567, 0.9581, 0.9574, 0.9583, 0.9587, 0.9579, 0.9582, 0.9606, 0.9602, 0.9609, 0.9602, 0.9611] 3 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.0001_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.8326011363722605, 0.8976739084131724, 0.9436584069352558, 0.9791099858537902, 1.0145615647723245, 1.0500131436908586, 1.085464722609393, 1.1209163015279273, 1.1563678804464617, 1.191819459364996, 1.2272710382835303, 1.2627226172020647, 1.2981741961205988, 1.3336257750391332, 1.3690773539576675, 1.4045289328762018, 1.4399805117947362, 1.4754320907132703, 1.5108836696318049, 1.546335248550339, 1.5817868274688733, 1.6172384063874077, 1.6526899853059418, 1.6856777203572872, 1.7171502332342274, 1.7486227461111676, 1.7800952589881076, 1.8115677718650476, 1.8430402847419878, 1.8743356446438426, 1.9052181718400023, 1.9356422549899939, 1.96567941857039, 1.9953366822421725, 2.0246147124836638, 2.0534946699030403, 2.082028256362481, 2.110225372299399, 2.1380963940615825, 2.165652175132723, 2.1929040544609935, 2.219863870575365, 2.2465439806151197, 2.272957283675776, 2.2990878528392047, 2.3249403189786673, 2.350554146796505, 2.375944032153419, 2.4011253446527228, 2.4260296809068174, 2.4507134910368893, 2.47522226487027, 2.499517175905285, 2.5235647542607182, 2.547474060537832, 2.5711619418056544, 2.594649784273822, 2.618035035543283, 2.6411230086753616, 2.664135220235837, 2.6869169566013262, 2.709575125744436, 2.732055967929309, 2.754385065414559, 2.7765655830309184, 2.798591875047433, 2.820472633466208, 2.842223780287906, 2.863805340770682, 2.8853105042888822, 2.906593424107779, 2.927876343926676, 2.9488682178596077, 2.969853050407788, 2.9906628001059756, 3.0113500949991048, 3.032012133309119, 3.0524024364793334, 3.0727927396495485, 3.093047072443617, 3.113140926228243, 3.1332347800128684, 3.1531211515642337, 3.172919094784786, 3.1927170380053385, 3.212277580271345, 3.231780148313306, 3.2512827163552673, 3.270562153959215, 3.2897698788428245, 3.308977603726434, 3.3280234805794278, 3.3469368910280495, 3.365850301476671, 3.3847132309541426, 3.4033328524582283, 3.421952473962314, 3.4405720954663996, 3.459012770868646, 3.477339125747593] 2 | validation acc: [0.0623, 0.1518, 0.2339, 0.2726, 0.2902, 0.3211, 0.3532, 0.4021, 0.4795, 0.5877, 0.6763, 0.7647, 0.8091, 0.8336, 0.8494, 0.8648, 0.8742, 0.8808, 0.8885, 0.893, 0.8974, 0.9032, 0.9063, 0.91, 0.9137, 0.9162, 0.9186, 0.921, 0.9246, 0.9264, 0.9289, 0.9309, 0.9311, 0.9326, 
0.9339, 0.9365, 0.9359, 0.9389, 0.94, 0.9398, 0.9403, 0.9413, 0.9423, 0.9437, 0.9441, 0.9456, 0.9463, 0.9469, 0.9472, 0.9482, 0.9484, 0.9501, 0.9503, 0.9514, 0.9519, 0.9517, 0.9526, 0.9531, 0.9533, 0.9537, 0.9544, 0.9547, 0.9554, 0.956, 0.9564, 0.9565, 0.9567, 0.9576, 0.958, 0.9577, 0.9581, 0.9591, 0.9585, 0.9595, 0.9604, 0.9601, 0.9608, 0.9608, 0.9618, 0.9608, 0.9612, 0.9622, 0.9628, 0.9624, 0.9629, 0.9633, 0.9632, 0.9635, 0.9635, 0.9641, 0.9644, 0.9647, 0.9652, 0.9645, 0.9654, 0.9654, 0.9658, 0.9661, 0.9659, 0.9658] 3 | -------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.001_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.6407190452894234, 0.6988809104718083, 0.7343324893903427, 0.769784068308877, 0.8052356472274113, 0.8406872261459456, 0.8761388050644799, 0.9115903839830142, 0.9470419629015485, 0.9824935418200829, 1.0179451207386172, 1.0533966996571515, 1.0888482785756857, 1.12429985749422, 1.1597514364127544, 1.1952030153312887, 1.230654594249823, 1.2642582902679655, 1.2957308031449055, 1.3272033160218457, 1.358675828898786, 1.3901483417757259, 1.4212154199821514, 1.4517316912424953, 1.4817234307631033, 1.5111556227476015, 1.5401210024272802, 1.5685965878910908, 1.5966528302821439, 1.6242391313495421, 1.651442276205101, 1.6782716502966837, 1.704717962756832, 1.7307937037915002, 1.756536559031956, 1.7819580241150645, 1.8070700925530656, 1.8318852879307048, 1.8564166986527024, 1.8806780159609964, 1.9046691154300808, 1.928371456354241, 1.9518336510227319, 1.9750714553704065, 1.998101415176878, 2.0208831515423666, 2.0434217654007636, 2.065788799060919, 2.0879693166772784, 2.109884285494962, 2.1316678352159295, 2.1532493956987055, 2.17462433812611, 2.1959072579450067, 2.2168973715317506, 2.2378252763561997, 2.258512571249329, 2.2791066757743925, 2.2994969789446076, 2.3197853043327132, 2.3398791581173386, 2.3598913778680375, 2.3796893210885894, 2.3994569011825284, 2.4189594692244896, 2.4384620372664503, 2.457723546690314, 2.476931271573923, 2.4960176104345706, 2.5149310208831928, 2.533844431331814, 2.552499639343671, 2.5711192608477567, 2.589677989431748, 2.608004344310695, 2.6263306991896416, 2.644543279614852, 2.6625768870738824, 2.6806104945329134, 2.6985240355045024, 2.7162654116914053, 2.7340067878783083, 2.751671671352242, 2.769121329409039, 2.786570987465835, 2.8040206455226313, 2.821199636692215, 2.8383580868067537, 2.855516536921293, 2.872559270264915, 2.8894270197188785, 2.906294769172841, 2.9231625186268038, 2.9398410578754635, 2.9564186110890702, 2.972996164302677, 2.9895737175162838, 3.0059561161040556, 3.0222439746836636, 3.0385318332632716] 2 | validation acc: [0.1552, 0.201, 0.269, 0.3583, 0.4291, 0.4754, 0.5168, 0.5515, 0.6042, 0.6536, 0.6972, 0.7508, 0.7861, 0.8116, 0.8305, 0.8419, 0.8523, 0.8625, 0.8687, 0.8793, 0.8832, 0.8886, 0.8923, 0.8974, 0.8993, 0.9025, 0.9062, 0.9072, 0.9088, 0.9121, 0.9126, 0.9139, 0.9173, 0.9184, 0.9179, 0.921, 0.9215, 0.9238, 0.924, 0.9251, 0.9272, 0.9278, 0.9285, 0.93, 0.9303, 0.9322, 0.932, 0.9352, 0.9359, 0.9358, 0.9377, 0.9383, 0.9389, 0.9398, 0.9402, 0.9414, 0.9414, 0.9424, 0.9434, 0.9454, 0.944, 0.9457, 0.9471, 0.947, 0.9472, 0.9484, 0.9489, 0.9493, 0.9502, 0.9503, 0.951, 0.9518, 0.9522, 0.953, 0.9526, 0.9535, 0.9542, 0.9536, 0.9537, 0.9546, 0.955, 0.9558, 0.9562, 0.9546, 0.9559, 0.9565, 0.9561, 0.9571, 0.957, 0.9575, 0.9576, 0.958, 0.9576, 0.9582, 0.9584, 0.9588, 0.9588, 0.959, 0.9593, 0.9592] 3 | 
-------------------------------------------------------------------------------- /results/mnist_dpsgd_delta_0.01_lr_3e-06.txt: -------------------------------------------------------------------------------- 1 | eps: [0.4488369542065862, 0.4895549929268951, 0.5250065718454293, 0.5604581507639637, 0.595909729682498, 0.6313613086010323, 0.6668128875195666, 0.7022644664381009, 0.7377160453566352, 0.7731676242751695, 0.8086192031937038, 0.8428388601786436, 0.8743113730555838, 0.9057838859325238, 0.9371678223219213, 0.9678211274949969, 0.9976683411210863, 1.0267473349515202, 1.0551126861496996, 1.0828260875663616, 1.1099319352876824, 1.136478641837888, 1.1624701594893336, 1.1879720160767095, 1.2130148404534087, 1.237611132435135, 1.2617823771303591, 1.2855809709028272, 1.3090175177716414, 1.3320676101179185, 1.354787562872218, 1.3771925327072796, 1.3992959375237164, 1.421111890143953, 1.4426552521444411, 1.463938171963338, 1.484926525203894, 1.5056750474995524, 1.5262012182396667, 1.5465235362218086, 1.5666173900064342, 1.586459547392393, 1.6061387901356725, 1.6256413581776337, 1.6448849394214122, 1.6640117402897139, 1.6829251507383356, 1.7016664262291141, 1.7202860477331998, 1.7386695628737965, 1.7569959177527434, 1.775051258049255, 1.7930848655082858, 1.8108436077942702, 1.8285849839811732, 1.8460808862726925, 1.863530544329489, 1.8807997577948097, 1.8979582079093487, 1.9150395135099436, 1.9319072629639062, 1.948775012417869, 1.9654198896548445, 1.9819974428684513, 1.9985414578761678, 2.0148293164557756, 2.0311171750353836, 2.047319524470186, 2.063318187250421, 2.079316850030656, 2.095230822970841, 2.110940786057371, 2.1266507491439004, 2.142333957071471, 2.1577557138768193, 2.173177470682168, 2.188599227487517, 2.203826372709379, 2.218960413994861, 2.234094455280343, 2.2492215146115266, 2.2640683285237113, 2.2789151424358964, 2.2937619563480807, 2.308575939755106, 2.3231360118616964, 2.3376960839682868, 2.3522561560748776, 2.3668162281814684, 2.381112291486219, 2.395386104814934, 2.4096599181436478, 2.4239337314723626, 2.438087536786136, 2.4520755718531704, 2.466063606920205, 2.480051641987239, 2.4940396770542734, 2.5078632962244485, 2.52156603107018] 2 | validation acc: [0.1643, 0.1843, 0.2231, 0.2901, 0.3378, 0.3651, 0.3892, 0.4121, 0.4509, 0.4894, 0.55, 0.6317, 0.7042, 0.7651, 0.7938, 0.816, 0.8333, 0.8453, 0.8603, 0.8713, 0.8776, 0.882, 0.887, 0.8912, 0.8959, 0.9004, 0.9038, 0.907, 0.9092, 0.9123, 0.9154, 0.9176, 0.9196, 0.9214, 0.9244, 0.9251, 0.9279, 0.9287, 0.9308, 0.9316, 0.9326, 0.9333, 0.9352, 0.936, 0.9369, 0.938, 0.9388, 0.9404, 0.9409, 0.9412, 0.942, 0.943, 0.9447, 0.9447, 0.9461, 0.9462, 0.9467, 0.9481, 0.9481, 0.9478, 0.948, 0.95, 0.9501, 0.9507, 0.9507, 0.9513, 0.9516, 0.9524, 0.9532, 0.9542, 0.9541, 0.9543, 0.9549, 0.9557, 0.9561, 0.9562, 0.9577, 0.9565, 0.9576, 0.9584, 0.9585, 0.9592, 0.9593, 0.9598, 0.9594, 0.9603, 0.9609, 0.9611, 0.9612, 0.9619, 0.9623, 0.9625, 0.963, 0.9628, 0.9634, 0.9632, 0.9635, 0.9637, 0.9635, 0.9634] 3 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/no_privacy_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements DPQuery interface for no privacy average queries."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from distutils.version import LooseVersion
import tensorflow as tf

from privacy.dp_query import dp_query

if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
  nest = tf.contrib.framework.nest
else:
  nest = tf.nest


class NoPrivacySumQuery(dp_query.SumAggregationDPQuery):
  """Implements DPQuery interface for a sum query with no privacy.

  Accumulates vectors without clipping or adding noise.
  """

  def get_noised_result(self, sample_state, global_state):
    """See base class."""
    return sample_state, global_state


class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
  """Implements DPQuery interface for an average query with no privacy.

  Accumulates vectors and normalizes by the total weight of the accumulated
  vectors (the number of vectors, when all weights are 1).
  """

  def initial_sample_state(self, template):
    """See base class."""
    return (super(NoPrivacyAverageQuery, self).initial_sample_state(template),
            tf.constant(0.0))

  def preprocess_record(self, params, record, weight=1):
    """Multiplies record by weight."""
    weighted_record = nest.map_structure(lambda t: weight * t, record)
    return (weighted_record, tf.cast(weight, tf.float32))

  def accumulate_record(self, params, sample_state, record, weight=1):
    """Accumulates record, multiplying by weight."""
    weighted_record = nest.map_structure(lambda t: weight * t, record)
    return self.accumulate_preprocessed_record(
        sample_state, (weighted_record, tf.cast(weight, tf.float32)))

  def get_noised_result(self, sample_state, global_state):
    """See base class."""
    sum_state, denominator = sample_state

    return (
        nest.map_structure(lambda t: t / denominator, sum_state),
        global_state)
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Privacy Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Privacy library."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

# pylint: disable=g-import-not-at-top

if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
  pass
else:
  from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
  from privacy.analysis.privacy_ledger import PrivacyLedger
  from privacy.analysis.privacy_ledger import QueryWithLedger
  from privacy.analysis.privacy_ledger import SampleEntry

  from privacy.dp_query.dp_query import DPQuery
  from privacy.dp_query.gaussian_query import GaussianAverageQuery
  from privacy.dp_query.gaussian_query import GaussianSumQuery
  from privacy.dp_query.nested_query import NestedQuery
  from privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
  from privacy.dp_query.no_privacy_query import NoPrivacySumQuery
  from privacy.dp_query.normalized_query import NormalizedQuery
  from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipSumQuery
  from privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipAverageQuery

  from privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPAdagradOptimizer
  from privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPAdamOptimizer
  from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
  from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer

  try:
    from privacy.bolt_on.models import BoltOnModel
    from privacy.bolt_on.optimizers import BoltOn
    from privacy.bolt_on.losses import StrongConvexMixin
    from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
    from privacy.bolt_on.losses import StrongConvexHuber
  except ImportError:
    # module `bolt_on` not yet available in this version of TF Privacy
    pass
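
Given these re-exports, here is a rough sketch of wiring the DP optimizer into TF 1.x-style training; the constructor arguments follow the repo's DP-SGD tutorials, and `vector_loss` is a hypothetical per-example loss tensor you must define yourself:

```python
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

optimizer = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,      # clip each microbatch gradient to this L2 norm
    noise_multiplier=1.1,  # noise stddev = noise_multiplier * l2_norm_clip
    num_microbatches=256,  # how many microbatches each minibatch is split into
    learning_rate=0.15)

# DP optimizers need the per-example (vector) loss rather than a scalar mean,
# so that gradients can be clipped per microbatch before averaging.
train_op = optimizer.minimize(loss=vector_loss)
```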
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================


echo "Table 2 with data-independent analysis."

executable="python smooth_sensitivity_table.py"
data_dir="data"

echo
echo "######## MNIST ########"
echo

$executable \
  --counts_file=$data_dir"/mnist_250_teachers.npy" \
  --threshold=200 \
  --sigma1=150 \
  --sigma2=40 \
  --queries=640 \
  --delta=1e-5 \
  --data_independent
echo
echo "######## SVHN ########"
echo

$executable \
  --counts_file=$data_dir"/svhn_250_teachers.npy" \
  --threshold=300 \
  --sigma1=200 \
  --sigma2=40 \
  --queries=8500 \
  --delta=1e-6 \
  --data_independent

echo
echo "######## Adult ########"
echo

$executable \
  --counts_file=$data_dir"/adult_250_teachers.npy" \
  --threshold=300 \
  --sigma1=200 \
  --sigma2=40 \
  --queries=1500 \
  --delta=1e-5 \
  --data_independent

echo
echo "######## Glyph (Confident) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_5000_teachers.npy" \
  --threshold=1000 \
  --sigma1=500 \
  --sigma2=100 \
  --queries=12000 \
  --delta=1e-8 \
  --data_independent

echo
echo "######## Glyph (Interactive, Round 1) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_round1.npy" \
  --threshold=3500 \
  --sigma1=1500 \
  --sigma2=100 \
  --delta=1e-8 \
  --data_independent

echo
echo "######## Glyph (Interactive, Round 2) ########"
echo

$executable \
  --counts_file=$data_dir"/glyph_round2.npy" \
  --baseline_file=$data_dir"/glyph_round2_student.npy" \
  --threshold=3500 \
  --sigma1=2000 \
  --sigma2=200 \
  --teachers=5000 \
  --delta=1e-8 \
  --order=8.5 \
  --data_independent
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/README.md:
--------------------------------------------------------------------------------
# BoltOn Subpackage

This package contains source code for the BoltOn method, a particular
differential-privacy (DP) technique that uses output perturbations and
leverages additional assumptions to provide a new way of approaching the
privacy guarantees.

## BoltOn Description

This method uses four key steps to achieve privacy guarantees:

1. Adds noise to weights after training (output perturbation).
2. Projects weights to R, the radius of the hypothesis space,
   after each batch. This value is configurable by the user.
3. Limits the learning rate.
4. Uses a strongly convex loss function (see compile).

For more details on the strong convexity requirements, see:
Bolt-on Differential Privacy for Scalable Stochastic Gradient
Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf

## Why BoltOn?

The major difference of the BoltOn method is that it injects noise after model
convergence, rather than noising gradients or weights during training. This
approach requires the additional constraints listed in the Description.
Should the use case and model satisfy these constraints, this is another
approach that can be trained to maximize utility while maintaining privacy.
The paper describes in detail the advantages and disadvantages of this
approach and its results compared to some other methods, namely noising at
each iteration and no noising.

## Tutorials

This package has a tutorial that can be found in the root tutorials directory,
under `bolton_tutorial.py`.

## Contribution

This package was initially contributed by Georgian Partners with the hope of
growing the tensorflow/privacy library. There are several rich use cases for
(epsilon, delta)-differential privacy in machine learning, some of which can
be explored here:
https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e
https://arxiv.org/pdf/1811.04911.pdf

## Stability

As this package is pegged to TensorFlow 2.0, it may encounter stability
issues in the ongoing development of TensorFlow 2.0.

This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0b1. If you
would like to use this subpackage, please do use one of these versions as we
cannot guarantee it will work for all latest releases. If you do find issues,
feel free to raise an issue to the contributors listed below.

## Contacts

In addition to the maintainers of tensorflow/privacy listed in the root
README.md, please feel free to contact members of Georgian Partners. In
particular,

* Georgian Partners (@georgianpartners)
* Ji Chao Zhang (@Jichaogp)
* Christopher Choquette (@cchoquette)

## Copyright

Copyright 2019 - Google LLC
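
Following the Tutorials pointer above, a condensed sketch in the spirit of `bolton_tutorial.py`; the argument values are illustrative and `x`, `y` are hypothetical training data, so consult the tutorial for the exact API:

```python
import tensorflow as tf

from privacy.bolt_on import losses
from privacy.bolt_on import models

# Binary classification with a strongly convex loss, as BoltOn requires.
bolt = models.BoltOnModel(n_outputs=1)
loss = losses.StrongConvexBinaryCrossentropy(
    1.0, 1.0, 1.0)  # reg_lambda, C, radius_constant (illustrative values)
bolt.compile(optimizer=tf.optimizers.SGD(), loss=loss)

# Noise is sampled and applied at the end of fit (output perturbation).
bolt.fit(x, y, epsilon=2, noise_distribution='laplace', epochs=2)
```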
--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py:
--------------------------------------------------------------------------------
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for NoPrivacyAverageQuery."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import tensorflow as tf

from privacy.dp_query import no_privacy_query
from privacy.dp_query import test_utils


class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase):

  def test_sum(self):
    with self.cached_session() as sess:
      record1 = tf.constant([2.0, 0.0])
      record2 = tf.constant([-1.0, 1.0])

      query = no_privacy_query.NoPrivacySumQuery()
      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [1.0, 1.0]
      self.assertAllClose(result, expected)

  def test_no_privacy_average(self):
    with self.cached_session() as sess:
      record1 = tf.constant([5.0, 0.0])
      record2 = tf.constant([-1.0, 2.0])

      query = no_privacy_query.NoPrivacyAverageQuery()
      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [2.0, 1.0]
      self.assertAllClose(result, expected)

  def test_no_privacy_weighted_average(self):
    with self.cached_session() as sess:
      record1 = tf.constant([4.0, 0.0])
      record2 = tf.constant([-1.0, 1.0])

      weights = [1, 3]

      query = no_privacy_query.NoPrivacyAverageQuery()
      query_result, _ = test_utils.run_query(
          query, [record1, record2], weights=weights)
      result = sess.run(query_result)
      expected = [0.25, 0.75]
      self.assertAllClose(result, expected)

  @parameterized.named_parameters(
      ('type_mismatch', [1.0], (1.0,), TypeError),
      ('too_few_on_left', [1.0], [1.0, 1.0], ValueError),
      ('too_few_on_right', [1.0, 1.0], [1.0], ValueError))
  def test_incompatible_records(self, record1, record2, error_type):
    query = no_privacy_query.NoPrivacySumQuery()
    with self.assertRaises(error_type):
      test_utils.run_query(query, [record1, record2])


if __name__ == '__main__':
  tf.test.main()
14 | """Tests for tensor_buffer in graph mode.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import tensor_buffer 23 | 24 | 25 | class TensorBufferTest(tf.test.TestCase): 26 | """Tests for TensorBuffer in graph mode.""" 27 | 28 | def test_noresize(self): 29 | """Test buffer does not resize if capacity is not exceeded.""" 30 | with self.cached_session() as sess: 31 | size, shape = 2, [2, 3] 32 | 33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 34 | value1 = [[1, 2, 3], [4, 5, 6]] 35 | with tf.control_dependencies([my_buffer.append(value1)]): 36 | value2 = [[7, 8, 9], [10, 11, 12]] 37 | with tf.control_dependencies([my_buffer.append(value2)]): 38 | values = my_buffer.values 39 | current_size = my_buffer.current_size 40 | capacity = my_buffer.capacity 41 | self.evaluate(tf.global_variables_initializer()) 42 | 43 | v, cs, cap = sess.run([values, current_size, capacity]) 44 | self.assertAllEqual(v, [value1, value2]) 45 | self.assertEqual(cs, 2) 46 | self.assertEqual(cap, 2) 47 | 48 | def test_resize(self): 49 | """Test buffer resizes if capacity is exceeded.""" 50 | with self.cached_session() as sess: 51 | size, shape = 2, [2, 3] 52 | 53 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 54 | value1 = [[1, 2, 3], [4, 5, 6]] 55 | with tf.control_dependencies([my_buffer.append(value1)]): 56 | value2 = [[7, 8, 9], [10, 11, 12]] 57 | with tf.control_dependencies([my_buffer.append(value2)]): 58 | value3 = [[13, 14, 15], [16, 17, 18]] 59 | with tf.control_dependencies([my_buffer.append(value3)]): 60 | values = my_buffer.values 61 | current_size = my_buffer.current_size 62 | capacity = my_buffer.capacity 63 | self.evaluate(tf.global_variables_initializer()) 64 | 65 | v, cs, cap = sess.run([values, current_size, capacity]) 66 | self.assertAllEqual(v, [value1, value2, value3]) 67 | self.assertEqual(cs, 3) 68 | self.assertEqual(cap, 4) 69 | 70 | 71 | if __name__ == '__main__': 72 | tf.test.main() 73 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for tensor_buffer in eager mode.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import tensor_buffer 23 | 24 | tf.enable_eager_execution() 25 | 26 | 27 | class TensorBufferTest(tf.test.TestCase): 28 | """Tests for TensorBuffer in eager mode.""" 29 | 30 | def test_basic(self): 31 | size, shape = 2, [2, 3] 32 | 33 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 34 | 35 | value1 = [[1, 2, 3], [4, 5, 6]] 36 | my_buffer.append(value1) 37 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 38 | 39 | value2 = [[4, 5, 6], [7, 8, 9]] 40 | my_buffer.append(value2) 41 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 42 | 43 | def test_fail_on_scalar(self): 44 | with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'): 45 | tensor_buffer.TensorBuffer(1, ()) 46 | 47 | def test_fail_on_inconsistent_shape(self): 48 | size, shape = 1, [2, 3] 49 | 50 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 51 | 52 | with self.assertRaisesRegexp( 53 | tf.errors.InvalidArgumentError, 54 | 'Appending value of inconsistent shape.'): 55 | my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) 56 | 57 | def test_resize(self): 58 | size, shape = 2, [2, 3] 59 | 60 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 61 | 62 | # Append three buffers. Third one should succeed after resizing. 63 | value1 = [[1, 2, 3], [4, 5, 6]] 64 | my_buffer.append(value1) 65 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 66 | self.assertAllEqual(my_buffer.current_size.numpy(), 1) 67 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 68 | 69 | value2 = [[4, 5, 6], [7, 8, 9]] 70 | my_buffer.append(value2) 71 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 72 | self.assertAllEqual(my_buffer.current_size.numpy(), 2) 73 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 74 | 75 | value3 = [[7, 8, 9], [10, 11, 12]] 76 | my_buffer.append(value3) 77 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3]) 78 | self.assertAllEqual(my_buffer.current_size.numpy(), 3) 79 | # Capacity should have doubled. 80 | self.assertAllEqual(my_buffer.capacity.numpy(), 4) 81 | 82 | 83 | if __name__ == '__main__': 84 | tf.test.main() 85 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/README.md: -------------------------------------------------------------------------------- 1 | Implementation of an RDP privacy accountant and smooth sensitivity analysis for 2 | the PATE framework. The underlying theory and supporting experiments appear in 3 | "Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya 4 | Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018, 5 | https://arxiv.org/abs/1802.08908). 6 | 7 | ## Overview 8 | 9 | The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 10 | by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from 11 | Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 12 | framework enables model-agnostic training that provably provides [differential 13 | privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 14 | dataset. 15 | 16 | The framework consists of _teachers_, the _student_ model, and the _aggregator_. 
The 17 | teachers are models trained on disjoint subsets of the training data. The student 18 | model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by 19 | interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 20 | outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or 21 | refuses to answer. 22 | 23 | Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_, 24 | which means that they are solely a function of the aggregator's parameters. Alternatively, privacy 25 | analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on 26 | the input distribution, the final privacy guarantees can be improved relative to the data-independent 27 | analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 28 | therefore publishing these guarantees requires its own sanitization procedure. In our case, 29 | sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis. 30 | 31 | The common machinery used for all privacy analyses in this repository is 32 | Rényi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 33 | 34 | This repository contains implementations of privacy accountants and smooth 35 | sensitivity analysis for several data-independent and data-dependent mechanisms that together 36 | comprise the PATE framework. 37 | 38 | 39 | ### Requirements 40 | 41 | * Python, version ≥ 2.7 42 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`) 43 | * numpy 44 | * scipy 45 | * sympy (for smooth sensitivity analysis) 46 | * unittest (for testing) 47 | 48 | 49 | ### Self-testing 50 | 51 | To verify the installation, run 52 | ```bash 53 | $ python core_test.py 54 | $ python smooth_sensitivity_test.py 55 | ``` 56 | 57 | 58 | ## Files in this directory 59 | 60 | * core.py — RDP privacy accountant for several vote aggregators (GNMax, 61 | Threshold, Laplace). 62 | 63 | * smooth_sensitivity.py — Smooth sensitivity analysis for GNMax and 64 | Threshold mechanisms. 65 | 66 | * core_test.py and smooth_sensitivity_test.py — Unit tests for the 67 | files above. 68 | 69 | ## Contact information 70 | 71 | You may direct comments to mironov@google.com and pull requests to @ilyamironov. 72 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Plots LS(q). 17 | 18 | A script in support of the PATE2 paper. NOT PRESENTLY USED.
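(Here LS(q) is read as the local sensitivity of the data-dependent RDP bound
for the GNMax aggregator, viewed as a function of the outcome probability q;
this reading is an assumption inferred from the smooth_sensitivity helpers
used below, not a statement from the original authors.)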
19 |
20 | The output is written to a specified directory as a pdf file.
21 | """
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 |
26 | import math
27 | import os
28 | import sys
29 |
30 | sys.path.append('..')  # Main modules reside in the parent directory.
31 |
32 |
33 | from absl import app
34 | from absl import flags
35 | import matplotlib
36 | matplotlib.use('TkAgg')
37 | import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
38 | import numpy as np
39 | import smooth_sensitivity as pate_ss
40 |
41 | plt.style.use('ggplot')
42 |
43 | FLAGS = flags.FLAGS
44 |
45 | flags.DEFINE_string('figures_dir', '', 'Path where the output is written to.')
46 |
47 |
48 | def compute_ls_q(sigma, order, num_classes):
49 |
50 |   def beta(q):
51 |     return pate_ss._compute_rdp_gnmax(sigma, math.log(q), order)
52 |
53 |   def bu(q):
54 |     return pate_ss._compute_bu_gnmax(q, sigma, order)
55 |
56 |   def bl(q):
57 |     return pate_ss._compute_bl_gnmax(q, sigma, order)
58 |
59 |   def delta_beta(q):
60 |     if q == 0 or q > .8:
61 |       return 0
62 |     beta_q = beta(q)
63 |     beta_bu_q = beta(bu(q))
64 |     beta_bl_q = beta(bl(q))
65 |     assert beta_bl_q <= beta_q <= beta_bu_q
66 |     return beta_bu_q - beta_q  # max(beta_bu_q - beta_q, beta_q - beta_bl_q)
67 |
68 |   logq0 = pate_ss.compute_logq0_gnmax(sigma, order)
69 |   logq1 = pate_ss._compute_logq1(sigma, order, num_classes)
70 |   print(math.exp(logq1), math.exp(logq0))
71 |   xs = np.linspace(0, .1, num=1000, endpoint=True)
72 |   ys = [delta_beta(x) for x in xs]
73 |   return xs, ys
74 |
75 |
76 | def main(argv):
77 |   del argv  # Unused.
78 |
79 |   sigma = 20
80 |   order = 20.
81 |   num_classes = 10
82 |
83 |   # sigma = 20
84 |   # order = 25.
85 |   # num_classes = 10
86 |
87 |   x_axis, ys = compute_ls_q(sigma, order, num_classes)
88 |
89 |   fig, ax = plt.subplots()
90 |   fig.set_figheight(4.5)
91 |   fig.set_figwidth(4.7)
92 |
93 |   ax.plot(x_axis, ys, alpha=.8, linewidth=5)
94 |   plt.xlabel('Number of queries answered', fontsize=16)
95 |   plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
96 |   ax.tick_params(labelsize=14)
97 |   fout_name = os.path.join(FLAGS.figures_dir, 'ls_of_q.pdf')
98 |   print('Saving the graph to ' + fout_name)
99 |   plt.savefig(fout_name, bbox_inches='tight')  # Actually write the pdf promised in the docstring.
100 |   plt.show()
101 |
102 |   plt.close('all')
103 |
104 |
105 | if __name__ == '__main__':
106 |   app.run(main)
107 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from absl import app
21 | from absl import flags
22 | import matplotlib
23 | import os
24 |
25 | matplotlib.use('TkAgg')
26 | import matplotlib.pyplot as plt
27 |
28 | plt.style.use('ggplot')
29 |
30 | FLAGS = flags.FLAGS
31 | flags.DEFINE_string('plot_file', '', 'Output file name.')
32 |
33 | qa_lnmax = [500, 750] + list(range(1000, 12500, 500))  # list() keeps this valid under Python 3.
34 |
35 | acc_lnmax = [43.3, 52.3, 59.8, 66.7, 68.8, 70.5, 71.6, 72.3, 72.6, 72.9, 73.4,
36 |              73.4, 73.7, 73.9, 74.2, 74.4, 74.5, 74.7, 74.8, 75, 75.1, 75.1,
37 |              75.4, 75.4, 75.4]
38 |
39 | qa_gnmax = [456, 683, 908, 1353, 1818, 2260, 2702, 3153, 3602, 4055, 4511, 4964,
40 |             5422, 5875, 6332, 6792, 7244, 7696, 8146, 8599, 9041, 9496, 9945,
41 |             10390, 10842]
42 |
43 | acc_gnmax = [39.6, 52.2, 59.6, 66.6, 69.6, 70.5, 71.8, 72, 72.7, 72.9, 73.3,
44 |              73.4, 73.4, 73.8, 74, 74.2, 74.4, 74.5, 74.5, 74.7, 74.8, 75, 75.1,
45 |              75.1, 75.4]
46 |
47 | qa_gnmax_aggressive = [167, 258, 322, 485, 647, 800, 967, 1133, 1282, 1430,
48 |                        1573, 1728, 1889, 2028, 2190, 2348, 2510, 2668, 2950,
49 |                        3098, 3265, 3413, 3581, 3730]
50 |
51 | acc_gnmax_aggressive = [17.8, 26.8, 39.3, 48, 55.7, 61, 62.8, 64.8, 65.4, 66.7,
52 |                         66.2, 68.3, 68.3, 68.7, 69.1, 70, 70.2, 70.5, 70.9,
53 |                         70.7, 71.3, 71.3, 71.3, 71.8]
54 |
55 |
56 | def main(argv):
57 |   del argv  # Unused.
58 |
59 |   plt.close('all')
60 |   fig, ax = plt.subplots()
61 |   fig.set_figheight(4.7)
62 |   fig.set_figwidth(5)
63 |   ax.plot(qa_lnmax, acc_lnmax, color='r', ls='--', linewidth=5., marker='o',
64 |           alpha=.5, label='LNMax')
65 |   ax.plot(qa_gnmax, acc_gnmax, color='g', ls='-', linewidth=5., marker='o',
66 |           alpha=.5, label='Confident-GNMax')
67 |   # ax.plot(qa_gnmax_aggressive, acc_gnmax_aggressive, color='b', ls='-', marker='o', alpha=.5, label='Confident-GNMax (aggressive)')
68 |   plt.xticks([0, 2000, 4000, 6000])
69 |   plt.xlim([0, 6000])
70 |   # ax.set_yscale('log')
71 |   plt.ylim([65, 76])
72 |   ax.tick_params(labelsize=14)
73 |   plt.xlabel('Number of queries answered', fontsize=16)
74 |   plt.ylabel('Student test accuracy (%)', fontsize=16)
75 |   plt.legend(loc=2, prop={'size': 16})
76 |
77 |   x = [400, 2116, 4600, 4680]
78 |   y = [69.5, 68.5, 74, 72.5]
79 |   annotations = [0.76, 2.89, 1.42, 5.76]
80 |   color_annotations = ['g', 'r', 'g', 'r']
81 |   for i, txt in enumerate(annotations):
82 |     ax.annotate(r'${\varepsilon=}$' + str(txt), (x[i], y[i]), fontsize=16,
83 |                 color=color_annotations[i])
84 |
85 |   plot_filename = os.path.expanduser(FLAGS.plot_file)
86 |   plt.savefig(plot_filename, bbox_inches='tight')
87 |   plt.show()
88 |
89 | if __name__ == '__main__':
90 |   app.run(main)
91 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/normalized_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Implements DPQuery interface for normalized queries.
16 | """
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import collections
23 |
24 | from distutils.version import LooseVersion
25 | import tensorflow as tf
26 |
27 | from privacy.dp_query import dp_query
28 |
29 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
30 |   nest = tf.contrib.framework.nest
31 | else:
32 |   nest = tf.nest
33 |
34 |
35 | class NormalizedQuery(dp_query.DPQuery):
36 |   """DPQuery for queries with a DPQuery numerator and fixed denominator."""
37 |
38 |   # pylint: disable=invalid-name
39 |   _GlobalState = collections.namedtuple(
40 |       '_GlobalState', ['numerator_state', 'denominator'])
41 |
42 |   def __init__(self, numerator_query, denominator):
43 |     """Initializer for NormalizedQuery.
44 |
45 |     Args:
46 |       numerator_query: A DPQuery for the numerator.
47 |       denominator: A value for the denominator. May be None if it will be
48 |         supplied later (e.g., by a subclass that fills in the global state's
49 |         denominator) before get_noised_result is called.
50 |     """
51 |     self._numerator = numerator_query
52 |     self._denominator = denominator
53 |
54 |   def set_ledger(self, ledger):
55 |     """See base class."""
56 |     self._numerator.set_ledger(ledger)
57 |
58 |   def initial_global_state(self):
59 |     """See base class."""
60 |     if self._denominator is not None:
61 |       denominator = tf.cast(self._denominator, tf.float32)
62 |     else:
63 |       denominator = None
64 |     return self._GlobalState(
65 |         self._numerator.initial_global_state(), denominator)
66 |
67 |   def derive_sample_params(self, global_state):
68 |     """See base class."""
69 |     return self._numerator.derive_sample_params(global_state.numerator_state)
70 |
71 |   def initial_sample_state(self, template):
72 |     """See base class."""
73 |     # NormalizedQuery has no sample state beyond the numerator state.
74 | return self._numerator.initial_sample_state(template) 75 | 76 | def preprocess_record(self, params, record): 77 | return self._numerator.preprocess_record(params, record) 78 | 79 | def accumulate_preprocessed_record( 80 | self, sample_state, preprocessed_record): 81 | """See base class.""" 82 | return self._numerator.accumulate_preprocessed_record( 83 | sample_state, preprocessed_record) 84 | 85 | def get_noised_result(self, sample_state, global_state): 86 | """See base class.""" 87 | noised_sum, new_sum_global_state = self._numerator.get_noised_result( 88 | sample_state, global_state.numerator_state) 89 | def normalize(v): 90 | return tf.truediv(v, global_state.denominator) 91 | 92 | return (nest.map_structure(normalize, noised_sum), 93 | self._GlobalState(new_sum_global_state, global_state.denominator)) 94 | 95 | def merge_sample_states(self, sample_state_1, sample_state_2): 96 | """See base class.""" 97 | return self._numerator.merge_sample_states(sample_state_1, sample_state_2) 98 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | py_library( 6 | name = "dp_query", 7 | srcs = ["dp_query.py"], 8 | deps = [ 9 | "//third_party/py/distutils", 10 | "//third_party/py/tensorflow", 11 | ], 12 | ) 13 | 14 | py_library( 15 | name = "gaussian_query", 16 | srcs = ["gaussian_query.py"], 17 | deps = [ 18 | ":dp_query", 19 | ":normalized_query", 20 | "//third_party/py/distutils", 21 | "//third_party/py/tensorflow", 22 | ], 23 | ) 24 | 25 | py_test( 26 | name = "gaussian_query_test", 27 | size = "small", 28 | srcs = ["gaussian_query_test.py"], 29 | python_version = "PY2", 30 | deps = [ 31 | ":gaussian_query", 32 | ":test_utils", 33 | "//third_party/py/absl/testing:parameterized", 34 | "//third_party/py/numpy", 35 | "//third_party/py/six", 36 | "//third_party/py/tensorflow", 37 | ], 38 | ) 39 | 40 | py_library( 41 | name = "no_privacy_query", 42 | srcs = ["no_privacy_query.py"], 43 | deps = [ 44 | ":dp_query", 45 | "//third_party/py/distutils", 46 | "//third_party/py/tensorflow", 47 | ], 48 | ) 49 | 50 | py_test( 51 | name = "no_privacy_query_test", 52 | size = "small", 53 | srcs = ["no_privacy_query_test.py"], 54 | python_version = "PY2", 55 | deps = [ 56 | ":no_privacy_query", 57 | ":test_utils", 58 | "//third_party/py/absl/testing:parameterized", 59 | "//third_party/py/tensorflow", 60 | ], 61 | ) 62 | 63 | py_library( 64 | name = "normalized_query", 65 | srcs = ["normalized_query.py"], 66 | deps = [ 67 | ":dp_query", 68 | "//third_party/py/distutils", 69 | "//third_party/py/tensorflow", 70 | ], 71 | ) 72 | 73 | py_test( 74 | name = "normalized_query_test", 75 | size = "small", 76 | srcs = ["normalized_query_test.py"], 77 | python_version = "PY2", 78 | deps = [ 79 | ":gaussian_query", 80 | ":normalized_query", 81 | ":test_utils", 82 | "//third_party/py/tensorflow", 83 | ], 84 | ) 85 | 86 | py_library( 87 | name = "nested_query", 88 | srcs = ["nested_query.py"], 89 | deps = [ 90 | ":dp_query", 91 | "//third_party/py/distutils", 92 | "//third_party/py/tensorflow", 93 | ], 94 | ) 95 | 96 | py_test( 97 | name = "nested_query_test", 98 | size = "small", 99 | srcs = ["nested_query_test.py"], 100 | python_version = "PY2", 101 | deps = [ 102 | ":gaussian_query", 103 | ":nested_query", 104 | ":test_utils", 105 | 
"//third_party/py/absl/testing:parameterized", 106 | "//third_party/py/distutils", 107 | "//third_party/py/numpy", 108 | "//third_party/py/tensorflow", 109 | ], 110 | ) 111 | 112 | py_library( 113 | name = "quantile_adaptive_clip_sum_query", 114 | srcs = ["quantile_adaptive_clip_sum_query.py"], 115 | deps = [ 116 | ":dp_query", 117 | ":gaussian_query", 118 | ":normalized_query", 119 | "//third_party/py/tensorflow", 120 | ], 121 | ) 122 | 123 | py_test( 124 | name = "quantile_adaptive_clip_sum_query_test", 125 | srcs = ["quantile_adaptive_clip_sum_query_test.py"], 126 | python_version = "PY2", 127 | deps = [ 128 | ":quantile_adaptive_clip_sum_query", 129 | ":test_utils", 130 | "//third_party/py/numpy", 131 | "//third_party/py/tensorflow", 132 | "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger", 133 | ], 134 | ) 135 | 136 | py_library( 137 | name = "test_utils", 138 | srcs = ["test_utils.py"], 139 | deps = [], 140 | ) 141 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Command-line script for computing privacy of a model trained with DP-SGD. 16 | 17 | The script applies the RDP accountant to estimate privacy budget of an iterated 18 | Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags. 19 | 20 | Example: 21 | compute_dp_sgd_privacy 22 | --N=60000 \ 23 | --batch_size=256 \ 24 | --noise_multiplier=1.12 \ 25 | --epochs=60 \ 26 | --delta=1e-5 27 | 28 | The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP. 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import math 36 | import sys 37 | 38 | from absl import app 39 | from absl import flags 40 | 41 | # Opting out of loading all sibling packages and their dependencies. 
42 | sys.skip_tf_privacy_import = True 43 | 44 | from privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top 45 | from privacy.analysis.rdp_accountant import get_privacy_spent 46 | 47 | FLAGS = flags.FLAGS 48 | 49 | flags.DEFINE_integer('N', None, 'Total number of examples') 50 | flags.DEFINE_integer('batch_size', None, 'Batch size') 51 | flags.DEFINE_float('noise_multiplier', None, 'Noise multiplier for DP-SGD') 52 | flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)') 53 | flags.DEFINE_float('delta', 1e-6, 'Target delta') 54 | 55 | flags.mark_flag_as_required('N') 56 | flags.mark_flag_as_required('batch_size') 57 | flags.mark_flag_as_required('noise_multiplier') 58 | flags.mark_flag_as_required('epochs') 59 | 60 | 61 | def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): 62 | """Compute and print results of DP-SGD analysis.""" 63 | 64 | # compute_rdp requires that sigma be the ratio of the standard deviation of 65 | # the Gaussian noise to the l2-sensitivity of the function to which it is 66 | # added. Hence, sigma here corresponds to the `noise_multiplier` parameter 67 | # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer 68 | rdp = compute_rdp(q, sigma, steps, orders) 69 | 70 | eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta) 71 | 72 | print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' 73 | ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ') 74 | print('differential privacy with eps = {:.3g} and delta = {}.'.format( 75 | eps, delta)) 76 | print('The optimal RDP order is {}.'.format(opt_order)) 77 | 78 | if opt_order == max(orders) or opt_order == min(orders): 79 | print('The privacy estimate is likely to be improved by expanding ' 80 | 'the set of orders.') 81 | 82 | 83 | def main(argv): 84 | del argv # argv is not used. 85 | 86 | q = FLAGS.batch_size / FLAGS.N # q - the sampling ratio. 87 | if q > 1: 88 | raise app.UsageError('N must be larger than the batch size.') 89 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] + 90 | list(range(5, 64)) + [128, 256, 512]) 91 | steps = int(math.ceil(FLAGS.epochs * FLAGS.N / FLAGS.batch_size)) 92 | 93 | apply_dp_sgd_analysis(q, FLAGS.noise_multiplier, steps, orders, FLAGS.delta) 94 | 95 | 96 | if __name__ == '__main__': 97 | app.run(main) 98 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2017/train_teachers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
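An aside before the next file: the accountant wrapped by `compute_dp_sgd_privacy.py` above can also be called as a library. A minimal sketch, assuming this repository's import layout, with the flag values taken from that script's docstring:

```python
import math

from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

q = 256 / 60000.0                          # sampling ratio: batch_size / N
steps = int(math.ceil(60 * 60000 / 256.))  # 60 epochs of training
orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
          list(range(5, 64)) + [128, 256, 512])
rdp = compute_rdp(q=q, noise_multiplier=1.12, steps=steps, orders=orders)
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=1e-5)
print(eps, opt_order)  # should roughly match the (2.92, 1e-5)-DP figure quoted above
```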
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import deep_cnn 20 | import input # pylint: disable=redefined-builtin 21 | import metrics 22 | import tensorflow as tf 23 | 24 | 25 | tf.flags.DEFINE_string('dataset', 'svhn', 'The name of the dataset to use') 26 | tf.flags.DEFINE_integer('nb_labels', 10, 'Number of output classes') 27 | 28 | tf.flags.DEFINE_string('data_dir','/tmp','Temporary storage') 29 | tf.flags.DEFINE_string('train_dir','/tmp/train_dir', 30 | 'Where model ckpt are saved') 31 | 32 | tf.flags.DEFINE_integer('max_steps', 3000, 'Number of training steps to run.') 33 | tf.flags.DEFINE_integer('nb_teachers', 50, 'Teachers in the ensemble.') 34 | tf.flags.DEFINE_integer('teacher_id', 0, 'ID of teacher being trained.') 35 | 36 | tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model') 37 | 38 | FLAGS = tf.flags.FLAGS 39 | 40 | 41 | def train_teacher(dataset, nb_teachers, teacher_id): 42 | """ 43 | This function trains a teacher (teacher id) among an ensemble of nb_teachers 44 | models for the dataset specified. 45 | :param dataset: string corresponding to dataset (svhn, cifar10) 46 | :param nb_teachers: total number of teachers in the ensemble 47 | :param teacher_id: id of the teacher being trained 48 | :return: True if everything went well 49 | """ 50 | # If working directories do not exist, create them 51 | assert input.create_dir_if_needed(FLAGS.data_dir) 52 | assert input.create_dir_if_needed(FLAGS.train_dir) 53 | 54 | # Load the dataset 55 | if dataset == 'svhn': 56 | train_data,train_labels,test_data,test_labels = input.ld_svhn(extended=True) 57 | elif dataset == 'cifar10': 58 | train_data, train_labels, test_data, test_labels = input.ld_cifar10() 59 | elif dataset == 'mnist': 60 | train_data, train_labels, test_data, test_labels = input.ld_mnist() 61 | else: 62 | print("Check value of dataset flag") 63 | return False 64 | 65 | # Retrieve subset of data for this teacher 66 | data, labels = input.partition_dataset(train_data, 67 | train_labels, 68 | nb_teachers, 69 | teacher_id) 70 | 71 | print("Length of training data: " + str(len(labels))) 72 | 73 | # Define teacher checkpoint filename and full path 74 | if FLAGS.deeper: 75 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt' 76 | else: 77 | filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt' 78 | ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename 79 | 80 | # Perform teacher training 81 | assert deep_cnn.train(data, labels, ckpt_path) 82 | 83 | # Append final step value to checkpoint for evaluation 84 | ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) 85 | 86 | # Retrieve teacher probability estimates on the test data 87 | teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final) 88 | 89 | # Compute teacher accuracy 90 | precision = metrics.accuracy(teacher_preds, test_labels) 91 | print('Precision of teacher after training: ' + str(precision)) 92 | 93 | return True 94 | 95 | 96 | def main(argv=None): # pylint: disable=unused-argument 97 | # Make a call to train_teachers with values specified in flags 98 | assert train_teacher(FLAGS.dataset, FLAGS.nb_teachers, FLAGS.teacher_id) 99 | 100 | if __name__ == '__main__': 101 | tf.app.run() 102 | -------------------------------------------------------------------------------- /tensorflow_privacy/README.md: 
-------------------------------------------------------------------------------- 1 | # TensorFlow Privacy 2 | 3 | This repository contains the source code for TensorFlow Privacy, a Python 4 | library that includes implementations of TensorFlow optimizers for training 5 | machine learning models with differential privacy. The library comes with 6 | tutorials and analysis tools for computing the privacy guarantees provided. 7 | 8 | The TensorFlow Privacy library is under continual development, always welcoming 9 | contributions. In particular, we welcome help with resolving the 10 | currently open issues. 11 | 12 | ## Setting up TensorFlow Privacy 13 | 14 | ### Dependencies 15 | 16 | This library uses [TensorFlow](https://www.tensorflow.org/) to define machine 17 | learning models. Therefore, installing TensorFlow (>= 1.14) is a prerequisite. 18 | You can find instructions [here](https://www.tensorflow.org/install/). For 19 | better performance, it is also recommended to install TensorFlow with GPU 20 | support (detailed instructions on how to do this are available in the TensorFlow 21 | installation documentation). 22 | 23 | In addition to TensorFlow and its dependencies, other prerequisites are: 24 | 25 | * `scipy` >= 0.17 26 | 27 | * `mpmath` (for testing) 28 | 29 | * `tensorflow_datasets` (for the RNN tutorial `lm_dpsgd_tutorial.py` only) 30 | 31 | ### Installing TensorFlow Privacy 32 | 33 | First, clone this GitHub repository into a directory of your choice: 34 | 35 | ``` 36 | git clone https://github.com/tensorflow/privacy 37 | ``` 38 | 39 | You can then install the local package in "editable" mode in order to add it to 40 | your `PYTHONPATH`: 41 | 42 | ``` 43 | cd privacy 44 | pip install -e . 45 | ``` 46 | 47 | If you'd like to make contributions, we recommend first forking the repository 48 | and then cloning your fork rather than cloning this repository directly. 49 | 50 | ## Contributing 51 | 52 | Contributions are welcome! Bug fixes and new features can be initiated through 53 | GitHub pull requests. To speed the code review process, we ask that: 54 | 55 | * When making code contributions to TensorFlow Privacy, you follow the `PEP8 56 | with two spaces` coding style (the same as the one used by TensorFlow) in 57 | your pull requests. In most cases this can be done by running `autopep8 -i 58 | --indent-size 2 <file>` on the files you have edited. 59 | 60 | * You should also check your code with pylint and TensorFlow's pylint 61 | [configuration file](https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc) 62 | by running `pylint --rcfile=/path/to/the/tf/rcfile <file>`. 63 | 64 | * When making your first pull request, you must 65 | [sign the Google CLA](https://cla.developers.google.com/clas) 66 | 67 | * We do not accept pull requests that add git submodules because of 68 | [the problems that arise when maintaining git submodules](https://medium.com/@porteneuve/mastering-git-submodules-34c65e940407) 69 | 70 | ## Tutorials directory 71 | 72 | To help you get started with the functionalities provided by this library, we 73 | provide a detailed walkthrough [here](tutorials/walkthrough/walkthrough.md) that 74 | will teach you how to wrap existing optimizers 75 | (e.g., SGD, Adam, ...) into their differentially private counterparts using 76 | TensorFlow (TF) Privacy.
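As a taste of the wrapping step the walkthrough covers, here is a minimal sketch using this repository's `privacy/optimizers` module; the hyperparameter values are illustrative only, not recommendations:

```python
from privacy.optimizers import dp_optimizer

optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,       # clip each microbatch gradient to this L2 norm
    noise_multiplier=1.1,   # noise stddev = noise_multiplier * l2_norm_clip
    num_microbatches=256,   # split the minibatch for per-microbatch clipping
    learning_rate=0.15)
# Use it like any TF optimizer, but supply a *vector* loss (one entry per
# example) so gradients can be clipped per microbatch before averaging and
# noising.
```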
You will also learn how to tune the parameters 77 | introduced by differentially private optimization and how to 78 | measure the privacy guarantees provided using analysis tools included in TF 79 | Privacy. 80 | 81 | In addition, the 82 | `tutorials/` folder comes with scripts demonstrating how to use the library 83 | features. The list of tutorials is described in the README included in the 84 | tutorials directory. 85 | 86 | NOTE: the tutorials are maintained carefully. However, they are not considered 87 | part of the API and they can change at any time without warning. You should not 88 | write 3rd party code that imports the tutorials and expect that the interface 89 | will not break. 90 | 91 | ## Research directory 92 | 93 | This folder contains code to reproduce results from research papers related to 94 | privacy in machine learning. It is not maintained as carefully as the tutorials 95 | directory, but rather intended as a convenient archive. 96 | 97 | ## Remarks 98 | 99 | The content of this repository supersedes the following existing folder in the 100 | tensorflow/models [repository](https://github.com/tensorflow/models/tree/master/research/differential_privacy) 101 | 102 | ## Contacts 103 | 104 | If you have any questions that cannot be addressed by raising an issue, feel 105 | free to contact: 106 | 107 | * Galen Andrew (@galenmandrew) 108 | * Steve Chien (@schien1729) 109 | * Nicolas Papernot (@npapernot) 110 | 111 | ## Copyright 112 | 113 | Copyright 2019 - Google LLC 114 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/nested_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implements DPQuery interface for queries over nested structures. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from distutils.version import LooseVersion 23 | import tensorflow as tf 24 | 25 | from privacy.dp_query import dp_query 26 | 27 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 28 | nest = tf.contrib.framework.nest 29 | else: 30 | nest = tf.nest 31 | 32 | 33 | class NestedQuery(dp_query.DPQuery): 34 | """Implements DPQuery interface for structured queries. 35 | 36 | NestedQuery evaluates arbitrary nested structures of queries. Records must be 37 | nested structures of tensors that are compatible (in type and arity) with the 38 | query structure, but are allowed to have deeper structure within each leaf of 39 | the query structure. For example, the nested query [q1, q2] is compatible with 40 | the record [t1, t2] or [t1, (t2, t3)], but not with (t1, t2), [t1] or 41 | [t1, t2, t3]. The entire substructure of each record corresponding to a leaf 42 | node of the query structure is routed to the corresponding query. 
If the same 43 | tensor should be consumed by multiple sub-queries, it can be replicated in the 44 | record, for example [t1, t1]. 45 | 46 | NestedQuery is intended to allow privacy mechanisms for groups as described in 47 | [McMahan & Andrew, 2018: "A General Approach to Adding Differential Privacy to 48 | Iterative Training Procedures" (https://arxiv.org/abs/1812.06210)]. 49 | """ 50 | 51 | def __init__(self, queries): 52 | """Initializes the NestedQuery. 53 | 54 | Args: 55 | queries: A nested structure of queries. 56 | """ 57 | self._queries = queries 58 | 59 | def _map_to_queries(self, fn, *inputs, **kwargs): 60 | def caller(query, *args): 61 | return getattr(query, fn)(*args, **kwargs) 62 | return nest.map_structure_up_to( 63 | self._queries, caller, self._queries, *inputs) 64 | 65 | def set_ledger(self, ledger): 66 | self._map_to_queries('set_ledger', ledger=ledger) 67 | 68 | def initial_global_state(self): 69 | """See base class.""" 70 | return self._map_to_queries('initial_global_state') 71 | 72 | def derive_sample_params(self, global_state): 73 | """See base class.""" 74 | return self._map_to_queries('derive_sample_params', global_state) 75 | 76 | def initial_sample_state(self, template): 77 | """See base class.""" 78 | return self._map_to_queries('initial_sample_state', template) 79 | 80 | def preprocess_record(self, params, record): 81 | """See base class.""" 82 | return self._map_to_queries('preprocess_record', params, record) 83 | 84 | def accumulate_preprocessed_record( 85 | self, sample_state, preprocessed_record): 86 | """See base class.""" 87 | return self._map_to_queries( 88 | 'accumulate_preprocessed_record', 89 | sample_state, 90 | preprocessed_record) 91 | 92 | def merge_sample_states(self, sample_state_1, sample_state_2): 93 | return self._map_to_queries( 94 | 'merge_sample_states', sample_state_1, sample_state_2) 95 | 96 | def get_noised_result(self, sample_state, global_state): 97 | """Gets query result after all records of sample have been accumulated. 98 | 99 | Args: 100 | sample_state: The sample state after all records have been accumulated. 101 | global_state: The global state. 102 | 103 | Returns: 104 | A tuple (result, new_global_state) where "result" is a structure matching 105 | the query structure containing the results of the subqueries and 106 | "new_global_state" is a structure containing the updated global states 107 | for the subqueries. 108 | """ 109 | estimates_and_new_global_states = self._map_to_queries( 110 | 'get_noised_result', sample_state, global_state) 111 | 112 | flat_estimates, flat_new_global_states = zip( 113 | *nest.flatten_up_to(self._queries, estimates_and_new_global_states)) 114 | return ( 115 | nest.pack_sequence_as(self._queries, flat_estimates), 116 | nest.pack_sequence_as(self._queries, flat_new_global_states)) 117 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/core_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for pate.core.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import sys 23 | import unittest 24 | import numpy as np 25 | 26 | import core as pate 27 | 28 | 29 | class PateTest(unittest.TestCase): 30 | 31 | def _test_rdp_gaussian_value_errors(self): 32 | # Test for ValueErrors. 33 | with self.assertRaises(ValueError): 34 | pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4])) 35 | with self.assertRaises(ValueError): 36 | pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4])) 37 | with self.assertRaises(ValueError): 38 | pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4])) 39 | 40 | def _test_rdp_gaussian_as_function_of_q(self): 41 | # Test for data-independent and data-dependent ranges over q. 42 | # The following corresponds to orders 1.1, 2.5, 32, 250 43 | # sigmas 1.5, 15, 1500, 15000. 44 | # Hand calculated -log(q0)s arranged in a 'sigma major' ordering. 45 | neglogq0s = [ 46 | 2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2, 47 | 8.6, 6.4 48 | ] 49 | idx_neglogq0s = 0 # To iterate through neglogq0s. 50 | orders = [1.1, 2.5, 32, 250] 51 | sigmas = [1.5, 15, 1500, 15000] 52 | for sigma in sigmas: 53 | for order in orders: 54 | curr_neglogq0 = neglogq0s[idx_neglogq0s] 55 | idx_neglogq0s += 1 56 | if curr_neglogq0 is None: # sigma == 1.5 and order == 250: 57 | continue 58 | 59 | rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order) 60 | 61 | # Data-dependent range. (Successively halve the value of q.) 62 | logq_dds = (-curr_neglogq0 - np.array( 63 | [0, np.log(2), np.log(4), np.log(8)])) 64 | # Check that in q_dds, rdp is decreasing. 65 | for idx in range(len(logq_dds) - 1): 66 | self.assertGreater( 67 | pate.rdp_gaussian(logq_dds[idx], sigma, order), 68 | pate.rdp_gaussian(logq_dds[idx + 1], sigma, order)) 69 | 70 | # Data-independent range. 71 | q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4]) 72 | # Check that in q_dids, rdp is constant. 73 | for q in q_dids: 74 | self.assertEqual(rdp_at_q0, pate.rdp_gaussian( 75 | np.log(q), sigma, order)) 76 | 77 | def _test_compute_eps_from_delta_value_error(self): 78 | # Test for ValueError. 79 | with self.assertRaises(ValueError): 80 | pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001) 81 | 82 | def _test_compute_eps_from_delta_monotonicity(self): 83 | # Test for monotonicity with respect to delta. 84 | orders = [1.1, 2.5, 250.0] 85 | sigmas = [1e-3, 1.0, 1e5] 86 | deltas = [1e-60, 1e-6, 0.1, 0.999] 87 | for sigma in sigmas: 88 | list_of_eps = [] 89 | rdps_for_gaussian = np.array(orders) / (2 * sigma**2) 90 | for delta in deltas: 91 | list_of_eps.append( 92 | pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0]) 93 | 94 | # Check that in list_of_eps, epsilons are decreasing (as delta increases). 
95 | sorted_list_of_eps = list(list_of_eps) 96 | sorted_list_of_eps.sort(reverse=True) 97 | self.assertEqual(list_of_eps, sorted_list_of_eps) 98 | 99 | def _test_compute_q0(self): 100 | # Stub code to search a logq space and figure out logq0 by eyeballing 101 | # results. This code does not run with the tests. Remove underscore to run. 102 | sigma = 15 103 | order = 250 104 | logqs = np.arange(-290, -270, 1) 105 | count = 0 106 | for logq in logqs: 107 | count += 1 108 | sys.stdout.write("\t%0.5g: %0.10g" % 109 | (logq, pate.rdp_gaussian(logq, sigma, order))) 110 | sys.stdout.flush() 111 | if count % 5 == 0: 112 | print("") 113 | 114 | def test_rdp_gaussian(self): 115 | self._test_rdp_gaussian_value_errors() 116 | self._test_rdp_gaussian_as_function_of_q() 117 | 118 | def test_compute_eps_from_delta(self): 119 | self._test_compute_eps_from_delta_value_error() 120 | self._test_compute_eps_from_delta_monotonicity() 121 | 122 | 123 | if __name__ == "__main__": 124 | unittest.main() 125 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for pate.smooth_sensitivity.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import unittest 23 | import numpy as np 24 | 25 | import smooth_sensitivity as pate_ss 26 | 27 | 28 | class PateSmoothSensitivityTest(unittest.TestCase): 29 | 30 | def test_check_conditions(self): 31 | self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False)) 32 | self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True)) 33 | 34 | def _assert_all_close(self, x, y): 35 | """Asserts that two numpy arrays are close.""" 36 | self.assertEqual(len(x), len(y)) 37 | self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0)) 38 | 39 | def test_compute_local_sensitivity_bounds_gnmax(self): 40 | counts1 = np.array([10, 0, 0]) 41 | sigma1 = .5 42 | order1 = 1.5 43 | 44 | answer1 = np.array( 45 | [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7) 46 | 47 | # Test for "going right" in the smooth sensitivity computation. 48 | out1 = pate_ss.compute_local_sensitivity_bounds_gnmax( 49 | counts1, 10, sigma1, order1) 50 | 51 | self._assert_all_close(out1, answer1) 52 | 53 | counts2 = np.array([1000, 500, 300, 200, 0]) 54 | sigma2 = 250. 55 | order2 = 10. 56 | 57 | # Test for "going left" in the smooth sensitivity computation. 58 | out2 = pate_ss.compute_local_sensitivity_bounds_gnmax( 59 | counts2, 2000, sigma2, order2) 60 | 61 | answer2 = np.array([0.] 
* 298 + [2.77693450548e-7, 2.10853979548e-6] + 62 | [2.73113623988e-6] * 1700) 63 | self._assert_all_close(out2, answer2) 64 | 65 | def test_compute_local_sensitivity_bounds_threshold(self): 66 | counts1_3 = np.array([20, 10, 0]) 67 | num_teachers = sum(counts1_3) 68 | t1 = 16 # high threshold 69 | sigma = 2 70 | order = 10 71 | 72 | out1 = pate_ss.compute_local_sensitivity_bounds_threshold( 73 | counts1_3, num_teachers, t1, sigma, order) 74 | answer1 = np.array([0] * 3 + [ 75 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, 76 | 9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01, 77 | 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, 78 | 9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01, 79 | 1.93127860e-01 80 | ] + [0] * 10) 81 | self._assert_all_close(out1, answer1) 82 | 83 | t2 = 2 # low threshold 84 | 85 | out2 = pate_ss.compute_local_sensitivity_bounds_threshold( 86 | counts1_3, num_teachers, t2, sigma, order) 87 | answer2 = np.array([ 88 | 1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01, 89 | 1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02, 90 | 6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04 91 | ] + [0] * 18) 92 | self._assert_all_close(out2, answer2) 93 | 94 | t3 = 50 # very high threshold (larger than the number of teachers). 95 | 96 | out3 = pate_ss.compute_local_sensitivity_bounds_threshold( 97 | counts1_3, num_teachers, t3, sigma, order) 98 | 99 | answer3 = np.array([ 100 | 1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15, 101 | 1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10, 102 | 2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05, 103 | 0.000360973025976, 0.000360973025976, 2.76377015215e-50, 104 | 1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61, 105 | 1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72, 106 | 1.28328011060e-76, 8.20047697109e-81 107 | ] + [0] * 10) 108 | 109 | self._assert_all_close(out3, answer3) 110 | 111 | # Fractional values. 
112 | counts4 = np.array([19.5, -5.1, 0]) 113 | t4 = 10.1 114 | out4 = pate_ss.compute_local_sensitivity_bounds_threshold( 115 | counts4, num_teachers, t4, sigma, order) 116 | 117 | answer4 = np.array([ 118 | 0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530, 119 | 0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142, 120 | 0.00678695871 121 | ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10) 122 | self._assert_all_close(out4, answer4) 123 | 124 | 125 | if __name__ == "__main__": 126 | unittest.main() 127 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_0.0001_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.5150317674746007, 0.5183779587283024, 0.521724149982004, 0.5250703412357057, 0.5284165324894075, 0.5317627237431092, 0.5351089149968108, 0.5384551062505125, 0.5418012975042142, 0.545147488757916, 0.5484936800116176, 0.5518398712653193, 0.555186062519021, 0.5585322537727228, 0.5618784450264244, 0.5652246362801261, 0.5685708275338278, 0.5719170187875295, 0.5752632100412312, 0.5786094012949329, 0.5819555925486346, 0.5853017838023362, 0.5886479750560379, 0.5919941663097397, 0.5953403575634414, 0.5986865488171431, 0.6020327400708447, 0.6053789313245465, 0.6087251225782482, 0.6120713138319499, 0.6154175050856515, 0.6187636963393532, 0.622109887593055, 0.6254560788467567, 0.6288022701004583, 0.63214846135416, 0.6354946526078618, 0.6388408438615634, 0.6421870351152651, 0.6455332263689668, 0.6488794176226685, 0.6522256088763703, 0.6555718001300719, 0.6589179913837736, 0.6622641826374753, 0.665610373891177, 0.6689565651448788, 0.6723027563985804, 0.6756489476522821, 0.6789951389059838, 0.6823413301596855, 0.6856875214133872, 0.6890337126670889, 0.6923799039207906, 0.6957260951744924, 0.699072286428194, 0.7024184776818957, 0.7057646689355974, 0.709110860189299, 0.7124570514430008, 0.7158032426967025, 0.7191494339504042, 0.7224956252041059, 0.7258418164578075, 0.7291880077115093, 0.732534198965211, 0.7358803902189126, 0.7392265814726143, 0.742572772726316, 0.7459189639800178, 0.7492651552337195, 0.7526113464874211, 0.7559575377411228, 0.7593037289948246, 0.7626499202485262, 0.7659961115022279, 0.7693423027559296, 0.7726884940096312, 0.7760346852633331, 0.7793808765170347, 0.7827270677707364, 0.7860732590244381, 0.7894194502781398, 0.7927656415318415, 0.7961118327855432, 0.7994580240392448, 0.8028042152929467, 0.8061504065466483, 0.80949659780035, 0.8128427890540517, 0.8161889803077533, 0.8195351715614552, 0.8228813628151568, 0.8262275540688585, 0.8295737453225602, 0.8329199365762618, 0.8362661278299636, 0.8396123190836653, 0.8429585103373669, 0.8463047015910687, 0.8496508928447704, 0.8529970840984721, 0.8563432753521738, 0.8596894666058754, 0.8630356578595771, 0.8663818491132789, 0.8697280403669805, 0.8730742316206823, 0.8764204228743839, 0.8797666141280857, 0.8831128053817874, 0.886458996635489, 0.8898051878891907, 0.8931513791428924, 0.8964975703965941, 0.8998437616502959, 0.9031899529039975, 0.9065361441576992, 0.909882335411401, 0.9132285266651026, 0.9165747179188043, 0.919920909172506, 0.9232671004262076, 0.9266132916799095, 0.9299594829336111, 0.9333056741873128, 0.9366321354866116, 0.9397411701930944, 0.942850204899577, 0.9459592396060599, 0.9490682743125425, 0.9521773090190253, 0.955286343725508, 0.9583953784319907, 0.9615044131384735, 0.9646134478449562, 0.967722482551439, 0.9708315172579216, 0.9739405519644044, 
0.9770495866708871, 0.9801586213773699, 0.9832676560838526, 0.9863766907903353, 0.9894857254968181, 0.9925947602033007, 0.9957037949097836, 0.9988128296162663, 1.001921864322749, 1.0050308990292316, 1.0081399337357144, 1.0112489684421972, 1.0143580031486799, 1.0174670378551627, 1.0205760725616453, 1.0236851072681281, 1.026794141974611, 1.0299031766810935, 1.0330122113875764, 1.036121246094059, 1.0392302808005418, 1.0423393155070246, 1.0454483502135072, 1.0485573849199898, 1.0516664196264727, 1.0547754543329555, 1.057884489039438, 1.060993523745921, 1.0641025584524035, 1.0672115931588864, 1.0703206278653692, 1.0734296625718518, 1.0765386972783344, 1.0796477319848172, 1.0827567666913, 1.0858658013977829, 1.0889748361042655, 1.092083870810748, 1.095192905517231, 1.0983019402237137, 1.1014109749301964, 1.104520009636679, 1.1076290443431618, 1.1107380790496446, 1.1138471137561274, 1.11695614846261, 1.1200651831690926, 1.1231742178755755, 1.1262832525820583, 1.1293922872885411, 1.1325013219950237, 1.1356103567015063, 1.1387193914079892, 1.141828426114472, 1.1449374608209546, 1.1479242666081464, 1.1508590255484523, 1.1537937844887582, 1.156728543429064, 1.1596633023693697, 1.1625980613096756] 2 | validation acc: [0.3689, 0.4393, 0.4015, 0.4714, 0.5035, 0.5211, 0.5439, 0.5236, 0.5269, 0.5212, 0.5728, 0.5894, 0.6, 0.5682, 0.595, 0.6018, 0.5995, 0.5808, 0.6293, 0.5536, 0.6295, 0.6174, 0.6154, 0.6357, 0.6283, 0.6392, 0.6269, 0.6295, 0.6244, 0.6356, 0.6583, 0.6559, 0.6231, 0.6565, 0.6261, 0.648, 0.6631, 0.6505, 0.6463, 0.6132, 0.6627, 0.67, 0.6711, 0.6797, 0.6806, 0.6739, 0.666, 0.6837, 0.6636, 0.6594, 0.6692, 0.6836, 0.6531, 0.6807, 0.6928, 0.6751, 0.6717, 0.6842, 0.6873, 0.6875, 0.6964, 0.6807, 0.6873, 0.6865, 0.6725, 0.6951, 0.7011, 0.7006, 0.6831, 0.6934, 0.6845, 0.695, 0.681, 0.6752, 0.7018, 0.689, 0.6966, 0.6856, 0.7055, 0.6915, 0.6999, 0.7041, 0.7046, 0.7065, 0.6895, 0.7009, 0.6926, 0.7011, 0.6886, 0.687, 0.6958, 0.6659, 0.6842, 0.7042, 0.6952, 0.6946, 0.7093, 0.6639, 0.7009, 0.696, 0.6996, 0.6833, 0.6999, 0.6965, 0.6988, 0.6989, 0.7109, 0.708, 0.7098, 0.6979, 0.7056, 0.7033, 0.7145, 0.7079, 0.7074, 0.6999, 0.7051, 0.7149, 0.708, 0.6967, 0.708, 0.6877, 0.7099, 0.7002, 0.6944, 0.7035, 0.7081, 0.7052, 0.7149, 0.7072, 0.7155, 0.7131, 0.7038, 0.7051, 0.7151, 0.7064, 0.7047, 0.7083, 0.7178, 0.6983, 0.7147, 0.7072, 0.6974, 0.7154, 0.7125, 0.7122, 0.684, 0.7159, 0.7176, 0.7013, 0.7085, 0.6975, 0.7118, 0.717, 0.7073, 0.712, 0.7118, 0.7154, 0.6985, 0.7013, 0.7042, 0.7186, 0.7075, 0.7138, 0.7114, 0.7143, 0.7176, 0.7153, 0.7182, 0.7053, 0.7027, 0.6978, 0.7036, 0.7201, 0.7076, 0.7116, 0.7174, 0.7194, 0.7077, 0.7169, 0.7126, 0.7196, 0.709, 0.7195, 0.7159, 0.7108, 0.7153, 0.7122, 0.7089, 0.7093, 0.7101, 0.7123, 0.7181, 0.7225, 0.7054, 0.7108, 0.7106, 0.7147, 0.717, 0.7107] 3 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_1e-06_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.7708745555850502, 0.774220746838752, 0.7775669380924536, 0.7809131293461553, 0.784259320599857, 0.7876055118535588, 0.7909517031072604, 0.7942978943609621, 0.7976440856146638, 0.8009902768683654, 0.8043364681220672, 0.8076826593757689, 0.8110288506294706, 0.8143750418831723, 0.817721233136874, 0.8210674243905757, 0.8244136156442774, 0.8277598068979791, 0.8311059981516807, 0.8344521894053825, 0.8377983806590842, 0.8411445719127859, 0.8444907631664875, 0.8478369544201892, 0.851183145673891, 0.8545293369275927, 
0.8578755281812943, 0.861221719434996, 0.8645679106886978, 0.8679141019423995, 0.8712602931961011, 0.8746064844498028, 0.8779526757035045, 0.8812988669572063, 0.8846450582109079, 0.8879912494646096, 0.8913374407183113, 0.8946836319720131, 0.8980298232257147, 0.9013760144794164, 0.9047222057331181, 0.9080683969868197, 0.9114145882405215, 0.9147607794942232, 0.9181069707479249, 0.9214531620016266, 0.9247993532553282, 0.92814554450903, 0.9314917357627317, 0.9348379270164334, 0.9381841182701351, 0.9415303095238368, 0.9448765007775385, 0.9482226920312402, 0.9515688832849418, 0.9549150745386435, 0.9582612657923453, 0.961607457046047, 0.9649536482997487, 0.9682998395534503, 0.9716460308071521, 0.9749922220608538, 0.9783384133145554, 0.9816846045682571, 0.9850307958219588, 0.9883769870756606, 0.9917231783293623, 0.9950693695830639, 0.9984155608367656, 1.0017617520904674, 1.005107943344169, 1.0084541345978708, 1.0118003258515724, 1.015146517105274, 1.0184927083589759, 1.0218388996126775, 1.0251850908663793, 1.028531282120081, 1.0318774733737825, 1.0352236646274844, 1.038569855881186, 1.0419160471348876, 1.0452622383885894, 1.048608429642291, 1.0519546208959927, 1.0553008121496945, 1.0586470034033961, 1.0619931946570977, 1.0653393859107996, 1.0686855771645012, 1.072031768418203, 1.0753779596719046, 1.0787241509256065, 1.082070342179308, 1.0854165334330097, 1.0887627246867115, 1.0921089159404131, 1.0954551071941148, 1.0988012984478166, 1.1021474897015182, 1.1054936809552198, 1.1088398722089217, 1.1121860634626233, 1.1155322547163251, 1.1188784459700267, 1.1222246372237286, 1.1255708284774302, 1.1289170197311318, 1.1322632109848336, 1.1356094022385352, 1.1389555934922369, 1.1423017847459387, 1.1456479759996403, 1.148994167253342, 1.1523403585070437, 1.1556865497607454, 1.159032741014447, 1.1623789322681488, 1.1657251235218504, 1.1690713147755523, 1.1724175060292539, 1.1757636972829557, 1.1791098885366573, 1.182456079790359, 1.1858022710440608, 1.1891484622977624, 1.192494653551464, 1.1958408448051658, 1.1991870360588675, 1.202533227312569, 1.205879418566271, 1.2092256098199725, 1.2125718010736741, 1.215917992327376, 1.2192641835810776, 1.2226103748347794, 1.225956566088481, 1.2293027573421829, 1.2326489485958845, 1.235995139849586, 1.239341331103288, 1.2426875223569895, 1.2460337136106912, 1.249379904864393, 1.2527260961180946, 1.2560722873717962, 1.259418478625498, 1.2627646698791997, 1.2661108611329013, 1.2694570523866031, 1.272803243640305, 1.2761494348940063, 1.2794956261477082, 1.28284181740141, 1.2861880086551116, 1.2895341999088132, 1.292880391162515, 1.2962265824162167, 1.2995727736699183, 1.3029189649236201, 1.3062651561773218, 1.3096113474310234, 1.3129575386847252, 1.3163037299384268, 1.3196499211921284, 1.3229961124458303, 1.326342303699532, 1.3296884949532335, 1.3330346862069353, 1.3363808774606372, 1.3397270687143388, 1.3430732599680404, 1.3464194512217422, 1.3497656424754438, 1.3531118337291455, 1.3564580249828473, 1.359804216236549, 1.3631504074902505, 1.3664965987439524, 1.369842789997654, 1.3731889812513556, 1.3765351725050574, 1.3798813637587592, 1.3832275550124609, 1.3865737462661625, 1.3899199375198643, 1.393266128773566, 1.3966123200272675, 1.3999585112809694, 1.403304702534671, 1.406502720583159, 1.4096117552896417, 1.4127207899961243, 1.415829824702607, 1.41893885940909, 1.4220478941155725, 1.4251569288220554, 1.428265963528538, 1.4313749982350208, 1.4344840329415036] 2 | validation acc: [0.3756, 0.4218, 0.4576, 0.4572, 0.4651, 0.4857, 0.5136, 0.5337, 0.5265, 0.5388, 0.5286, 
0.5851, 0.5796, 0.5462, 0.5908, 0.5742, 0.6136, 0.6284, 0.62, 0.6343, 0.6097, 0.5827, 0.6341, 0.6228, 0.6097, 0.6276, 0.6345, 0.6337, 0.6002, 0.6019, 0.6545, 0.6631, 0.657, 0.6518, 0.5948, 0.6656, 0.6811, 0.6565, 0.656, 0.6623, 0.6691, 0.6784, 0.6245, 0.6595, 0.6673, 0.6807, 0.6832, 0.6674, 0.6831, 0.6682, 0.6918, 0.6821, 0.6889, 0.6752, 0.6908, 0.6817, 0.6664, 0.6706, 0.6626, 0.6724, 0.6938, 0.6982, 0.6643, 0.6889, 0.6762, 0.685, 0.6905, 0.6932, 0.6746, 0.6929, 0.6961, 0.6895, 0.7006, 0.6895, 0.6689, 0.6777, 0.6898, 0.691, 0.6958, 0.6895, 0.6984, 0.6855, 0.7083, 0.6827, 0.6995, 0.6969, 0.6915, 0.6919, 0.7012, 0.6939, 0.6988, 0.7037, 0.6882, 0.7033, 0.7001, 0.6896, 0.7084, 0.6972, 0.6925, 0.6818, 0.7096, 0.6979, 0.6871, 0.7024, 0.6981, 0.705, 0.7077, 0.6933, 0.6958, 0.7041, 0.7097, 0.6911, 0.711, 0.6803, 0.7049, 0.696, 0.6952, 0.7037, 0.6988, 0.7038, 0.7027, 0.709, 0.7009, 0.7011, 0.6897, 0.6994, 0.7066, 0.7003, 0.7055, 0.7056, 0.7117, 0.6883, 0.6999, 0.6967, 0.6828, 0.6997, 0.6998, 0.7012, 0.7046, 0.7142, 0.7072, 0.7101, 0.7134, 0.7073, 0.7083, 0.6998, 0.7108, 0.7114, 0.7121, 0.6971, 0.6996, 0.708, 0.7059, 0.7061, 0.7115, 0.7101, 0.7076, 0.713, 0.7002, 0.706, 0.7077, 0.7046, 0.7088, 0.7055, 0.7149, 0.6985, 0.7109, 0.7013, 0.6881, 0.7105, 0.7124, 0.6967, 0.7129, 0.7077, 0.7158, 0.7088, 0.7125, 0.7061, 0.7141, 0.7113, 0.7113, 0.701, 0.7078, 0.7119, 0.7149, 0.7165, 0.7112, 0.7059, 0.7105, 0.711, 0.7144, 0.7158, 0.6987, 0.7047, 0.7062, 0.7107, 0.7102, 0.713, 0.7108, 0.7112] 3 | -------------------------------------------------------------------------------- /results/cifar_dpsgd_delta_1e-05_lr_0.001.txt: -------------------------------------------------------------------------------- 1 | eps: [0.6429531615298255, 0.6462993527835272, 0.6496455440372289, 0.6529917352909306, 0.6563379265446323, 0.659684117798334, 0.6630303090520356, 0.6663765003057374, 0.6697226915594391, 0.6730688828131408, 0.6764150740668424, 0.6797612653205442, 0.6831074565742459, 0.6864536478279476, 0.6897998390816492, 0.6931460303353509, 0.6964922215890527, 0.6998384128427544, 0.703184604096456, 0.7065307953501577, 0.7098769866038595, 0.7132231778575611, 0.7165693691112628, 0.7199155603649645, 0.7232617516186662, 0.726607942872368, 0.7299541341260696, 0.7333003253797713, 0.736646516633473, 0.7399927078871747, 0.7433388991408764, 0.7466850903945781, 0.7500312816482798, 0.7533774729019815, 0.7567236641556832, 0.7600698554093849, 0.7634160466630866, 0.7667622379167882, 0.7701084291704899, 0.7734546204241917, 0.7768008116778934, 0.7801470029315951, 0.7834931941852967, 0.7868393854389985, 0.7901855766927002, 0.7935317679464019, 0.7968779592001036, 0.8002241504538052, 0.803570341707507, 0.8069165329612087, 0.8102627242149103, 0.813608915468612, 0.8169551067223138, 0.8203012979760155, 0.8236474892297172, 0.8269936804834188, 0.8303398717371205, 0.8336860629908223, 0.8370322542445239, 0.8403784454982256, 0.8437246367519273, 0.847070828005629, 0.8504170192593308, 0.8537632105130324, 0.8571094017667341, 0.8604555930204358, 0.8638017842741375, 0.8671479755278392, 0.8704941667815409, 0.8738403580352426, 0.8771865492889444, 0.880532740542646, 0.8838789317963477, 0.8872251230500494, 0.890571314303751, 0.8939175055574528, 0.8972636968111545, 0.9006098880648561, 0.9039560793185579, 0.9073022705722595, 0.9106484618259613, 0.913994653079663, 0.9173408443333646, 0.9206870355870663, 0.924033226840768, 0.9273794180944697, 0.9307256093481715, 0.9340718006018731, 0.9374179918555748, 0.9407641831092766, 0.9441103743629782, 0.94745656561668, 
0.9508027568703816, 0.9541489481240834, 0.9574951393777851, 0.9608413306314867, 0.9641875218851884, 0.9675337131388901, 0.9708799043925918, 0.9742260956462936, 0.9775722868999952, 0.9809184781536969, 0.9842646694073987, 0.9876108606611003, 0.990957051914802, 0.9943032431685037, 0.9976494344222053, 1.0009956256759072, 1.0043418169296088, 1.0076880081833104, 1.0110341994370122, 1.0143803906907138, 1.0177265819444155, 1.0210727731981173, 1.024418964451819, 1.0277651557055207, 1.0311113469592224, 1.0344575382129242, 1.0378037294666258, 1.0411499207203274, 1.0444961119740293, 1.0478423032277309, 1.0511884944814325, 1.0545346857351343, 1.057880876988836, 1.0612270682425375, 1.0645732594962394, 1.067919450749941, 1.0712656420036426, 1.0746118332573444, 1.077958024511046, 1.081304215764748, 1.0846504070184495, 1.0879965982721513, 1.091342789525853, 1.0946889807795546, 1.0980351720332564, 1.101381363286958, 1.1047275545406596, 1.1080737457943615, 1.111419937048063, 1.1147661283017647, 1.1181123195554665, 1.1214585108091681, 1.1248047020628698, 1.1281508933165716, 1.1314970845702732, 1.134843275823975, 1.1381894670776767, 1.1415356583313785, 1.14488184958508, 1.1482280408387817, 1.1515742320924836, 1.1549204233461852, 1.1582666145998868, 1.1616128058535886, 1.1649589971072902, 1.1683051883609918, 1.1715674280348853, 1.1746764627413682, 1.177785497447851, 1.1808945321543336, 1.1840035668608162, 1.187112601567299, 1.1902216362737819, 1.1933306709802645, 1.1964397056867473, 1.19954874039323, 1.2026577750997127, 1.2057668098061955, 1.2088758445126782, 1.2119848792191608, 1.2150939139256436, 1.2182029486321264, 1.2213119833386092, 1.2244210180450918, 1.2275300527515745, 1.2306390874580573, 1.23374812216454, 1.2368571568710227, 1.2399661915775053, 1.2430752262839881, 1.246184260990471, 1.2492932956969538, 1.2524023304034364, 1.255511365109919, 1.2586203998164018, 1.2617294345228847, 1.2648384692293675, 1.26794750393585, 1.2710565386423327, 1.2741655733488155, 1.2772746080552984, 1.280383642761781, 1.2834926774682636, 1.2866017121747464, 1.2897107468812292, 1.292819781587712, 1.2959288162941947, 1.2990378510006773] 2 | validation acc: [0.3862, 0.4004, 0.472, 0.4846, 0.4885, 0.5227, 0.5376, 0.5356, 0.5393, 0.5635, 0.5725, 0.5661, 0.5843, 0.5438, 0.5578, 0.5831, 0.5552, 0.6103, 0.6269, 0.6374, 0.6368, 0.6487, 0.6304, 0.6503, 0.6575, 0.6605, 0.6447, 0.6517, 0.6564, 0.6473, 0.6612, 0.6177, 0.6586, 0.6693, 0.6787, 0.6607, 0.6713, 0.6521, 0.6856, 0.6873, 0.6751, 0.6718, 0.6855, 0.6855, 0.6903, 0.6754, 0.6879, 0.6705, 0.6817, 0.6988, 0.6854, 0.6942, 0.6741, 0.6766, 0.6513, 0.6887, 0.6859, 0.6742, 0.6927, 0.6952, 0.6957, 0.6914, 0.6613, 0.6897, 0.6996, 0.6947, 0.6949, 0.7076, 0.7055, 0.6975, 0.6965, 0.6952, 0.7058, 0.6886, 0.7069, 0.6995, 0.7004, 0.6913, 0.6775, 0.6852, 0.7103, 0.6981, 0.7069, 0.6831, 0.686, 0.6965, 0.7016, 0.7051, 0.7053, 0.71, 0.6983, 0.7085, 0.6941, 0.7142, 0.7117, 0.7053, 0.703, 0.6986, 0.7028, 0.687, 0.7078, 0.7027, 0.7033, 0.7014, 0.7109, 0.7074, 0.6972, 0.7062, 0.7017, 0.6891, 0.7045, 0.7116, 0.6902, 0.7089, 0.7077, 0.7067, 0.6935, 0.7092, 0.7053, 0.7038, 0.7144, 0.7054, 0.6911, 0.7094, 0.7109, 0.7051, 0.7122, 0.7066, 0.7152, 0.6971, 0.7127, 0.7078, 0.6832, 0.7162, 0.7108, 0.7104, 0.7095, 0.6962, 0.709, 0.7153, 0.7138, 0.7133, 0.7069, 0.7129, 0.718, 0.6988, 0.7103, 0.7074, 0.7074, 0.7025, 0.7119, 0.7105, 0.7086, 0.7141, 0.7092, 0.7044, 0.7129, 0.7082, 0.7058, 0.7043, 0.7112, 0.712, 0.7115, 0.7156, 0.7187, 0.7084, 0.7186, 0.7017, 0.7182, 0.7187, 0.709, 0.7035, 0.7123, 0.7159, 0.7169, 0.7134, 
0.7068, 0.7085, 0.7115, 0.7112, 0.7163, 0.7135, 0.7191, 0.721, 0.7158, 0.7156, 0.7113, 0.7226, 0.7208, 0.7148, 0.7143, 0.7124, 0.7221, 0.7156, 0.7205, 0.7194, 0.7078, 0.7183, 0.7229, 0.7208] 3 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A lightweight buffer for maintaining tensors.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | 23 | class TensorBuffer(object): 24 | """A lightweight buffer for maintaining lists. 25 | 26 | The TensorBuffer accumulates tensors of the given shape into a tensor (whose 27 | rank is one more than that of the given shape) via calls to `append`. The 28 | current value of the accumulated tensor can be extracted via the property 29 | `values`. 30 | """ 31 | 32 | def __init__(self, capacity, shape, dtype=tf.int32, name=None): 33 | """Initializes the TensorBuffer. 34 | 35 | Args: 36 | capacity: Initial capacity. Buffer will double in capacity each time it is 37 | filled to capacity. 38 | shape: The shape (as tuple or list) of the tensors to accumulate. 39 | dtype: The type of the tensors. 40 | name: A string name for the variable_scope used. 41 | 42 | Raises: 43 | ValueError: If the shape is empty (specifies scalar shape). 44 | """ 45 | shape = list(shape) 46 | self._rank = len(shape) 47 | self._name = name 48 | self._dtype = dtype 49 | if not self._rank: 50 | raise ValueError('Shape cannot be scalar.') 51 | shape = [capacity] + shape 52 | 53 | with tf.variable_scope(self._name): 54 | # We need to use a placeholder as the initial value to allow resizing. 55 | self._buffer = tf.Variable( 56 | initial_value=tf.placeholder_with_default( 57 | tf.zeros(shape, dtype), shape=None), 58 | trainable=False, 59 | name='buffer', 60 | use_resource=True) 61 | self._current_size = tf.Variable( 62 | initial_value=0, dtype=tf.int32, trainable=False, name='current_size') 63 | self._capacity = tf.Variable( 64 | initial_value=capacity, 65 | dtype=tf.int32, 66 | trainable=False, 67 | name='capacity') 68 | 69 | def append(self, value): 70 | """Appends a new tensor to the end of the buffer. 71 | 72 | Args: 73 | value: The tensor to append. Must match the shape specified in the 74 | initializer. 75 | 76 | Returns: 77 | An op appending the new tensor to the end of the buffer. 
78 | """ 79 | 80 | def _double_capacity(): 81 | """Doubles the capacity of the current tensor buffer.""" 82 | padding = tf.zeros_like(self._buffer, self._buffer.dtype) 83 | new_buffer = tf.concat([self._buffer, padding], axis=0) 84 | if tf.executing_eagerly(): 85 | with tf.variable_scope(self._name, reuse=True): 86 | self._buffer = tf.get_variable( 87 | name='buffer', 88 | dtype=self._dtype, 89 | initializer=new_buffer, 90 | trainable=False) 91 | return self._buffer, tf.assign(self._capacity, 92 | tf.multiply(self._capacity, 2)) 93 | else: 94 | return tf.assign( 95 | self._buffer, new_buffer, 96 | validate_shape=False), tf.assign(self._capacity, 97 | tf.multiply(self._capacity, 2)) 98 | 99 | update_buffer, update_capacity = tf.cond( 100 | tf.equal(self._current_size, self._capacity), 101 | _double_capacity, lambda: (self._buffer, self._capacity)) 102 | 103 | with tf.control_dependencies([update_buffer, update_capacity]): 104 | with tf.control_dependencies([ 105 | tf.assert_less( 106 | self._current_size, 107 | self._capacity, 108 | message='Appending past end of TensorBuffer.'), 109 | tf.assert_equal( 110 | tf.shape(value), 111 | tf.shape(self._buffer)[1:], 112 | message='Appending value of inconsistent shape.') 113 | ]): 114 | with tf.control_dependencies( 115 | [tf.assign(self._buffer[self._current_size, :], value)]): 116 | return tf.assign_add(self._current_size, 1) 117 | 118 | @property 119 | def values(self): 120 | """Returns the accumulated tensor.""" 121 | begin_value = tf.zeros([self._rank + 1], dtype=tf.int32) 122 | value_size = tf.concat([[self._current_size], 123 | tf.constant(-1, tf.int32, [self._rank])], 0) 124 | return tf.slice(self._buffer, begin_value, value_size) 125 | 126 | @property 127 | def current_size(self): 128 | """Returns the current number of tensors in the buffer.""" 129 | return self._current_size 130 | 131 | @property 132 | def capacity(self): 133 | """Returns the current capacity of the buffer.""" 134 | return self._capacity 135 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | """Scratchpad for training a CNN on MNIST with DPSGD.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | tf.flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') 25 | tf.flags.DEFINE_integer('batch_size', 256, 'Batch size') 26 | tf.flags.DEFINE_integer('epochs', 15, 'Number of epochs') 27 | 28 | FLAGS = tf.flags.FLAGS 29 | 30 | 31 | def cnn_model_fn(features, labels, mode): 32 | """Model function for a CNN.""" 33 | 34 | # Define CNN architecture using tf.keras.layers. 
35 | input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) 36 | y = tf.keras.layers.Conv2D(16, 8, 37 | strides=2, 38 | padding='same', 39 | activation='relu').apply(input_layer) 40 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y) 41 | y = tf.keras.layers.Conv2D(32, 4, 42 | strides=2, 43 | padding='valid', 44 | activation='relu').apply(y) 45 | y = tf.keras.layers.MaxPool2D(2, 1).apply(y) 46 | y = tf.keras.layers.Flatten().apply(y) 47 | y = tf.keras.layers.Dense(32, activation='relu').apply(y) 48 | logits = tf.keras.layers.Dense(10).apply(y) 49 | 50 | # Calculate loss as a vector and as its average across minibatch. 51 | vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, 52 | logits=logits) 53 | scalar_loss = tf.reduce_mean(vector_loss) 54 | 55 | # Configure the training op (for TRAIN mode). 56 | if mode == tf.estimator.ModeKeys.TRAIN: 57 | optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) 58 | opt_loss = scalar_loss 59 | global_step = tf.train.get_global_step() 60 | train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) 61 | return tf.estimator.EstimatorSpec(mode=mode, 62 | loss=scalar_loss, 63 | train_op=train_op) 64 | 65 | # Add evaluation metrics (for EVAL mode). 66 | elif mode == tf.estimator.ModeKeys.EVAL: 67 | eval_metric_ops = { 68 | 'accuracy': 69 | tf.metrics.accuracy( 70 | labels=labels, 71 | predictions=tf.argmax(input=logits, axis=1)) 72 | } 73 | return tf.estimator.EstimatorSpec(mode=mode, 74 | loss=scalar_loss, 75 | eval_metric_ops=eval_metric_ops) 76 | 77 | 78 | def load_mnist(): 79 | """Loads MNIST and preprocesses to combine training and validation data.""" 80 | train, test = tf.keras.datasets.mnist.load_data() 81 | train_data, train_labels = train 82 | test_data, test_labels = test 83 | 84 | train_data = np.array(train_data, dtype=np.float32) / 255 85 | test_data = np.array(test_data, dtype=np.float32) / 255 86 | 87 | train_labels = np.array(train_labels, dtype=np.int32) 88 | test_labels = np.array(test_labels, dtype=np.int32) 89 | 90 | assert train_data.min() == 0. 91 | assert train_data.max() == 1. 92 | assert test_data.min() == 0. 93 | assert test_data.max() == 1. 94 | assert train_labels.ndim == 1 95 | assert test_labels.ndim == 1 96 | 97 | return train_data, train_labels, test_data, test_labels 98 | 99 | 100 | def main(unused_argv): 101 | tf.logging.set_verbosity(tf.logging.INFO) 102 | 103 | # Load training and test data. 104 | train_data, train_labels, test_data, test_labels = load_mnist() 105 | 106 | # Instantiate the tf.Estimator. 107 | mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn) 108 | 109 | # Create tf.Estimator input functions for the training and test data. 110 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 111 | x={'x': train_data}, 112 | y=train_labels, 113 | batch_size=FLAGS.batch_size, 114 | num_epochs=FLAGS.epochs, 115 | shuffle=True) 116 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 117 | x={'x': test_data}, 118 | y=test_labels, 119 | num_epochs=1, 120 | shuffle=False) 121 | 122 | # Training loop. 123 | steps_per_epoch = 60000 // FLAGS.batch_size 124 | for epoch in range(1, FLAGS.epochs + 1): 125 | # Train the model for one epoch. 
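    # (train_input_fn is built with num_epochs=FLAGS.epochs, so its input
    # pipeline can serve every iteration of this loop; each train() call
    # below advances exactly steps_per_epoch steps, i.e. one epoch.)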
126 | mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) 127 | 128 | # Evaluate the model and print results 129 | eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) 130 | test_accuracy = eval_results['accuracy'] 131 | print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) 132 | 133 | if __name__ == '__main__': 134 | tf.app.run() 135 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/privacy_ledger_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tests for PrivacyLedger.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from privacy.analysis import privacy_ledger 24 | from privacy.dp_query import gaussian_query 25 | from privacy.dp_query import nested_query 26 | from privacy.dp_query import test_utils 27 | 28 | tf.enable_eager_execution() 29 | 30 | 31 | class PrivacyLedgerTest(tf.test.TestCase): 32 | 33 | def test_fail_on_probability_zero(self): 34 | with self.assertRaisesRegexp(ValueError, 35 | 'Selection probability cannot be 0.'): 36 | privacy_ledger.PrivacyLedger(10, 0) 37 | 38 | def test_basic(self): 39 | ledger = privacy_ledger.PrivacyLedger(10, 0.1) 40 | ledger.record_sum_query(5.0, 1.0) 41 | ledger.record_sum_query(2.0, 0.5) 42 | 43 | ledger.finalize_sample() 44 | 45 | expected_queries = [[5.0, 1.0], [2.0, 0.5]] 46 | formatted = ledger.get_formatted_ledger_eager() 47 | 48 | sample = formatted[0] 49 | self.assertAllClose(sample.population_size, 10.0) 50 | self.assertAllClose(sample.selection_probability, 0.1) 51 | self.assertAllClose(sorted(sample.queries), sorted(expected_queries)) 52 | 53 | def test_sum_query(self): 54 | record1 = tf.constant([2.0, 0.0]) 55 | record2 = tf.constant([-1.0, 1.0]) 56 | 57 | population_size = tf.Variable(0) 58 | selection_probability = tf.Variable(1.0) 59 | 60 | query = gaussian_query.GaussianSumQuery( 61 | l2_norm_clip=10.0, stddev=0.0) 62 | query = privacy_ledger.QueryWithLedger( 63 | query, population_size, selection_probability) 64 | 65 | # First sample. 66 | tf.assign(population_size, 10) 67 | tf.assign(selection_probability, 0.1) 68 | test_utils.run_query(query, [record1, record2]) 69 | 70 | expected_queries = [[10.0, 0.0]] 71 | formatted = query.ledger.get_formatted_ledger_eager() 72 | sample_1 = formatted[0] 73 | self.assertAllClose(sample_1.population_size, 10.0) 74 | self.assertAllClose(sample_1.selection_probability, 0.1) 75 | self.assertAllClose(sample_1.queries, expected_queries) 76 | 77 | # Second sample. 
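    # (Recording a second sample with an updated population size and
    # selection probability; the formatted ledger should then hold two
    # samples, with the first one unchanged.)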
78 | tf.assign(population_size, 20) 79 | tf.assign(selection_probability, 0.2) 80 | test_utils.run_query(query, [record1, record2]) 81 | 82 | formatted = query.ledger.get_formatted_ledger_eager() 83 | sample_1, sample_2 = formatted 84 | self.assertAllClose(sample_1.population_size, 10.0) 85 | self.assertAllClose(sample_1.selection_probability, 0.1) 86 | self.assertAllClose(sample_1.queries, expected_queries) 87 | 88 | self.assertAllClose(sample_2.population_size, 20.0) 89 | self.assertAllClose(sample_2.selection_probability, 0.2) 90 | self.assertAllClose(sample_2.queries, expected_queries) 91 | 92 | def test_nested_query(self): 93 | population_size = tf.Variable(0) 94 | selection_probability = tf.Variable(1.0) 95 | 96 | query1 = gaussian_query.GaussianAverageQuery( 97 | l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0) 98 | query2 = gaussian_query.GaussianAverageQuery( 99 | l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0) 100 | 101 | query = nested_query.NestedQuery([query1, query2]) 102 | query = privacy_ledger.QueryWithLedger( 103 | query, population_size, selection_probability) 104 | 105 | record1 = [1.0, [12.0, 9.0]] 106 | record2 = [5.0, [1.0, 2.0]] 107 | 108 | # First sample. 109 | tf.assign(population_size, 10) 110 | tf.assign(selection_probability, 0.1) 111 | test_utils.run_query(query, [record1, record2]) 112 | 113 | expected_queries = [[4.0, 2.0], [5.0, 1.0]] 114 | formatted = query.ledger.get_formatted_ledger_eager() 115 | sample_1 = formatted[0] 116 | self.assertAllClose(sample_1.population_size, 10.0) 117 | self.assertAllClose(sample_1.selection_probability, 0.1) 118 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries)) 119 | 120 | # Second sample. 121 | tf.assign(population_size, 20) 122 | tf.assign(selection_probability, 0.2) 123 | test_utils.run_query(query, [record1, record2]) 124 | 125 | formatted = query.ledger.get_formatted_ledger_eager() 126 | sample_1, sample_2 = formatted 127 | self.assertAllClose(sample_1.population_size, 10.0) 128 | self.assertAllClose(sample_1.selection_probability, 0.1) 129 | self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries)) 130 | 131 | self.assertAllClose(sample_2.population_size, 20.0) 132 | self.assertAllClose(sample_2.selection_probability, 0.2) 133 | self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries)) 134 | 135 | 136 | if __name__ == '__main__': 137 | tf.test.main() 138 | -------------------------------------------------------------------------------- /tensorflow_privacy/research/pate_2017/aggregation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | from six.moves import xrange 23 | 24 | 25 | def labels_from_probs(probs): 26 | """ 27 | Helper function: computes argmax along last dimension of array to obtain 28 | labels (max prob or max logit value) 29 | :param probs: numpy array where probabilities or logits are on last dimension 30 | :return: array with the same shape as the input, with the last dimension 31 | removed, now containing the labels 32 | """ 33 | # Compute last axis index 34 | last_axis = len(np.shape(probs)) - 1 35 | 36 | # Label is argmax over last dimension 37 | labels = np.argmax(probs, axis=last_axis) 38 | 39 | # Return as np.int32 40 | return np.asarray(labels, dtype=np.int32) 41 | 42 | 43 | def noisy_max(logits, lap_scale, return_clean_votes=False): 44 | """ 45 | This aggregation mechanism takes the softmax/logit output of several models 46 | resulting from inference on identical inputs and computes the noisy-max of 47 | the votes for candidate classes to select a label for each sample: it 48 | adds Laplacian noise to label counts and returns the most frequent label. 49 | :param logits: logits or probabilities for each sample 50 | :param lap_scale: scale of the Laplacian noise to be added to counts 51 | :param return_clean_votes: if set to True, also returns clean votes (without 52 | Laplacian noise). This can be used to perform the 53 | privacy analysis of this aggregation mechanism. 54 | :return: the aggregated labels and, if return_clean_votes is True, also the 55 | clean vote counts for each class per sample and the original labels 56 | produced by the teachers.
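  Illustrative example (hypothetical values): with 100 teachers voting over
  10 classes, a sample whose clean counts are [50, 30, 20, 0, ...] gets
  independent Laplacian noise of scale lap_scale added to each of the 10
  counts before the argmax, so nearly tied classes may swap; this randomness
  is what yields the differential privacy guarantee.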
57 | """ 58 | 59 | # Compute labels from logits/probs and reshape array properly 60 | labels = labels_from_probs(logits) 61 | labels_shape = np.shape(labels) 62 | labels = labels.reshape((labels_shape[0], labels_shape[1])) 63 | 64 | # Initialize array to hold final labels 65 | result = np.zeros(int(labels_shape[1])) 66 | 67 | if return_clean_votes: 68 | # Initialize array to hold clean votes for each sample 69 | clean_votes = np.zeros((int(labels_shape[1]), 10)) 70 | 71 | # Parse each sample 72 | for i in xrange(int(labels_shape[1])): 73 | # Count number of votes assigned to each class 74 | label_counts = np.bincount(labels[:, i], minlength=10) 75 | 76 | if return_clean_votes: 77 | # Store vote counts for export 78 | clean_votes[i] = label_counts 79 | 80 | # Cast in float32 to prepare before addition of Laplacian noise 81 | label_counts = np.asarray(label_counts, dtype=np.float32) 82 | 83 | # Sample independent Laplacian noise for each class 84 | for item in xrange(10): 85 | label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale)) 86 | 87 | # Result is the most frequent label 88 | result[i] = np.argmax(label_counts) 89 | 90 | # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries 91 | result = np.asarray(result, dtype=np.int32) 92 | 93 | if return_clean_votes: 94 | # Returns several array, which are later saved: 95 | # result: labels obtained from the noisy aggregation 96 | # clean_votes: the number of teacher votes assigned to each sample and class 97 | # labels: the labels assigned by teachers (before the noisy aggregation) 98 | return result, clean_votes, labels 99 | else: 100 | # Only return labels resulting from noisy aggregation 101 | return result 102 | 103 | 104 | def aggregation_most_frequent(logits): 105 | """ 106 | This aggregation mechanism takes the softmax/logit output of several models 107 | resulting from inference on identical inputs and computes the most frequent 108 | label. It is deterministic (no noise injection like noisy_max() above. 109 | :param logits: logits or probabilities for each sample 110 | :return: 111 | """ 112 | # Compute labels from logits/probs and reshape array properly 113 | labels = labels_from_probs(logits) 114 | labels_shape = np.shape(labels) 115 | labels = labels.reshape((labels_shape[0], labels_shape[1])) 116 | 117 | # Initialize array to hold final labels 118 | result = np.zeros(int(labels_shape[1])) 119 | 120 | # Parse each sample 121 | for i in xrange(int(labels_shape[1])): 122 | # Count number of votes assigned to each class 123 | label_counts = np.bincount(labels[:, i], minlength=10) 124 | 125 | label_counts = np.asarray(label_counts, dtype=np.int32) 126 | 127 | # Result is the most frequent label 128 | result[i] = np.argmax(label_counts) 129 | 130 | return np.asarray(result, dtype=np.int32) 131 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/gaussian_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Implements DPQuery interface for Gaussian average queries. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | 24 | from distutils.version import LooseVersion 25 | import tensorflow as tf 26 | 27 | from privacy.dp_query import dp_query 28 | from privacy.dp_query import normalized_query 29 | 30 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 31 | nest = tf.contrib.framework.nest 32 | else: 33 | nest = tf.nest 34 | 35 | 36 | class GaussianSumQuery(dp_query.SumAggregationDPQuery): 37 | """Implements DPQuery interface for Gaussian sum queries. 38 | 39 | Accumulates clipped vectors, then adds Gaussian noise to the sum. 40 | """ 41 | 42 | # pylint: disable=invalid-name 43 | _GlobalState = collections.namedtuple( 44 | '_GlobalState', ['l2_norm_clip', 'stddev']) 45 | 46 | def __init__(self, l2_norm_clip, stddev): 47 | """Initializes the GaussianSumQuery. 48 | 49 | Args: 50 | l2_norm_clip: The clipping norm to apply to the global norm of each 51 | record. 52 | stddev: The stddev of the noise added to the sum. 53 | """ 54 | self._l2_norm_clip = l2_norm_clip 55 | self._stddev = stddev 56 | self._ledger = None 57 | 58 | def set_ledger(self, ledger): 59 | self._ledger = ledger 60 | 61 | def make_global_state(self, l2_norm_clip, stddev): 62 | """Creates a global state from the given parameters.""" 63 | return self._GlobalState(tf.cast(l2_norm_clip, tf.float32), 64 | tf.cast(stddev, tf.float32)) 65 | 66 | def initial_global_state(self): 67 | return self.make_global_state(self._l2_norm_clip, self._stddev) 68 | 69 | def derive_sample_params(self, global_state): 70 | return global_state.l2_norm_clip 71 | 72 | def initial_sample_state(self, template): 73 | return nest.map_structure( 74 | dp_query.zeros_like, template) 75 | 76 | def preprocess_record_impl(self, params, record): 77 | """Clips the l2 norm, returning the clipped record and the l2 norm. 78 | 79 | Args: 80 | params: The parameters for the sample. 81 | record: The record to be processed. 82 | 83 | Returns: 84 | A tuple (preprocessed_records, l2_norm) where `preprocessed_records` is 85 | the structure of preprocessed tensors, and l2_norm is the total l2 norm 86 | before clipping. 
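      Illustrative note: clipping follows tf.clip_by_global_norm semantics,
      so a record whose global l2 norm is 10.0 with l2_norm_clip=4.0 is
      scaled by 0.4 in every component, while a record whose norm is already
      below the clip is returned unchanged.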
87 | """ 88 | l2_norm_clip = params 89 | record_as_list = nest.flatten(record) 90 | clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip) 91 | return nest.pack_sequence_as(record, clipped_as_list), norm 92 | 93 | def preprocess_record(self, params, record): 94 | preprocessed_record, _ = self.preprocess_record_impl(params, record) 95 | return preprocessed_record 96 | 97 | def get_noised_result(self, sample_state, global_state): 98 | """See base class.""" 99 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 100 | def add_noise(v): 101 | return v + tf.random_normal(tf.shape(v), stddev=global_state.stddev) 102 | else: 103 | random_normal = tf.random_normal_initializer(stddev=global_state.stddev) 104 | def add_noise(v): 105 | return v + random_normal(tf.shape(v)) 106 | 107 | if self._ledger: 108 | dependencies = [ 109 | self._ledger.record_sum_query( 110 | global_state.l2_norm_clip, global_state.stddev) 111 | ] 112 | else: 113 | dependencies = [] 114 | with tf.control_dependencies(dependencies): 115 | return nest.map_structure(add_noise, sample_state), global_state 116 | 117 | 118 | class GaussianAverageQuery(normalized_query.NormalizedQuery): 119 | """Implements DPQuery interface for Gaussian average queries. 120 | 121 | Accumulates clipped vectors, adds Gaussian noise, and normalizes. 122 | 123 | Note that we use "fixed-denominator" estimation: the denominator should be 124 | specified as the expected number of records per sample. Accumulating the 125 | denominator separately would also be possible but would be produce a higher 126 | variance estimator. 127 | """ 128 | 129 | def __init__(self, 130 | l2_norm_clip, 131 | sum_stddev, 132 | denominator): 133 | """Initializes the GaussianAverageQuery. 134 | 135 | Args: 136 | l2_norm_clip: The clipping norm to apply to the global norm of each 137 | record. 138 | sum_stddev: The stddev of the noise added to the sum (before 139 | normalization). 140 | denominator: The normalization constant (applied after noise is added to 141 | the sum). 142 | """ 143 | super(GaussianAverageQuery, self).__init__( 144 | numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev), 145 | denominator=denominator) 146 | -------------------------------------------------------------------------------- /dp_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Differentially private optimizers for TensorFlow.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from privacy.analysis import privacy_ledger 23 | from privacy.dp_query import gaussian_query 24 | 25 | def make_optimizer_class(cls): 26 | """Constructs a DP optimizer class from an existing one.""" 27 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ 28 | child_code = cls._compute_gradients.__code__ 29 | if child_code is not parent_code: 30 | tf.logging.warning( 31 | 'WARNING: Calling make_optimizer_class() on class %s that overrides ' 32 | 'method compute_gradients(). Check to ensure that ' 33 | 'make_optimizer_class() does not interfere with overridden version.', 34 | cls.__name__) 35 | 36 | class DPOptimizerClass(cls): 37 | """Differentially private subclass of given class cls.""" 38 | 39 | def __init__( 40 | self, 41 | dp_sum_query, 42 | num_microbatches=None, 43 | unroll_microbatches=False, 44 | *args, 45 | **kwargs): 46 | """Initialize the DPOptimizerClass. 47 | 48 | Args: 49 | dp_sum_query: DPQuery object, specifying differential privacy 50 | mechanism to use. 51 | num_microbatches: How many microbatches into which the minibatch is 52 | split. If None, will default to the size of the minibatch, and 53 | per-example gradients will be computed. 54 | unroll_microbatches: If true, processes microbatches within a Python 55 | loop instead of a tf.while_loop. Can be used if using a tf.while_loop 56 | raises an exception. 57 | """ 58 | super(DPOptimizerClass, self).__init__(*args, **kwargs) 59 | ###### accountant + sanitizer ###### 60 | self._dp_sum_query = dp_sum_query 61 | ###### 62 | self._num_microbatches = num_microbatches 63 | self._global_state = self._dp_sum_query.initial_global_state() 64 | self._unroll_microbatches = unroll_microbatches 65 | 66 | def compute_gradients(self, loss, var_list, gate_gradients=None, aggregation_method=None, colocate_gradients_with_ops=False, grad_loss=None, gradient_tape=None): 67 | if not gradient_tape: 68 | raise ValueError('A tape needs to be passed.') 69 | 70 | vector_loss = loss() 71 | if self._num_microbatches is None: 72 | self._num_microbatches = tf.shape(vector_loss)[0] 73 | sample_state = self._dp_sum_query.initial_sample_state(var_list) 74 | microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1]) 75 | sample_params = (self._dp_sum_query.derive_sample_params(self._global_state)) 76 | 77 | for idx in range(self._num_microbatches): 78 | ###### compute gradient ###### 79 | microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [idx])) 80 | grads = gradient_tape.gradient(microbatch_loss, var_list) 81 | ###### 82 | 83 | ###### accountant ###### 84 | sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads) 85 | ###### 86 | 87 | ###### sanitizer ###### 88 | grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state)) 89 | ###### 90 | 91 | def normalize(v): 92 | return v / tf.cast(self._num_microbatches, tf.float32) 93 | 94 | final_grads = tf.nest.map_structure(normalize, grad_sums) 95 | 96 | grads_and_vars = list(zip(final_grads, var_list)) 97 | return grads_and_vars 98 | 99 | return DPOptimizerClass 100 | 101 | 102 | def make_gaussian_optimizer_class(cls): 103 | """Constructs a DP optimizer with Gaussian averaging of updates.""" 104 | 105 | class DPGaussianOptimizerClass(make_optimizer_class(cls)): 
106 | """DP subclass of given class cls using Gaussian averaging.""" 107 | 108 | def __init__(self, l2_norm_clip, noise_multiplier, num_microbatches=None, ledger=None, unroll_microbatches=False, *args, **kwargs): 109 | dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, l2_norm_clip * noise_multiplier) 110 | 111 | if ledger: 112 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, ledger=ledger) 113 | 114 | super(DPGaussianOptimizerClass, self).__init__(dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs) 115 | 116 | @property 117 | def ledger(self): 118 | return self._dp_sum_query.ledger 119 | 120 | return DPGaussianOptimizerClass 121 | 122 | DPAdagradOptimizer = make_optimizer_class(tf.optimizers.Adagrad) 123 | DPAdamOptimizer = make_optimizer_class(tf.optimizers.Adam) 124 | DPGradientDescentOptimizer = make_optimizer_class(tf.optimizers.SGD) 125 | 126 | DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adagrad) 127 | DPAdamGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.Adam) 128 | DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(tf.optimizers.SGD) 129 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for differentially private optimizers.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl.testing import parameterized 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from privacy.analysis import privacy_ledger 25 | from privacy.dp_query import gaussian_query 26 | from privacy.optimizers import dp_optimizer 27 | 28 | 29 | class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): 30 | 31 | def setUp(self): 32 | tf.enable_eager_execution() 33 | super(DPOptimizerEagerTest, self).setUp() 34 | 35 | def _loss_fn(self, val0, val1): 36 | return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1) 37 | 38 | @parameterized.named_parameters( 39 | ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1, 40 | [-2.5, -2.5]), 41 | ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2, 42 | [-2.5, -2.5]), 43 | ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4, 44 | [-2.5, -2.5]), 45 | ('DPAdagrad 1', dp_optimizer.DPAdagradOptimizer, 1, [-2.5, -2.5]), 46 | ('DPAdagrad 2', dp_optimizer.DPAdagradOptimizer, 2, [-2.5, -2.5]), 47 | ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]), 48 | ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]), 49 | ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), 50 | ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5])) 51 | def testBaseline(self, cls, num_microbatches, expected_answer): 52 | with tf.GradientTape(persistent=True) as gradient_tape: 53 | var0 = tf.Variable([1.0, 2.0]) 54 | data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) 55 | 56 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) 57 | dp_sum_query = privacy_ledger.QueryWithLedger( 58 | dp_sum_query, 1e6, num_microbatches / 1e6) 59 | 60 | opt = cls( 61 | dp_sum_query, 62 | num_microbatches=num_microbatches, 63 | learning_rate=2.0) 64 | 65 | self.evaluate(tf.global_variables_initializer()) 66 | # Fetch params to validate initial values 67 | self.assertAllClose([1.0, 2.0], self.evaluate(var0)) 68 | 69 | # Expected gradient is sum of differences divided by number of 70 | # microbatches. 71 | grads_and_vars = opt.compute_gradients( 72 | lambda: self._loss_fn(var0, data0), [var0], 73 | gradient_tape=gradient_tape) 74 | self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) 75 | 76 | @parameterized.named_parameters( 77 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), 78 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), 79 | ('DPAdam', dp_optimizer.DPAdamOptimizer)) 80 | def testClippingNorm(self, cls): 81 | with tf.GradientTape(persistent=True) as gradient_tape: 82 | var0 = tf.Variable([0.0, 0.0]) 83 | data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) 84 | 85 | dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) 86 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) 87 | 88 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) 89 | 90 | self.evaluate(tf.global_variables_initializer()) 91 | # Fetch params to validate initial values 92 | self.assertAllClose([0.0, 0.0], self.evaluate(var0)) 93 | 94 | # Expected gradient is sum of differences. 
95 | grads_and_vars = opt.compute_gradients( 96 | lambda: self._loss_fn(var0, data0), [var0], 97 | gradient_tape=gradient_tape) 98 | self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) 99 | 100 | @parameterized.named_parameters( 101 | ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), 102 | ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), 103 | ('DPAdam', dp_optimizer.DPAdamOptimizer)) 104 | def testNoiseMultiplier(self, cls): 105 | with tf.GradientTape(persistent=True) as gradient_tape: 106 | var0 = tf.Variable([0.0]) 107 | data0 = tf.Variable([[0.0]]) 108 | 109 | dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) 110 | dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) 111 | 112 | opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) 113 | 114 | self.evaluate(tf.global_variables_initializer()) 115 | # Fetch params to validate initial values 116 | self.assertAllClose([0.0], self.evaluate(var0)) 117 | 118 | grads = [] 119 | for _ in range(1000): 120 | grads_and_vars = opt.compute_gradients( 121 | lambda: self._loss_fn(var0, data0), [var0], 122 | gradient_tape=gradient_tape) 123 | grads.append(grads_and_vars[0][0]) 124 | 125 | # Test standard deviation is close to l2_norm_clip * noise_multiplier. 126 | self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) 127 | 128 | 129 | if __name__ == '__main__': 130 | tf.test.main() 131 | -------------------------------------------------------------------------------- /mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Training a CNN on MNIST with Keras and the DP SGD optimizer.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl import app 21 | from absl import flags 22 | from absl import logging 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.analysis.rdp_accountant import compute_rdp 28 | from privacy.analysis.rdp_accountant import get_privacy_spent 29 | from dp_optimizer import DPGradientDescentGaussianOptimizer 30 | 31 | GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer 32 | 33 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. 
If False, train with vanilla SGD.') 34 | flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate for training') 35 | flags.DEFINE_float('noise_multiplier', 1.1, 'Ratio of the standard deviation to the clipping norm') 36 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 37 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 38 | flags.DEFINE_integer('epochs', 400, 'Number of epochs') 39 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches (must evenly divide batch_size)') 40 | flags.DEFINE_string('model_dir', None, 'Model directory') 41 | 42 | FLAGS = flags.FLAGS 43 | delta = 1e-2  # Target delta of the (epsilon, delta) guarantee. The conventional choice for MNIST's 60000 training points is 1e-5; larger values are swept in the experiments (see results/). 44 | 45 | 46 | def compute_epsilon(steps): 47 | """Computes epsilon value for given hyperparameters.""" 48 | if FLAGS.noise_multiplier == 0.0: 49 | return float('inf') 50 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 51 | sampling_probability = FLAGS.batch_size / 60000 52 | rdp = compute_rdp(q=sampling_probability, noise_multiplier=FLAGS.noise_multiplier, steps=steps, orders=orders) 53 | return get_privacy_spent(orders, rdp, target_delta=delta)[0] 54 | 55 | 56 | class EpsilonPrintingCallback(tf.keras.callbacks.Callback): 57 | """Callback for Keras model to evaluate epsilon after every epoch.""" 58 | def __init__(self): 59 | self.eps_history = [] 60 | 61 | def on_epoch_end(self, epoch, logs=None): 62 | if FLAGS.dpsgd: 63 | eps = compute_epsilon((epoch + 1) * (60000 // FLAGS.batch_size)) 64 | self.eps_history.append(eps) 65 | print(', eps = {}'.format(eps)) 66 | 67 | 68 | def load_mnist(): 69 | """Loads MNIST and preprocesses to combine training and validation data.""" 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_data, train_labels = train 72 | test_data, test_labels = test 73 | 74 | train_data = np.array(train_data, dtype=np.float32) / 255 75 | test_data = np.array(test_data, dtype=np.float32) / 255 76 | 77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1) 78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1) 79 | 80 | train_labels = np.array(train_labels, dtype=np.int32) 81 | test_labels = np.array(test_labels, dtype=np.int32) 82 | 83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10) 84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10) 85 | 86 | return train_data, train_labels, test_data, test_labels 87 | 88 | 89 | def main(unused_argv): 90 | logging.set_verbosity(logging.INFO) 91 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 92 | raise ValueError('Number of microbatches should evenly divide batch_size') 93 | 94 | # Load training and test data.
95 | train_data, train_labels, test_data, test_labels = load_mnist() 96 | 97 | # Define a sequential Keras model 98 | model = tf.keras.Sequential([ 99 | tf.keras.layers.Conv2D(16, 8, strides=2, padding='same', activation='relu', input_shape=(28, 28, 1)), 100 | tf.keras.layers.MaxPool2D(2, 1), 101 | tf.keras.layers.Conv2D(32, 4, strides=2, padding='valid', activation='relu'), 102 | tf.keras.layers.MaxPool2D(2, 1), 103 | tf.keras.layers.Flatten(), 104 | tf.keras.layers.Dense(32, activation='relu'), 105 | tf.keras.layers.Dense(10) 106 | ]) 107 | 108 | if FLAGS.dpsgd: 109 | optimizer = DPGradientDescentGaussianOptimizer( 110 | l2_norm_clip=FLAGS.l2_norm_clip, 111 | noise_multiplier=FLAGS.noise_multiplier, 112 | num_microbatches=FLAGS.microbatches, 113 | learning_rate=FLAGS.learning_rate) 114 | # Compute vector of per-example loss rather than its mean over a minibatch. 115 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction=tf.compat.v1.losses.Reduction.NONE) 116 | else: 117 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 118 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) 119 | 120 | # Compile model with Keras 121 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) 122 | 123 | # Train model with Keras 124 | eps_callback = EpsilonPrintingCallback() 125 | fit_history = model.fit(train_data, train_labels, epochs=FLAGS.epochs, validation_data=(test_data, test_labels), batch_size=FLAGS.batch_size, callbacks=[eps_callback]) 126 | eps_history = eps_callback.eps_history 127 | val_acc_history = fit_history.history['val_accuracy'] 128 | with open('delta_{}_lr_{}.txt'.format(delta, FLAGS.learning_rate), 'w') as f: 129 | f.write('eps: {}\n'.format(eps_history)) 130 | f.write('validation acc: {}\n'.format(val_acc_history)) 131 | 132 | if __name__ == '__main__': 133 | app.run(main) -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/nested_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for NestedQuery.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | from absl.testing import parameterized 23 | from distutils.version import LooseVersion 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.dp_query import gaussian_query 28 | from privacy.dp_query import nested_query 29 | from privacy.dp_query import test_utils 30 | 31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 32 | nest = tf.contrib.framework.nest 33 | else: 34 | nest = tf.nest 35 | 36 | _basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0) 37 | 38 | 39 | class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): 40 | 41 | def test_nested_gaussian_sum_no_clip_no_noise(self): 42 | with self.cached_session() as sess: 43 | query1 = gaussian_query.GaussianSumQuery( 44 | l2_norm_clip=10.0, stddev=0.0) 45 | query2 = gaussian_query.GaussianSumQuery( 46 | l2_norm_clip=10.0, stddev=0.0) 47 | 48 | query = nested_query.NestedQuery([query1, query2]) 49 | 50 | record1 = [1.0, [2.0, 3.0]] 51 | record2 = [4.0, [3.0, 2.0]] 52 | 53 | query_result, _ = test_utils.run_query(query, [record1, record2]) 54 | result = sess.run(query_result) 55 | expected = [5.0, [5.0, 5.0]] 56 | self.assertAllClose(result, expected) 57 | 58 | def test_nested_gaussian_average_no_clip_no_noise(self): 59 | with self.cached_session() as sess: 60 | query1 = gaussian_query.GaussianAverageQuery( 61 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) 62 | query2 = gaussian_query.GaussianAverageQuery( 63 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) 64 | 65 | query = nested_query.NestedQuery([query1, query2]) 66 | 67 | record1 = [1.0, [2.0, 3.0]] 68 | record2 = [4.0, [3.0, 2.0]] 69 | 70 | query_result, _ = test_utils.run_query(query, [record1, record2]) 71 | result = sess.run(query_result) 72 | expected = [1.0, [1.0, 1.0]] 73 | self.assertAllClose(result, expected) 74 | 75 | def test_nested_gaussian_average_with_clip_no_noise(self): 76 | with self.cached_session() as sess: 77 | query1 = gaussian_query.GaussianAverageQuery( 78 | l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0) 79 | query2 = gaussian_query.GaussianAverageQuery( 80 | l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0) 81 | 82 | query = nested_query.NestedQuery([query1, query2]) 83 | 84 | record1 = [1.0, [12.0, 9.0]] # Clipped to [1.0, [4.0, 3.0]] 85 | record2 = [5.0, [1.0, 2.0]] # Clipped to [4.0, [1.0, 2.0]] 86 | 87 | query_result, _ = test_utils.run_query(query, [record1, record2]) 88 | result = sess.run(query_result) 89 | expected = [1.0, [1.0, 1.0]] 90 | self.assertAllClose(result, expected) 91 | 92 | def test_complex_nested_query(self): 93 | with self.cached_session() as sess: 94 | query_ab = gaussian_query.GaussianSumQuery( 95 | l2_norm_clip=1.0, stddev=0.0) 96 | query_c = gaussian_query.GaussianAverageQuery( 97 | l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0) 98 | query_d = gaussian_query.GaussianSumQuery( 99 | l2_norm_clip=10.0, stddev=0.0) 100 | 101 | query = nested_query.NestedQuery( 102 | [query_ab, {'c': query_c, 'd': [query_d]}]) 103 | 104 | record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}] 105 | record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}] 106 | 107 | query_result, _ = test_utils.run_query(query, [record1, record2]) 108 | result = sess.run(query_result) 109 | expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}] 110 | self.assertAllClose(result, expected) 111 | 
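  # (The test below checks noise calibration empirically: over 1000 runs,
  # the measured stddev of the sum query's output should be close to
  # sum_stddev, and that of the average query's outputs close to
  # sum_stddev / denominator.)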
112 |   def test_nested_query_with_noise(self):
113 |     with self.cached_session() as sess:
114 |       sum_stddev = 2.71828
115 |       denominator = 3.14159
116 | 
117 |       query1 = gaussian_query.GaussianSumQuery(
118 |           l2_norm_clip=1.5, stddev=sum_stddev)
119 |       query2 = gaussian_query.GaussianAverageQuery(
120 |           l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
121 |       query = nested_query.NestedQuery((query1, query2))
122 | 
123 |       record1 = (3.0, [2.0, 1.5])
124 |       record2 = (0.0, [-1.0, -3.5])
125 | 
126 |       query_result, _ = test_utils.run_query(query, [record1, record2])
127 | 
128 |       noised_averages = []
129 |       for _ in range(1000):
130 |         noised_averages.append(nest.flatten(sess.run(query_result)))
131 | 
132 |       result_stddev = np.std(noised_averages, 0)
133 |       avg_stddev = sum_stddev / denominator
134 |       expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
135 |       self.assertArrayNear(result_stddev, expected_stddev, 0.1)
136 | 
137 |   @parameterized.named_parameters(
138 |       ('type_mismatch', [_basic_query], (1.0,), TypeError),
139 |       ('too_many_queries', [_basic_query, _basic_query], [1.0], ValueError),
140 |       ('query_too_deep', [_basic_query, [_basic_query]], [1.0, 1.0], TypeError))
141 |   def test_record_incompatible_with_query(
142 |       self, queries, record, error_type):
143 |     with self.assertRaises(error_type):
144 |       test_utils.run_query(nested_query.NestedQuery(queries), [record])
145 | 
146 | 
147 | if __name__ == '__main__':
148 |   tf.test.main()
149 | 
--------------------------------------------------------------------------------
/tensorflow_privacy/research/pate_2017/README.md:
--------------------------------------------------------------------------------
1 | # Learning private models with multiple teachers
2 | 
3 | This repository contains code to create a setup for learning privacy-preserving
4 | student models by transferring knowledge from an ensemble of teachers trained
5 | on disjoint subsets of the data for which privacy guarantees are to be provided.
6 | 
7 | Knowledge acquired by teachers is transferred to the student in a differentially
8 | private manner by noisily aggregating the teacher decisions before feeding them
9 | to the student during training.
10 | 
11 | The paper describing the approach is [arXiv:1610.05755](https://arxiv.org/abs/1610.05755).
12 | 
13 | ## Dependencies
14 | 
15 | This model uses `TensorFlow` to perform numerical computations associated with
16 | machine learning models, as well as common Python libraries like `numpy`,
17 | `scipy`, and `six`. Instructions to install these can be found in their
18 | respective documentation.
19 | 
20 | ## How to run
21 | 
22 | This repository supports the MNIST and SVHN datasets. The following
23 | instructions are given for MNIST but can easily be adapted by replacing the
24 | flag `--dataset=mnist` with `--dataset=svhn`.
25 | There are two steps: teacher training and student training. Data will be
26 | downloaded automatically when you start the teacher training.
27 | 
28 | In the first step, we train an ensemble of teacher models; in the second,
29 | we train a student using predictions made by this ensemble.
30 | 
31 | **Training the teachers:** first run the `train_teachers.py` file with at least
32 | three flags specifying (1) the number of teachers, (2) the ID of the teacher
33 | you are training among these teachers, and (3) the dataset on which to train.
34 | For instance, to train teacher number 10 among an ensemble of 100 teachers for
35 | MNIST, you use the following command:
36 | 
37 | ```
38 | python train_teachers.py --nb_teachers=100 --teacher_id=10 --dataset=mnist
39 | ```
40 | 
41 | Optional flags like `train_dir` and `data_dir` can be set to point to the
42 | directories where model checkpoints and temporary data (like the dataset)
43 | should be saved, respectively. The flag `max_steps` (default: 3000)
44 | controls the length of training. See `train_teachers.py` and `deep_cnn.py`
45 | for the available flags and their descriptions.
46 | 
47 | **Training the student:** once the teachers are all trained, e.g., teachers
48 | with IDs `0` to `99` are trained for `nb_teachers=100`, we are ready to train
49 | the student. The student is trained by labeling some of the test data with
50 | predictions from the teachers. The predictions are aggregated by counting the
51 | votes assigned to each class among the ensemble of teachers, adding Laplacian
52 | noise to these votes, and assigning the label with the maximum noisy vote count
53 | to the sample. This is detailed in the function `noisy_max` in the file
54 | `aggregation.py`. To train the student, use the following command:
55 | 
56 | ```
57 | python train_student.py --nb_teachers=100 --dataset=mnist --stdnt_share=5000
58 | ```
59 | 
60 | The flag `--stdnt_share=5000` indicates that the student should be able to
61 | use the first `5000` samples of the dataset's test subset as unlabeled
62 | training points (they will be labeled using the teacher predictions). The
63 | remaining samples are used for evaluation of the student's accuracy, which
64 | is displayed upon completion of training.
65 | 
66 | ## Using semi-supervised GANs to train the student
67 | 
68 | In the paper, we describe how to train the student in a semi-supervised
69 | fashion using Generative Adversarial Networks. This can be reproduced for MNIST
70 | by cloning the [improved-gan](https://github.com/openai/improved-gan)
71 | repository, adding it to your `PATH` variable, and then running the shell
72 | script `train_student_mnist_250_lap_20_count_50_epochs_600.sh`.
73 | 
74 | ```
75 | export PATH="/path/to/improved-gan/mnist_svhn_cifar10":$PATH
76 | sh train_student_mnist_250_lap_20_count_50_epochs_600.sh
77 | ```
78 | 
79 | 
80 | ## Alternative deeper convolutional architecture
81 | 
82 | Note that a deeper convolutional model is available. Both the default and
83 | deeper model graphs are defined in `deep_cnn.py`, by the functions
84 | `inference` and `inference_deeper` respectively. Use the flag `--deeper=true`
85 | to switch to that model when launching `train_teachers.py` and
86 | `train_student.py`.
87 | 
88 | ## Privacy analysis
89 | 
90 | In the paper, we detail how data-dependent differential privacy bounds can be
91 | computed to estimate the cost of training the student. To reproduce
92 | the bounds given in the paper, we include the labels predicted by our two
93 | teacher ensembles, for MNIST and SVHN. You can run the privacy analysis for each
94 | dataset with the following commands:
95 | 
96 | ```
97 | python analysis.py --counts_file=mnist_250_teachers_labels.npy --indices_file=mnist_250_teachers_100_indices_used_by_student.npy
98 | 
99 | python analysis.py --counts_file=svhn_250_teachers_labels.npy --max_examples=1000 --delta=1e-6
100 | ```
101 | 
102 | To expedite experimentation with the privacy analysis of student training,
103 | `analysis.py` is configured to download the labels produced by 250
104 | teacher models for both MNIST and SVHN when the two commands above are run.
105 | These 250 teacher models were trained using the following commands,
106 | where `XXX` takes values between `0` and `249`:
107 | 
108 | ```
109 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=mnist
110 | python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=svhn
111 | ```
112 | 
113 | Note that these labels may also be used in lieu of the function `ensemble_preds`
114 | in `train_student.py` to compare the performance of alternative student model
115 | architectures and learning techniques. This facilitates future work by
116 | removing the need to train the MNIST and SVHN teacher ensembles when
117 | proposing new student training approaches.
118 | 
119 | ## Contact
120 | 
121 | To ask questions, please email `nicolas@papernot.fr` or open an issue on
122 | the `tensorflow/models` issues tracker. Please assign issues to
123 | [@npapernot](https://github.com/npapernot).
124 | 
--------------------------------------------------------------------------------
/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019, The TensorFlow Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Training a CNN on MNIST with Keras and the DP-SGD optimizer."""
15 | 
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | 
20 | from absl import app
21 | from absl import flags
22 | 
23 | from distutils.version import LooseVersion
24 | 
25 | import numpy as np
26 | import tensorflow as tf
27 | 
28 | from privacy.analysis.rdp_accountant import compute_rdp
29 | from privacy.analysis.rdp_accountant import get_privacy_spent
30 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
31 | 
32 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
33 |   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
34 | else:
35 |   GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name
36 | 
37 | flags.DEFINE_boolean(
38 |     'dpsgd', True, 'If True, train with DP-SGD.
If False, ' 39 | 'train with vanilla SGD.') 40 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') 41 | flags.DEFINE_float('noise_multiplier', 1.1, 42 | 'Ratio of the standard deviation to the clipping norm') 43 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 44 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 45 | flags.DEFINE_integer('epochs', 60, 'Number of epochs') 46 | flags.DEFINE_integer( 47 | 'microbatches', 250, 'Number of microbatches ' 48 | '(must evenly divide batch_size)') 49 | flags.DEFINE_string('model_dir', None, 'Model directory') 50 | 51 | FLAGS = flags.FLAGS 52 | 53 | 54 | def compute_epsilon(steps): 55 | """Computes epsilon value for given hyperparameters.""" 56 | if FLAGS.noise_multiplier == 0.0: 57 | return float('inf') 58 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 59 | sampling_probability = FLAGS.batch_size / 60000 60 | rdp = compute_rdp(q=sampling_probability, 61 | noise_multiplier=FLAGS.noise_multiplier, 62 | steps=steps, 63 | orders=orders) 64 | # Delta is set to 1e-5 because MNIST has 60000 training points. 65 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] 66 | 67 | 68 | def load_mnist(): 69 | """Loads MNIST and preprocesses to combine training and validation data.""" 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_data, train_labels = train 72 | test_data, test_labels = test 73 | 74 | train_data = np.array(train_data, dtype=np.float32) / 255 75 | test_data = np.array(test_data, dtype=np.float32) / 255 76 | 77 | train_data = train_data.reshape(train_data.shape[0], 28, 28, 1) 78 | test_data = test_data.reshape(test_data.shape[0], 28, 28, 1) 79 | 80 | train_labels = np.array(train_labels, dtype=np.int32) 81 | test_labels = np.array(test_labels, dtype=np.int32) 82 | 83 | train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10) 84 | test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10) 85 | 86 | assert train_data.min() == 0. 87 | assert train_data.max() == 1. 88 | assert test_data.min() == 0. 89 | assert test_data.max() == 1. 90 | 91 | return train_data, train_labels, test_data, test_labels 92 | 93 | 94 | def main(unused_argv): 95 | tf.logging.set_verbosity(tf.logging.INFO) 96 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 97 | raise ValueError('Number of microbatches should divide evenly batch_size') 98 | 99 | # Load training and test data. 100 | train_data, train_labels, test_data, test_labels = load_mnist() 101 | 102 | # Define a sequential Keras model 103 | model = tf.keras.Sequential([ 104 | tf.keras.layers.Conv2D(16, 8, 105 | strides=2, 106 | padding='same', 107 | activation='relu', 108 | input_shape=(28, 28, 1)), 109 | tf.keras.layers.MaxPool2D(2, 1), 110 | tf.keras.layers.Conv2D(32, 4, 111 | strides=2, 112 | padding='valid', 113 | activation='relu'), 114 | tf.keras.layers.MaxPool2D(2, 1), 115 | tf.keras.layers.Flatten(), 116 | tf.keras.layers.Dense(32, activation='relu'), 117 | tf.keras.layers.Dense(10) 118 | ]) 119 | 120 | if FLAGS.dpsgd: 121 | optimizer = DPGradientDescentGaussianOptimizer( 122 | l2_norm_clip=FLAGS.l2_norm_clip, 123 | noise_multiplier=FLAGS.noise_multiplier, 124 | num_microbatches=FLAGS.microbatches, 125 | learning_rate=FLAGS.learning_rate) 126 | # Compute vector of per-example loss rather than its mean over a minibatch. 
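    # Reduction.NONE gives one loss value per example; the DP optimizer needs
    # this vector so it can average the loss within each microbatch, clip that
    # microbatch's gradient, and add noise before the final average.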
127 | loss = tf.keras.losses.CategoricalCrossentropy( 128 | from_logits=True, reduction=tf.losses.Reduction.NONE) 129 | else: 130 | optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 131 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) 132 | 133 | # Compile model with Keras 134 | model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) 135 | 136 | # Train model with Keras 137 | model.fit(train_data, train_labels, 138 | epochs=FLAGS.epochs, 139 | validation_data=(test_data, test_labels), 140 | batch_size=FLAGS.batch_size) 141 | 142 | # Compute the privacy budget expended. 143 | if FLAGS.dpsgd: 144 | eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size) 145 | print('For delta=1e-5, the current epsilon is: %.2f' % eps) 146 | else: 147 | print('Trained with vanilla non-private SGD optimizer') 148 | 149 | if __name__ == '__main__': 150 | app.run(main) 151 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/gaussian_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tests for GaussianAverageQuery.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from absl.testing import parameterized 22 | import numpy as np 23 | from six.moves import xrange 24 | import tensorflow as tf 25 | 26 | from privacy.dp_query import gaussian_query 27 | from privacy.dp_query import test_utils 28 | 29 | 30 | class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): 31 | 32 | def test_gaussian_sum_no_clip_no_noise(self): 33 | with self.cached_session() as sess: 34 | record1 = tf.constant([2.0, 0.0]) 35 | record2 = tf.constant([-1.0, 1.0]) 36 | 37 | query = gaussian_query.GaussianSumQuery( 38 | l2_norm_clip=10.0, stddev=0.0) 39 | query_result, _ = test_utils.run_query(query, [record1, record2]) 40 | result = sess.run(query_result) 41 | expected = [1.0, 1.0] 42 | self.assertAllClose(result, expected) 43 | 44 | def test_gaussian_sum_with_clip_no_noise(self): 45 | with self.cached_session() as sess: 46 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. 47 | record2 = tf.constant([4.0, -3.0]) # Not clipped. 48 | 49 | query = gaussian_query.GaussianSumQuery( 50 | l2_norm_clip=5.0, stddev=0.0) 51 | query_result, _ = test_utils.run_query(query, [record1, record2]) 52 | result = sess.run(query_result) 53 | expected = [1.0, 1.0] 54 | self.assertAllClose(result, expected) 55 | 56 | def test_gaussian_sum_with_changing_clip_no_noise(self): 57 | with self.cached_session() as sess: 58 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. 59 | record2 = tf.constant([4.0, -3.0]) # Not clipped. 
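      # record1 has L2 norm 10.0, so clipping scales it by 5.0/10.0 down to
      # [-3.0, 4.0]; record2 has norm exactly 5.0 and passes through. The
      # expected sum is therefore [-3.0 + 4.0, 4.0 - 3.0] = [1.0, 1.0].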
60 | 61 | l2_norm_clip = tf.Variable(5.0) 62 | l2_norm_clip_placeholder = tf.placeholder(tf.float32) 63 | assign_l2_norm_clip = tf.assign(l2_norm_clip, l2_norm_clip_placeholder) 64 | query = gaussian_query.GaussianSumQuery( 65 | l2_norm_clip=l2_norm_clip, stddev=0.0) 66 | query_result, _ = test_utils.run_query(query, [record1, record2]) 67 | 68 | self.evaluate(tf.global_variables_initializer()) 69 | result = sess.run(query_result) 70 | expected = [1.0, 1.0] 71 | self.assertAllClose(result, expected) 72 | 73 | sess.run(assign_l2_norm_clip, {l2_norm_clip_placeholder: 0.0}) 74 | result = sess.run(query_result) 75 | expected = [0.0, 0.0] 76 | self.assertAllClose(result, expected) 77 | 78 | def test_gaussian_sum_with_noise(self): 79 | with self.cached_session() as sess: 80 | record1, record2 = 2.71828, 3.14159 81 | stddev = 1.0 82 | 83 | query = gaussian_query.GaussianSumQuery( 84 | l2_norm_clip=5.0, stddev=stddev) 85 | query_result, _ = test_utils.run_query(query, [record1, record2]) 86 | 87 | noised_sums = [] 88 | for _ in xrange(1000): 89 | noised_sums.append(sess.run(query_result)) 90 | 91 | result_stddev = np.std(noised_sums) 92 | self.assertNear(result_stddev, stddev, 0.1) 93 | 94 | def test_gaussian_sum_merge(self): 95 | records1 = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])] 96 | records2 = [tf.constant([3.0, 5.0]), tf.constant([-1.0, 4.0])] 97 | 98 | def get_sample_state(records): 99 | query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0) 100 | global_state = query.initial_global_state() 101 | params = query.derive_sample_params(global_state) 102 | sample_state = query.initial_sample_state(records[0]) 103 | for record in records: 104 | sample_state = query.accumulate_record(params, sample_state, record) 105 | return sample_state 106 | 107 | sample_state_1 = get_sample_state(records1) 108 | sample_state_2 = get_sample_state(records2) 109 | 110 | merged = gaussian_query.GaussianSumQuery(10.0, 1.0).merge_sample_states( 111 | sample_state_1, 112 | sample_state_2) 113 | 114 | with self.cached_session() as sess: 115 | result = sess.run(merged) 116 | 117 | expected = [3.0, 10.0] 118 | self.assertAllClose(result, expected) 119 | 120 | def test_gaussian_average_no_noise(self): 121 | with self.cached_session() as sess: 122 | record1 = tf.constant([5.0, 0.0]) # Clipped to [3.0, 0.0]. 123 | record2 = tf.constant([-1.0, 2.0]) # Not clipped. 
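      # The clipped sum is [3.0, 0.0] + [-1.0, 2.0] = [2.0, 2.0]; dividing by
      # denominator=2.0 yields the expected average of [1.0, 1.0].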
124 | 125 | query = gaussian_query.GaussianAverageQuery( 126 | l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0) 127 | query_result, _ = test_utils.run_query(query, [record1, record2]) 128 | result = sess.run(query_result) 129 | expected_average = [1.0, 1.0] 130 | self.assertAllClose(result, expected_average) 131 | 132 | def test_gaussian_average_with_noise(self): 133 | with self.cached_session() as sess: 134 | record1, record2 = 2.71828, 3.14159 135 | sum_stddev = 1.0 136 | denominator = 2.0 137 | 138 | query = gaussian_query.GaussianAverageQuery( 139 | l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator) 140 | query_result, _ = test_utils.run_query(query, [record1, record2]) 141 | 142 | noised_averages = [] 143 | for _ in range(1000): 144 | noised_averages.append(sess.run(query_result)) 145 | 146 | result_stddev = np.std(noised_averages) 147 | avg_stddev = sum_stddev / denominator 148 | self.assertNear(result_stddev, avg_stddev, 0.1) 149 | 150 | @parameterized.named_parameters( 151 | ('type_mismatch', [1.0], (1.0,), TypeError), 152 | ('too_few_on_left', [1.0], [1.0, 1.0], ValueError), 153 | ('too_few_on_right', [1.0, 1.0], [1.0], ValueError)) 154 | def test_incompatible_records(self, record1, record2, error_type): 155 | query = gaussian_query.GaussianSumQuery(1.0, 0.0) 156 | with self.assertRaises(error_type): 157 | test_utils.run_query(query, [record1, record2]) 158 | 159 | 160 | if __name__ == '__main__': 161 | tf.test.main() 162 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Training a CNN on MNIST in TF Eager mode with DP-SGD optimizer.""" 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | from absl import app 20 | from absl import flags 21 | 22 | from distutils.version import LooseVersion 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | 27 | from privacy.analysis.rdp_accountant import compute_rdp 28 | from privacy.analysis.rdp_accountant import get_privacy_spent 29 | from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer 30 | 31 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 32 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer 33 | tf.enable_eager_execution() 34 | else: 35 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name 36 | 37 | flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. 
If False, ' 38 | 'train with vanilla SGD.') 39 | flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') 40 | flags.DEFINE_float('noise_multiplier', 1.1, 41 | 'Ratio of the standard deviation to the clipping norm') 42 | flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') 43 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 44 | flags.DEFINE_integer('epochs', 60, 'Number of epochs') 45 | flags.DEFINE_integer('microbatches', 250, 'Number of microbatches ' 46 | '(must evenly divide batch_size)') 47 | 48 | FLAGS = flags.FLAGS 49 | 50 | 51 | def compute_epsilon(steps): 52 | """Computes epsilon value for given hyperparameters.""" 53 | if FLAGS.noise_multiplier == 0.0: 54 | return float('inf') 55 | orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) 56 | sampling_probability = FLAGS.batch_size / 60000 57 | rdp = compute_rdp(q=sampling_probability, 58 | noise_multiplier=FLAGS.noise_multiplier, 59 | steps=steps, 60 | orders=orders) 61 | # Delta is set to 1e-5 because MNIST has 60000 training points. 62 | return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] 63 | 64 | 65 | def main(_): 66 | if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: 67 | raise ValueError('Number of microbatches should divide evenly batch_size') 68 | 69 | # Fetch the mnist data 70 | train, test = tf.keras.datasets.mnist.load_data() 71 | train_images, train_labels = train 72 | test_images, test_labels = test 73 | 74 | # Create a dataset object and batch for the training data 75 | dataset = tf.data.Dataset.from_tensor_slices( 76 | (tf.cast(train_images[..., tf.newaxis]/255, tf.float32), 77 | tf.cast(train_labels, tf.int64))) 78 | dataset = dataset.shuffle(1000).batch(FLAGS.batch_size) 79 | 80 | # Create a dataset object and batch for the test data 81 | eval_dataset = tf.data.Dataset.from_tensor_slices( 82 | (tf.cast(test_images[..., tf.newaxis]/255, tf.float32), 83 | tf.cast(test_labels, tf.int64))) 84 | eval_dataset = eval_dataset.batch(10000) 85 | 86 | # Define the model using tf.keras.layers 87 | mnist_model = tf.keras.Sequential([ 88 | tf.keras.layers.Conv2D(16, 8, 89 | strides=2, 90 | padding='same', 91 | activation='relu'), 92 | tf.keras.layers.MaxPool2D(2, 1), 93 | tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'), 94 | tf.keras.layers.MaxPool2D(2, 1), 95 | tf.keras.layers.Flatten(), 96 | tf.keras.layers.Dense(32, activation='relu'), 97 | tf.keras.layers.Dense(10) 98 | ]) 99 | 100 | # Instantiate the optimizer 101 | if FLAGS.dpsgd: 102 | opt = DPGradientDescentGaussianOptimizer( 103 | l2_norm_clip=FLAGS.l2_norm_clip, 104 | noise_multiplier=FLAGS.noise_multiplier, 105 | num_microbatches=FLAGS.microbatches, 106 | learning_rate=FLAGS.learning_rate) 107 | else: 108 | opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) 109 | 110 | # Training loop. 111 | steps_per_epoch = 60000 // FLAGS.batch_size 112 | for epoch in range(FLAGS.epochs): 113 | # Train the model for one epoch. 114 | for (_, (images, labels)) in enumerate(dataset.take(-1)): 115 | with tf.GradientTape(persistent=True) as gradient_tape: 116 | # This dummy call is needed to obtain the var list. 117 | logits = mnist_model(images, training=True) 118 | var_list = mnist_model.trainable_variables 119 | 120 | # In Eager mode, the optimizer takes a function that returns the loss. 
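        # The DP optimizer re-evaluates this function under the gradient tape
        # so it can derive per-microbatch gradients, clip each one, and add
        # noise before applying the averaged update.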
121 | def loss_fn(): 122 | logits = mnist_model(images, training=True) # pylint: disable=undefined-loop-variable,cell-var-from-loop 123 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 124 | labels=labels, logits=logits) # pylint: disable=undefined-loop-variable,cell-var-from-loop 125 | # If training without privacy, the loss is a scalar not a vector. 126 | if not FLAGS.dpsgd: 127 | loss = tf.reduce_mean(loss) 128 | return loss 129 | 130 | if FLAGS.dpsgd: 131 | grads_and_vars = opt.compute_gradients(loss_fn, var_list, 132 | gradient_tape=gradient_tape) 133 | else: 134 | grads_and_vars = opt.compute_gradients(loss_fn, var_list) 135 | 136 | opt.apply_gradients(grads_and_vars) 137 | 138 | # Evaluate the model and print results 139 | for (_, (images, labels)) in enumerate(eval_dataset.take(-1)): 140 | logits = mnist_model(images, training=False) 141 | correct_preds = tf.equal(tf.argmax(logits, axis=1), labels) 142 | test_accuracy = np.mean(correct_preds.numpy()) 143 | print('Test accuracy after epoch %d is: %.3f' % (epoch, test_accuracy)) 144 | 145 | # Compute the privacy budget expended so far. 146 | if FLAGS.dpsgd: 147 | eps = compute_epsilon((epoch + 1) * steps_per_epoch) 148 | print('For delta=1e-5, the current epsilon is: %.2f' % eps) 149 | else: 150 | print('Trained with vanilla non-private SGD optimizer') 151 | 152 | if __name__ == '__main__': 153 | app.run(main) 154 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | This folder contains a set of tutorials that demonstrate the features of this 4 | library. 5 | As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest way to use 6 | a differentially private optimizer is to modify an existing TF training loop 7 | to replace an existing vanilla optimizer with its differentially private 8 | counterpart implemented in the library. 9 | 10 | Here is a list of all the tutorials included: 11 | 12 | * `lm_dpsgd_tutorial.py`: learn a language model with differential privacy. 13 | 14 | * `mnist_dpsgd_tutorial.py`: learn a convolutional neural network on MNIST with 15 | differential privacy. 16 | 17 | * `mnist_dpsgd_tutorial_eager.py`: learn a convolutional neural network on MNIST 18 | with differential privacy using Eager mode. 19 | 20 | * `mnist_dpsgd_tutorial_keras.py`: learn a convolutional neural network on MNIST 21 | with differential privacy using tf.Keras. 22 | 23 | * `mnist_lr_tutorial.py`: learn a differentially private logistic regression 24 | model on MNIST. The model illustrates application of the 25 | "amplification-by-iteration" analysis (https://arxiv.org/abs/1808.06651). 26 | 27 | The rest of this README describes the different parameters used to configure 28 | DP-SGD as well as expected outputs for the `mnist_dpsgd_tutorial.py` tutorial. 29 | 30 | ## Parameters 31 | 32 | All of the optimizers share some privacy-specific parameters that need to 33 | be tuned in addition to any existing hyperparameter. There are currently four: 34 | 35 | * `learning_rate` (float): The learning rate of the SGD training algorithm. The 36 | higher the learning rate, the more each update matters. If the updates are noisy 37 | (such as when the additive noise is large compared to the clipping 38 | threshold), the learning rate must be kept low for the training procedure to converge. 
39 | * `num_microbatches` (int): The input data for each step (i.e., batch) of your
40 | original training algorithm is split into this many microbatches. Generally,
41 | increasing this will improve your utility but slow down your training in terms
42 | of wall-clock time. The total number of examples consumed in one global step
43 | remains the same. This number should evenly divide your input batch size.
44 | * `l2_norm_clip` (float): The cumulative gradient across all network parameters
45 | from each microbatch will be clipped so that its L2 norm is at most this
46 | value. You should set this to something close to some percentile of the
47 | gradient norms you expect to see from each microbatch. In previous experiments,
48 | we've found numbers from 0.5 to 1.0 to work reasonably well.
49 | * `noise_multiplier` (float): This governs the amount of noise added during
50 | training. Generally, more noise results in better privacy and lower utility.
51 | This generally has to be at least 0.3 to obtain rigorous privacy guarantees,
52 | but smaller values may still be acceptable for practical purposes.
53 | 
54 | ## Measuring Privacy
55 | 
56 | Differential privacy can be expressed using two values, epsilon and delta.
57 | Roughly speaking, they mean the following:
58 | 
59 | * epsilon gives a ceiling on how much the probability of a particular output
60 | can increase by including (or removing) a single training example. We usually
61 | want it to be a small constant (less than 10, or, for more stringent privacy
62 | guarantees, less than 1). However, this is only an upper bound, and a large
63 | value of epsilon may still mean good practical privacy.
64 | * delta bounds the probability of an arbitrary change in model behavior.
65 | We can usually set this to a very small number (1e-7 or so) without
66 | compromising utility. A rule of thumb is to set it to be less than the inverse
67 | of the training data size.
68 | 
69 | To find the epsilon for a fixed delta value for your model, follow the
70 | approach demonstrated in the `compute_epsilon` function of `mnist_dpsgd_tutorial.py`,
71 | where the arguments used to call the RDP accountant (i.e., the tool used to
72 | compute the privacy guarantee) are:
73 | 
74 | * `q`: The sampling ratio, defined as (number of examples consumed in one
75 | step) / (total training examples).
76 | * `noise_multiplier`: The noise_multiplier from your parameters above.
77 | * `steps`: The number of global steps taken.
78 | 
79 | A detailed writeup of the theory behind the computation of epsilon and delta
80 | is available at https://arxiv.org/abs/1908.10530.
81 | 
82 | ## Expected Output
83 | 
84 | When the `mnist_dpsgd_tutorial.py` script is run with the default parameters,
85 | the output will contain the following lines (leaving out a lot of diagnostic
86 | info):
87 | ```
88 | ...
89 | Test accuracy after 1 epochs is: 0.774
90 | For delta=1e-5, the current epsilon is: 1.03
91 | ...
92 | Test accuracy after 2 epochs is: 0.877
93 | For delta=1e-5, the current epsilon is: 1.11
94 | ...
95 | Test accuracy after 60 epochs is: 0.966
96 | For delta=1e-5, the current epsilon is: 3.01
97 | ```
98 | 
99 | ## Using Command-Line Interface for Privacy Budgeting
100 | 
101 | Before launching a (possibly quite lengthy) training procedure, it is possible
102 | to compute, quickly and accurately, the privacy loss at any point of training.
103 | To do so, run the script `privacy/analysis/compute_dp_sgd_privacy.py`, which
104 | does not have any TensorFlow dependencies.
For example, executing 105 | ``` 106 | compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.1 --epochs=60 --delta=1e-5 107 | ``` 108 | allows us to conclude, in a matter of seconds, that DP-SGD run with default 109 | parameters satisfies differential privacy with eps = 3.01 and delta = 1e-05. 110 | Note that the flags provided in the command above correspond to the tutorial in 111 | `mnist_dpsgd_tutorial.py`. The command is applicable to other datasets but the 112 | values passed must be adapted (e.g., N the number of training points). 113 | 114 | 115 | ## Select Parameters 116 | 117 | The table below has a few sample parameters illustrating various 118 | accuracy/privacy tradeoffs achieved by the MNIST tutorial in 119 | `mnist_dpsgd_tutorial.py` (default parameters are in __bold__; privacy epsilon 120 | is reported at delta=1e-5; accuracy is averaged over 10 runs, its standard 121 | deviation is less than .3% in all cases). 122 | 123 | | Learning rate | Noise multiplier | Clipping threshold | Number of microbatches | Number of epochs | Privacy eps | Accuracy | 124 | | ------------- | ---------------- | ----------------- | ---------------------- | ---------------- | ----------- | -------- | 125 | | 0.1 | | | __256__ | 20 | no privacy | 99.0% | 126 | | 0.25 | 1.3 | 1.5 | __256__ | 15 | 1.19 | 95.0% | 127 | | __0.15__ | __1.1__ | __1.0__ | __256__ |__60__ | 3.01 | 96.6% | 128 | | 0.25 | 0.7 | 1.5 | __256__ | 45 | 7.10 | 97.0% | 129 | 130 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/rdp_accountant_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for rdp_accountant.py.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import sys 22 | 23 | from absl.testing import absltest 24 | from absl.testing import parameterized 25 | from mpmath import exp 26 | from mpmath import inf 27 | from mpmath import log 28 | from mpmath import npdf 29 | from mpmath import quad 30 | import numpy as np 31 | 32 | from privacy.analysis import privacy_ledger 33 | from privacy.analysis import rdp_accountant 34 | 35 | 36 | class TestGaussianMoments(parameterized.TestCase): 37 | ################################# 38 | # HELPER FUNCTIONS: # 39 | # Exact computations using # 40 | # multi-precision arithmetic. # 41 | ################################# 42 | 43 | def _log_float_mp(self, x): 44 | # Convert multi-precision input to float log space. 
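    # Anything below the smallest positive float (including 0) is mapped
    # directly to -inf rather than relying on the conversion of log(x).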
45 |     if x >= sys.float_info.min:
46 |       return float(log(x))
47 |     else:
48 |       return -np.inf
49 | 
50 |   def _integral_mp(self, fn, bounds=(-inf, inf)):
51 |     integral, _ = quad(fn, bounds, error=True, maxdegree=8)
52 |     return integral
53 | 
54 |   def _distributions_mp(self, sigma, q):
55 | 
56 |     def _mu0(x):
57 |       return npdf(x, mu=0, sigma=sigma)
58 | 
59 |     def _mu1(x):
60 |       return npdf(x, mu=1, sigma=sigma)
61 | 
62 |     def _mu(x):
63 |       return (1 - q) * _mu0(x) + q * _mu1(x)
64 | 
65 |     return _mu0, _mu  # Closure over sigma and q.
66 | 
67 |   def _mu1_over_mu0(self, x, sigma):
68 |     # Closed-form expression for N(1, sigma^2) / N(0, sigma^2) at x.
69 |     return exp((2 * x - 1) / (2 * sigma**2))
70 | 
71 |   def _mu_over_mu0(self, x, q, sigma):
72 |     return (1 - q) + q * self._mu1_over_mu0(x, sigma)
73 | 
74 |   def _compute_a_mp(self, sigma, q, alpha):
75 |     """Compute A_alpha for arbitrary alpha by numerical integration."""
76 |     mu0, _ = self._distributions_mp(sigma, q)
77 |     a_alpha_fn = lambda z: mu0(z) * self._mu_over_mu0(z, q, sigma)**alpha
78 |     a_alpha = self._integral_mp(a_alpha_fn)
79 |     return a_alpha
80 | 
81 |   # TEST ROUTINES
82 |   def test_compute_rdp_no_data(self):
83 |     # q = 0: no data is sampled, so no privacy is spent.
84 |     self.assertEqual(rdp_accountant.compute_rdp(0, 10, 1, 20), 0)
85 | 
86 |   def test_compute_rdp_no_sampling(self):
87 |     # q = 1, RDP = alpha / (2 * sigma^2).
88 |     self.assertEqual(rdp_accountant.compute_rdp(1, 10, 1, 20), 0.1)
89 | 
90 |   def test_compute_rdp_scalar(self):
91 |     rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
92 |     self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)
93 | 
94 |   def test_compute_rdp_sequence(self):
95 |     rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50,
96 |                                          [1.5, 2.5, 5, 50, 100, np.inf])
97 |     self.assertSequenceAlmostEqual(
98 |         rdp_vec, [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf],
99 |         delta=1e-5)
100 | 
101 |   params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01},
102 |             {'q': 1e-6, 'sigma': .1, 'order': 256},
103 |             {'q': 1e-5, 'sigma': .1, 'order': 256.1},
104 |             {'q': 1e-6, 'sigma': 1, 'order': 27},
105 |             {'q': 1e-4, 'sigma': 1., 'order': 1.5},
106 |             {'q': 1e-3, 'sigma': 1., 'order': 2},
107 |             {'q': .01, 'sigma': 10, 'order': 20},
108 |             {'q': .1, 'sigma': 100, 'order': 20.5},
109 |             {'q': .99, 'sigma': .1, 'order': 256},
110 |             {'q': .999, 'sigma': 100, 'order': 256.1})
111 | 
112 |   # pylint:disable=undefined-variable
113 |   @parameterized.parameters(p for p in params)
114 |   def test_compute_log_a_equals_mp(self, q, sigma, order):
115 |     # Compare the cheap computation of log(A) with an expensive, multi-precision
116 |     # computation.
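    # A_alpha here is E_{z ~ mu0}[(mu(z) / mu0(z))**alpha], computed by the
    # numerical integration in _compute_a_mp above; close agreement validates
    # the closed-form implementation in rdp_accountant.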
117 | log_a = rdp_accountant._compute_log_a(q, sigma, order) 118 | log_a_mp = self._log_float_mp(self._compute_a_mp(sigma, q, order)) 119 | np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4) 120 | 121 | def test_get_privacy_spent_check_target_delta(self): 122 | orders = range(2, 33) 123 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) 124 | eps, _, opt_order = rdp_accountant.get_privacy_spent( 125 | orders, rdp, target_delta=1e-5) 126 | self.assertAlmostEqual(eps, 1.258575, places=5) 127 | self.assertEqual(opt_order, 20) 128 | 129 | def test_get_privacy_spent_check_target_eps(self): 130 | orders = range(2, 33) 131 | rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) 132 | _, delta, opt_order = rdp_accountant.get_privacy_spent( 133 | orders, rdp, target_eps=1.258575) 134 | self.assertAlmostEqual(delta, 1e-5) 135 | self.assertEqual(opt_order, 20) 136 | 137 | def test_check_composition(self): 138 | orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14., 139 | 16., 20., 24., 28., 32., 64., 256.) 140 | 141 | rdp = rdp_accountant.compute_rdp(q=1e-4, 142 | noise_multiplier=.4, 143 | steps=40000, 144 | orders=orders) 145 | 146 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, 147 | target_delta=1e-6) 148 | 149 | rdp += rdp_accountant.compute_rdp(q=0.1, 150 | noise_multiplier=2, 151 | steps=100, 152 | orders=orders) 153 | eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, 154 | target_delta=1e-5) 155 | self.assertAlmostEqual(eps, 8.509656, places=5) 156 | self.assertEqual(opt_order, 2.5) 157 | 158 | def test_compute_rdp_from_ledger(self): 159 | orders = range(2, 33) 160 | q = 0.1 161 | n = 1000 162 | l2_norm_clip = 3.14159 163 | noise_stddev = 2.71828 164 | steps = 3 165 | 166 | query_entry = privacy_ledger.GaussianSumQueryEntry( 167 | l2_norm_clip, noise_stddev) 168 | ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps 169 | 170 | z = noise_stddev / l2_norm_clip 171 | rdp = rdp_accountant.compute_rdp(q, z, steps, orders) 172 | rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders) 173 | self.assertSequenceAlmostEqual(rdp, rdp_from_ledger) 174 | 175 | 176 | if __name__ == '__main__': 177 | absltest.main() 178 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Vectorized differentially private optimizers for TensorFlow.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from distutils.version import LooseVersion 21 | import tensorflow as tf 22 | 23 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 24 | nest = tf.contrib.framework.nest 25 | AdagradOptimizer = tf.train.AdagradOptimizer 26 | AdamOptimizer = tf.train.AdamOptimizer 27 | GradientDescentOptimizer = tf.train.GradientDescentOptimizer 28 | parent_code = tf.train.Optimizer.compute_gradients.__code__ 29 | GATE_OP = tf.train.Optimizer.GATE_OP # pylint: disable=invalid-name 30 | else: 31 | nest = tf.nest 32 | AdagradOptimizer = tf.optimizers.Adagrad 33 | AdamOptimizer = tf.optimizers.Adam 34 | GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name 35 | parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ # pylint: disable=protected-access 36 | GATE_OP = None # pylint: disable=invalid-name 37 | 38 | 39 | def make_vectorized_optimizer_class(cls): 40 | """Constructs a vectorized DP optimizer class from an existing one.""" 41 | if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): 42 | child_code = cls.compute_gradients.__code__ 43 | else: 44 | child_code = cls._compute_gradients.__code__ # pylint: disable=protected-access 45 | if child_code is not parent_code: 46 | tf.logging.warning( 47 | 'WARNING: Calling make_optimizer_class() on class %s that overrides ' 48 | 'method compute_gradients(). Check to ensure that ' 49 | 'make_optimizer_class() does not interfere with overridden version.', 50 | cls.__name__) 51 | 52 | class DPOptimizerClass(cls): 53 | """Differentially private subclass of given class cls.""" 54 | 55 | def __init__( 56 | self, 57 | l2_norm_clip, 58 | noise_multiplier, 59 | num_microbatches=None, 60 | *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args 61 | **kwargs): 62 | """Initialize the DPOptimizerClass. 63 | 64 | Args: 65 | l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients) 66 | noise_multiplier: Ratio of the standard deviation to the clipping norm 67 | num_microbatches: How many microbatches into which the minibatch is 68 | split. If None, will default to the size of the minibatch, and 69 | per-example gradients will be computed. 70 | """ 71 | super(DPOptimizerClass, self).__init__(*args, **kwargs) 72 | self._l2_norm_clip = l2_norm_clip 73 | self._noise_multiplier = noise_multiplier 74 | self._num_microbatches = num_microbatches 75 | 76 | def compute_gradients(self, 77 | loss, 78 | var_list, 79 | gate_gradients=GATE_OP, 80 | aggregation_method=None, 81 | colocate_gradients_with_ops=False, 82 | grad_loss=None, 83 | gradient_tape=None): 84 | if callable(loss): 85 | # TF is running in Eager mode 86 | raise NotImplementedError('Vectorized optimizer unavailable for TF2.') 87 | else: 88 | # TF is running in graph mode, check we did not receive a gradient tape. 89 | if gradient_tape: 90 | raise ValueError('When in graph mode, a tape should not be passed.') 91 | 92 | batch_size = tf.shape(loss)[0] 93 | if self._num_microbatches is None: 94 | self._num_microbatches = batch_size 95 | 96 | # Note: it would be closer to the correct i.i.d. sampling of records if 97 | # we sampled each microbatch from the appropriate binomial distribution, 98 | # although that still wouldn't be quite correct because it would be 99 | # sampling from the dataset without replacement. 
100 | microbatch_losses = tf.reshape(loss, [self._num_microbatches, -1]) 101 | 102 | if var_list is None: 103 | var_list = ( 104 | tf.trainable_variables() + tf.get_collection( 105 | tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) 106 | 107 | def process_microbatch(microbatch_loss): 108 | """Compute clipped grads for one microbatch.""" 109 | microbatch_loss = tf.reduce_mean(microbatch_loss) 110 | grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients( 111 | microbatch_loss, 112 | var_list, 113 | gate_gradients, 114 | aggregation_method, 115 | colocate_gradients_with_ops, 116 | grad_loss)) 117 | grads_list = [ 118 | g if g is not None else tf.zeros_like(v) 119 | for (g, v) in zip(list(grads), var_list) 120 | ] 121 | # Clip gradients to have L2 norm of l2_norm_clip. 122 | # Here, we use TF primitives rather than the built-in 123 | # tf.clip_by_global_norm() so that operations can be vectorized 124 | # across microbatches. 125 | grads_flat = nest.flatten(grads_list) 126 | squared_l2_norms = [tf.reduce_sum(tf.square(g)) for g in grads_flat] 127 | global_norm = tf.sqrt(tf.add_n(squared_l2_norms)) 128 | div = tf.maximum(global_norm / self._l2_norm_clip, 1.) 129 | clipped_flat = [g / div for g in grads_flat] 130 | clipped_grads = nest.pack_sequence_as(grads_list, clipped_flat) 131 | return clipped_grads 132 | 133 | clipped_grads = tf.vectorized_map(process_microbatch, microbatch_losses) 134 | 135 | def reduce_noise_normalize_batch(stacked_grads): 136 | summed_grads = tf.reduce_sum(stacked_grads, axis=0) 137 | noise_stddev = self._l2_norm_clip * self._noise_multiplier 138 | noise = tf.random.normal(tf.shape(summed_grads), 139 | stddev=noise_stddev) 140 | noised_grads = summed_grads + noise 141 | return noised_grads / tf.cast(self._num_microbatches, tf.float32) 142 | 143 | final_grads = nest.map_structure(reduce_noise_normalize_batch, 144 | clipped_grads) 145 | 146 | return list(zip(final_grads, var_list)) 147 | 148 | return DPOptimizerClass 149 | 150 | 151 | VectorizedDPAdagrad = make_vectorized_optimizer_class(AdagradOptimizer) 152 | VectorizedDPAdam = make_vectorized_optimizer_class(AdamOptimizer) 153 | VectorizedDPSGD = make_vectorized_optimizer_class(GradientDescentOptimizer) 154 | -------------------------------------------------------------------------------- /tensorflow_privacy/tutorials/bolton_tutorial.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tutorial for bolt_on module, the model and the optimizer.""" 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | import tensorflow as tf # pylint: disable=wrong-import-position 19 | from privacy.bolt_on import losses # pylint: disable=wrong-import-position 20 | from privacy.bolt_on import models # pylint: disable=wrong-import-position 21 | from privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position 22 | # ------- 23 | # First, we will create a binary classification dataset with a single output 24 | # dimension. The samples for each label are repeated data points at different 25 | # points in space. 26 | # ------- 27 | # Parameters for dataset 28 | n_samples = 10 29 | input_dim = 2 30 | n_outputs = 1 31 | # Create binary classification dataset: 32 | x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), 33 | tf.constant(1, tf.float32, (n_samples, input_dim))] 34 | y_stack = [tf.constant(0, tf.float32, (n_samples, 1)), 35 | tf.constant(1, tf.float32, (n_samples, 1))] 36 | x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0) 37 | print(x.shape, y.shape) 38 | generator = tf.data.Dataset.from_tensor_slices((x, y)) 39 | generator = generator.batch(10) 40 | generator = generator.shuffle(10) 41 | # ------- 42 | # First, we will explore using the pre - built BoltOnModel, which is a thin 43 | # wrapper around a Keras Model using a single - layer neural network. 44 | # It automatically uses the BoltOn Optimizer which encompasses all the logic 45 | # required for the BoltOn Differential Privacy method. 46 | # ------- 47 | bolt = models.BoltOnModel(n_outputs) # tell the model how many outputs we have. 48 | # ------- 49 | # Now, we will pick our optimizer and Strongly Convex Loss function. The loss 50 | # must extend from StrongConvexMixin and implement the associated methods.Some 51 | # existing loss functions are pre - implemented in bolt_on.loss 52 | # ------- 53 | optimizer = tf.optimizers.SGD() 54 | reg_lambda = 1 55 | C = 1 56 | radius_constant = 1 57 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) 58 | # ------- 59 | # For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy 60 | # to be 1; these are all tunable and their impact can be read in losses. 61 | # StrongConvexBinaryCrossentropy.We then compile the model with the chosen 62 | # optimizer and loss, which will automatically wrap the chosen optimizer with 63 | # the BoltOn Optimizer, ensuring the required components function as required 64 | # for privacy guarantees. 65 | # ------- 66 | bolt.compile(optimizer, loss) 67 | # ------- 68 | # To fit the model, the optimizer will require additional information about 69 | # the dataset and model.These parameters are: 70 | # 1. the class_weights used 71 | # 2. the number of samples in the dataset 72 | # 3. the batch size which the model will try to infer, if possible. If not, 73 | # you will be required to pass these explicitly to the fit method. 74 | # 75 | # As well, there are two privacy parameters than can be altered: 76 | # 1. epsilon, a float 77 | # 2. noise_distribution, a valid string indicating the distriution to use (must 78 | # be implemented) 79 | # 80 | # The BoltOnModel offers a helper method,.calculate_class_weight to aid in 81 | # class_weight calculation. 
82 | # -------
83 | # required parameters
84 | class_weight = None  # default, use .calculate_class_weight for other values
85 | batch_size = None  # default, if it cannot be inferred, specify this
86 | n_samples = None  # default, if it cannot be inferred, specify this
87 | # privacy parameters
88 | epsilon = 2
89 | noise_distribution = 'laplace'
90 | 
91 | bolt.fit(x,
92 |          y,
93 |          epsilon=epsilon,
94 |          class_weight=class_weight,
95 |          batch_size=batch_size,
96 |          n_samples=n_samples,
97 |          noise_distribution=noise_distribution,
98 |          epochs=2)
99 | # -------
100 | # We may also train on a generator object, or try different optimizers and loss
101 | # functions. Below, we will see that we must pass the number of samples, as the
102 | # fit method is unable to infer it for a generator.
103 | # -------
104 | optimizer2 = tf.optimizers.Adam()
105 | bolt.compile(optimizer2, loss)
106 | # required parameters
107 | class_weight = None  # default, use .calculate_class_weight for other values
108 | batch_size = None  # default, if it cannot be inferred, specify this
109 | n_samples = None  # default, if it cannot be inferred, specify this
110 | # privacy parameters
111 | epsilon = 2
112 | noise_distribution = 'laplace'
113 | try:
114 |   bolt.fit(generator,
115 |            epsilon=epsilon,
116 |            class_weight=class_weight,
117 |            batch_size=batch_size,
118 |            n_samples=n_samples,
119 |            noise_distribution=noise_distribution,
120 |            verbose=0)
121 | except ValueError as e:
122 |   print(e)
123 | # -------
124 | # And now, re-running with the parameter set.
125 | # -------
126 | n_samples = 20
127 | bolt.fit_generator(generator,
128 |                    epsilon=epsilon,
129 |                    class_weight=class_weight,
130 |                    n_samples=n_samples,
131 |                    noise_distribution=noise_distribution,
132 |                    verbose=0)
133 | # -------
134 | # You don't have to use the BoltOn model to use the BoltOn method.
135 | # There are only a few requirements:
136 | # 1. make sure any requirements from the loss are implemented in the model.
137 | # 2. instantiate the optimizer and use it as a context around the fit operation.
138 | # -------
139 | # -------------------- Part 2, using the Optimizer
140 | 
141 | # -------
142 | # Here, we create our own model and set up the BoltOn optimizer.
143 | # -------
144 | 
145 | 
146 | class TestModel(tf.keras.Model):  # pylint: disable=abstract-method
147 | 
148 |   def __init__(self, reg_layer, number_of_outputs=1):
149 |     super(TestModel, self).__init__(name='test')
150 |     self.output_layer = tf.keras.layers.Dense(number_of_outputs,
151 |                                               kernel_regularizer=reg_layer)
152 | 
153 |   def call(self, inputs):  # pylint: disable=arguments-differ
154 |     return self.output_layer(inputs)
155 | 
156 | 
157 | optimizer = tf.optimizers.SGD()
158 | loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
159 | optimizer = BoltOn(optimizer, loss)
160 | # -------
161 | # Now, we instantiate our model and check requirement 1. Since our loss requires
162 | # L2 regularization over the kernel, we will pass it to the model.
163 | # -------
164 | n_outputs = 1  # parameter for model and optimizer context.
165 | test_model = TestModel(loss.kernel_regularizer(), n_outputs)
166 | test_model.compile(optimizer, loss)
167 | # -------
168 | # We comply with requirement 2 and use the BoltOn Optimizer as a context around
169 | # the fit method.
170 | # -------
171 | # parameters for context
172 | noise_distribution = 'laplace'
173 | epsilon = 2
174 | class_weights = 1  # Previously, the fit method auto-detected the class_weights.
175 | # Here, we need to pass the class_weights explicitly; 1 is the same as None.
176 | n_samples = 20
177 | batch_size = 5
178 | 
179 | with optimizer(
180 |     noise_distribution=noise_distribution,
181 |     epsilon=epsilon,
182 |     layers=test_model.layers,
183 |     class_weights=class_weights,
184 |     n_samples=n_samples,
185 |     batch_size=batch_size
186 | ) as _:
187 |   test_model.fit(x, y, batch_size=batch_size, epochs=2)
188 | 
--------------------------------------------------------------------------------