├── .bazelrc ├── BUILD ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── WORKSPACE ├── g3doc ├── README.md ├── build_docs.py ├── guide │ ├── _index.yaml │ ├── _toc.yaml │ ├── get_started.md │ ├── images │ │ └── getting-started-img.png │ └── measure_privacy.md └── tutorials │ ├── _toc.yaml │ ├── classification_privacy.ipynb │ └── privacy_report.ipynb ├── pip_tools ├── build_empirical_pip_package.sh ├── build_pip_package.sh ├── publish_empirical_pip_package.sh ├── publish_pip_package.sh ├── test_empirical_pip_package.sh └── test_pip_package.sh ├── requirements.txt ├── research ├── GDP_2019 │ ├── BUILD │ ├── adult_tutorial.py │ └── imdb_tutorial.py ├── README.md ├── audit_2020 │ ├── README.md │ ├── attacks.py │ ├── audit.py │ ├── audit_test.py │ ├── fmnist_audit.py │ └── mean_audit.py ├── dp_newton │ ├── README.md │ ├── run_privacy_utility │ └── src │ │ ├── dataset_loader.py │ │ ├── my_logistic_regression.py │ │ ├── opt_algs.py │ │ ├── print_results.py │ │ └── run.py ├── hyperparameters_2022 │ ├── README.md │ ├── figure7.py │ ├── figure7_default_values.py │ ├── lr_acc.json │ └── rdp_accountant.py ├── instahide_attack_2020 │ ├── README.md │ ├── step_1_create_graph.py │ ├── step_2_color_graph.py │ ├── step_3_second_graph.py │ ├── step_4_final_graph.py │ ├── step_5_reconstruct.py │ ├── step_6_adjust_color.py │ └── step_7_visualize.py ├── mi_lira_2021 │ ├── README.md │ ├── dataset.py │ ├── fprtpr.png │ ├── inference.py │ ├── plot.py │ ├── score.py │ ├── scripts │ │ ├── train_demo.sh │ │ └── train_demo_multigpu.sh │ └── train.py ├── mi_poison_2022 │ ├── README.md │ ├── fprtpr.png │ ├── logs │ │ └── .keep │ ├── plot_poison.py │ ├── scripts │ │ ├── train_demo.sh │ │ └── train_demo_multigpu.sh │ └── train_poison.py ├── neuracrypt_attack_2021 │ └── attack.py ├── pate_2017 │ ├── BUILD │ ├── README.md │ ├── aggregation.py │ ├── analysis.py │ ├── deep_cnn.py │ ├── input.py │ ├── metrics.py │ ├── train_student.py │ ├── train_student_mnist_250_lap_20_count_50_epochs_600.sh │ ├── train_teachers.py │ └── utils.py └── pate_2018 │ ├── BUILD │ ├── ICLR2018 │ ├── BUILD │ ├── README.md │ ├── download.py │ ├── generate_figures.sh │ ├── generate_table.sh │ ├── generate_table_data_independent.sh │ ├── plot_ls_q.py │ ├── plot_partition.py │ ├── plots_for_slides.py │ ├── rdp_bucketized.py │ ├── rdp_cumulative.py │ ├── smooth_sensitivity_table.py │ └── utility_queries_answered.py │ ├── README.md │ ├── core.py │ ├── core_test.py │ ├── smooth_sensitivity.py │ └── smooth_sensitivity_test.py ├── setup.py ├── setup_empirical.py ├── tensorflow_privacy ├── .bazelversion ├── BUILD ├── __init__.py ├── privacy │ ├── BUILD │ ├── __init__.py │ ├── analysis │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── compute_dp_sgd_privacy.py │ │ ├── compute_dp_sgd_privacy_lib.py │ │ ├── compute_dp_sgd_privacy_test.py │ │ ├── compute_noise_from_budget.py │ │ ├── compute_noise_from_budget_lib.py │ │ ├── compute_noise_from_budget_test.py │ │ ├── gdp_accountant.py │ │ ├── tensor_buffer.py │ │ ├── tensor_buffer_eager_test.py │ │ ├── tensor_buffer_graph_test.py │ │ ├── tree_aggregation_accountant.py │ │ └── tree_aggregation_accountant_test.py │ ├── bolt_on │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── losses.py │ │ ├── losses_test.py │ │ ├── models.py │ │ ├── models_test.py │ │ ├── optimizers.py │ │ └── optimizers_test.py │ ├── dp_query │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── discrete_gaussian_query.py │ │ ├── discrete_gaussian_query_test.py │ │ ├── discrete_gaussian_utils.py │ │ ├── discrete_gaussian_utils_test.py │ │ ├── distributed_discrete_gaussian_query.py │ │ ├── distributed_discrete_gaussian_query_test.py │ │ ├── distributed_skellam_query.py │ │ ├── distributed_skellam_query_test.py │ │ ├── dp_query.py │ │ ├── dp_query_test.py │ │ ├── gaussian_query.py │ │ ├── gaussian_query_test.py │ │ ├── nested_query.py │ │ ├── nested_query_test.py │ │ ├── no_privacy_query.py │ │ ├── no_privacy_query_test.py │ │ ├── normalized_query.py │ │ ├── normalized_query_test.py │ │ ├── quantile_adaptive_clip_sum_query.py │ │ ├── quantile_adaptive_clip_sum_query_test.py │ │ ├── quantile_adaptive_clip_tree_query.py │ │ ├── quantile_adaptive_clip_tree_query_test.py │ │ ├── quantile_estimator_query.py │ │ ├── quantile_estimator_query_test.py │ │ ├── restart_query.py │ │ ├── restart_query_test.py │ │ ├── test_utils.py │ │ ├── tree_aggregation.py │ │ ├── tree_aggregation_query.py │ │ ├── tree_aggregation_query_test.py │ │ ├── tree_aggregation_test.py │ │ ├── tree_range_query.py │ │ └── tree_range_query_test.py │ ├── estimators │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── binary_class_head.py │ │ ├── binary_class_head_test.py │ │ ├── dnn.py │ │ ├── dnn_test.py │ │ ├── head_utils.py │ │ ├── multi_class_head.py │ │ ├── multi_class_head_test.py │ │ ├── multi_label_head.py │ │ ├── multi_label_head_test.py │ │ ├── test_utils.py │ │ └── v1 │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── dnn.py │ │ │ ├── dnn_test.py │ │ │ ├── head.py │ │ │ ├── head_test.py │ │ │ ├── linear.py │ │ │ └── linear_test.py │ ├── fast_gradient_clipping │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── bert_encoder_utils.py │ │ ├── bert_encoder_utils_test.py │ │ ├── clip_grads.py │ │ ├── clip_grads_test.py │ │ ├── common_manip_utils.py │ │ ├── common_test_utils.py │ │ ├── gradient_clipping_utils.py │ │ ├── gradient_clipping_utils_test.py │ │ ├── layer_registry.py │ │ ├── noise_utils.py │ │ ├── noise_utils_test.py │ │ ├── registry_functions │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── dense.py │ │ │ ├── dense_test.py │ │ │ ├── dense_tpu_test.py │ │ │ ├── einsum_dense.py │ │ │ ├── einsum_dense_test.py │ │ │ ├── einsum_dense_tpu_test.py │ │ │ ├── einsum_utils.py │ │ │ ├── einsum_utils_test.py │ │ │ ├── embedding.py │ │ │ ├── embedding_test.py │ │ │ ├── embedding_tpu_test.py │ │ │ ├── layer_normalization.py │ │ │ ├── layer_normalization_test.py │ │ │ ├── layer_normalization_tpu_test.py │ │ │ ├── multi_head_attention.py │ │ │ ├── multi_head_attention_test.py │ │ │ ├── multi_head_attention_tpu_test.py │ │ │ ├── nlp_on_device_embedding.py │ │ │ ├── nlp_on_device_embedding_test.py │ │ │ ├── nlp_on_device_embedding_tpu_test.py │ │ │ ├── nlp_position_embedding.py │ │ │ ├── nlp_position_embedding_test.py │ │ │ ├── nlp_position_embedding_tpu_test.py │ │ │ └── registry_function_utils.py │ │ └── type_aliases.py │ ├── keras_models │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── dp_keras_model.py │ │ ├── dp_keras_model_distributed_test.py │ │ └── dp_keras_model_test.py │ ├── logistic_regression │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── datasets_test.py │ │ ├── multinomial_logistic.py │ │ ├── multinomial_logistic_test.py │ │ ├── single_layer_softmax.py │ │ └── single_layer_softmax_test.py │ ├── membership_inference_attack │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_structures.py │ │ ├── dataset_slicing.py │ │ ├── keras_evaluation.py │ │ ├── membership_inference_attack.py │ │ ├── models.py │ │ ├── plotting.py │ │ ├── privacy_report.py │ │ ├── seq2seq_mia.py │ │ └── tf_estimator_evaluation.py │ ├── optimizers │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── clip_and_aggregate_gradients.py │ │ ├── clip_and_aggregate_gradients_test.py │ │ ├── dp_optimizer.py │ │ ├── dp_optimizer_eager_test.py │ │ ├── dp_optimizer_keras.py │ │ ├── dp_optimizer_keras_sparse.py │ │ ├── dp_optimizer_keras_sparse_distributed_test.py │ │ ├── dp_optimizer_keras_sparse_test.py │ │ ├── dp_optimizer_keras_test.py │ │ ├── dp_optimizer_keras_vectorized.py │ │ ├── dp_optimizer_test.py │ │ ├── dp_optimizer_vectorized.py │ │ └── dp_optimizer_vectorized_test.py │ ├── privacy_tests │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── epsilon_lower_bound.py │ │ ├── epsilon_lower_bound_test.py │ │ ├── membership_inference_attack │ │ │ ├── BUILD │ │ │ ├── CONTRIBUTING.md │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── advanced_mia.py │ │ │ ├── advanced_mia_example.py │ │ │ ├── advanced_mia_test.py │ │ │ ├── codelab_roc_fig.png │ │ │ ├── codelabs │ │ │ │ ├── BUILD │ │ │ │ ├── README.md │ │ │ │ ├── codelab.ipynb │ │ │ │ ├── example.py │ │ │ │ ├── membership_probability_codelab.ipynb │ │ │ │ ├── third_party │ │ │ │ │ └── seq2seq_membership_inference │ │ │ │ │ │ ├── BUILD │ │ │ │ │ │ ├── LICENSE │ │ │ │ │ │ └── seq2seq_membership_inference_codelab.ipynb │ │ │ │ └── word2vec_codelab.ipynb │ │ │ ├── data_structures.py │ │ │ ├── data_structures_test.py │ │ │ ├── dataset_slicing.py │ │ │ ├── dataset_slicing_test.py │ │ │ ├── keras_evaluation.py │ │ │ ├── keras_evaluation_example.py │ │ │ ├── keras_evaluation_test.py │ │ │ ├── membership_inference_attack.py │ │ │ ├── membership_inference_attack_test.py │ │ │ ├── models.py │ │ │ ├── models_test.py │ │ │ ├── plotting.py │ │ │ ├── privacy_report.py │ │ │ ├── privacy_report_test.py │ │ │ ├── seq2seq_mia.py │ │ │ ├── seq2seq_mia_test.py │ │ │ ├── tf_estimator_evaluation.py │ │ │ ├── tf_estimator_evaluation_example.py │ │ │ ├── tf_estimator_evaluation_test.py │ │ │ └── utils_tensorboard.py │ │ ├── secret_sharer │ │ │ ├── BUILD │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── exposures.py │ │ │ ├── exposures_test.py │ │ │ ├── generate_secrets.py │ │ │ ├── generate_secrets_test.py │ │ │ ├── secret_sharer_example.ipynb │ │ │ └── secret_sharer_image_example.ipynb │ │ ├── utils.py │ │ ├── utils_test.py │ │ └── version.py │ └── sparsity_preserving_noise │ │ ├── BUILD │ │ ├── layer_registry.py │ │ ├── registry_functions │ │ ├── BUILD │ │ ├── embedding.py │ │ └── embedding_test.py │ │ ├── sparse_noise_utils.py │ │ ├── sparse_noise_utils_test.py │ │ └── type_aliases.py ├── v1 │ ├── BUILD │ └── __init__.py └── version.py └── tutorials ├── BUILD ├── README.md ├── bolton_tutorial.py ├── lm_dpsgd_tutorial.py ├── mnist_dpsgd_tutorial.py ├── mnist_dpsgd_tutorial_common.py ├── mnist_dpsgd_tutorial_eager.py ├── mnist_dpsgd_tutorial_keras.py ├── mnist_dpsgd_tutorial_keras_model.py ├── mnist_dpsgd_tutorial_tpu.py ├── mnist_dpsgd_tutorial_vectorized.py ├── mnist_lr_tutorial.py ├── movielens_tutorial.py └── walkthrough ├── BUILD ├── README.md └── mnist_scratch.py /.bazelrc: -------------------------------------------------------------------------------- 1 | # TensorFlow Privacy Bazel configuration 2 | # 3 | # See https://docs.bazel.build/versions/master/user-manual.html#config for 4 | # details on the various configuration options. 5 | 6 | # Enable verbose failures. 7 | build --verbose_failures 8 | 9 | # Enable logging rc options. 10 | common --announce_rc 11 | 12 | # Enable platform-specific configs from bazelrc files. 13 | common --enable_platform_specific_config 14 | 15 | # Enable logging error output. 16 | test --test_output=errors 17 | test --test_summary=detailed 18 | 19 | # Execute commands as local subprocesses 20 | build --spawn_strategy=local 21 | -------------------------------------------------------------------------------- /BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | exports_files(["LICENSE"]) 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Attack library 26 | 27 | If you wish to add novel attacks to the attack library, please check our 28 | [guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md) 29 | 30 | ## Community Guidelines 31 | 32 | This project follows Google's 33 | [Open Source Community Guidelines](https://opensource.google.com/conduct/). 34 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "org_tensorflow_privacy") 2 | 3 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") 4 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 5 | 6 | git_repository( 7 | name = "bazel_skylib", 8 | remote = "https://github.com/bazelbuild/bazel-skylib.git", 9 | tag = "1.0.3", 10 | ) 11 | 12 | git_repository( 13 | name = "rules_python", 14 | remote = "https://github.com/bazelbuild/rules_python.git", 15 | tag = "0.5.0", 16 | ) 17 | -------------------------------------------------------------------------------- /g3doc/README.md: -------------------------------------------------------------------------------- 1 | # Under construction 2 | -------------------------------------------------------------------------------- /g3doc/guide/_toc.yaml: -------------------------------------------------------------------------------- 1 | toc: 2 | - title: Overview 3 | path: /responsible_ai/privacy/guide/ 4 | - title: Get Started 5 | path: /responsible_ai/privacy/guide/get_started 6 | - title: Measure Privacy 7 | path: /responsible_ai/privacy/guide/measure_privacy 8 | -------------------------------------------------------------------------------- /g3doc/guide/images/getting-started-img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/g3doc/guide/images/getting-started-img.png -------------------------------------------------------------------------------- /g3doc/guide/measure_privacy.md: -------------------------------------------------------------------------------- 1 | # Measure Privacy 2 | 3 | Differential privacy is a framework for measuring the privacy guarantees 4 | provided by an algorithm and can be expressed using the values ε (epsilon) and δ 5 | (delta). Of the two, ε is more important and more sensitive to the choice of 6 | hyperparameters. Roughly speaking, they mean the following: 7 | 8 | * ε gives a ceiling on how much the probability of a particular output can 9 | increase by including (or removing) a single training example. You usually 10 | want it to be a small constant (less than 10, or for more stringent privacy 11 | guarantees, less than 1). However, this is only an upper bound, and a large 12 | value of epsilon may still mean good practical privacy. 13 | * δ bounds the probability of an arbitrary change in model behavior. You can 14 | usually set this to a very small number (1e-7 or so) without compromising 15 | utility. A rule of thumb is to set it to be less than the inverse of the 16 | training data size. 17 | 18 | The relationship between training hyperparameters and the resulting privacy in 19 | terms of (ε, δ) is complicated and tricky to state explicitly. Our current 20 | recommended approach is at the bottom of the [Get Started page](get_started.md), 21 | which involves finding the maximum noise multiplier one can use while still 22 | having reasonable utility, and then scaling the noise multiplier and number of 23 | microbatches. TensorFlow Privacy provides a tool, `compute_dp_sgd_privacy` to 24 | compute (ε, δ) based on the noise multiplier σ, the number of training steps 25 | taken, and the fraction of input data consumed at each step. The amount of 26 | privacy increases with the noise multiplier σ and decreases the more times the 27 | data is used on training. Generally, in order to achieve an epsilon of at most 28 | 10.0, we need to set the noise multiplier to around 0.3 to 0.5, depending on the 29 | dataset size and number of epochs. See the 30 | [classification privacy tutorial](../tutorials/classification_privacy.ipynb) to 31 | see the approach. 32 | 33 | For more detail, see 34 | [the original DP-SGD paper](https://arxiv.org/pdf/1607.00133.pdf). 35 | 36 | You can use `compute_dp_sgd_privacy` to find out the epsilon given a fixed delta 37 | value for your model [../tutorials/classification_privacy.ipynb]: 38 | 39 | * `q` : the sampling ratio - the probability of an individual training point 40 | being included in a mini batch (`batch_size/number_of_examples`). 41 | * `noise_multiplier` : A float that governs the amount of noise added during 42 | training. Generally, more noise results in better privacy and lower utility. 43 | * `steps` : The number of global steps taken. 44 | 45 | A detailed writeup of the theory behind the computation of epsilon and delta is 46 | available at 47 | [Differential Privacy of the Sampled Gaussian Mechanism](https://arxiv.org/abs/1908.10530). 48 | -------------------------------------------------------------------------------- /g3doc/tutorials/_toc.yaml: -------------------------------------------------------------------------------- 1 | toc: 2 | - title: Compute privacy 3 | path: /responsible_ai/privacy/tutorials/classification_privacy 4 | - title: Assess privacy risk 5 | path: /responsible_ai/privacy/tutorials/privacy_report 6 | -------------------------------------------------------------------------------- /pip_tools/build_empirical_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to build the TensorFlow Privacy/Privacy Tests pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Build the pip package 32 | pip install --upgrade setuptools wheel 33 | python "setup_empirical.py" sdist bdist_wheel 34 | 35 | # Cleanup. 36 | deactivate 37 | } 38 | 39 | main "$@" 40 | -------------------------------------------------------------------------------- /pip_tools/build_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to build the TensorFlow Privacy pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Build the pip package 32 | pip install --upgrade setuptools wheel 33 | python "setup.py" sdist bdist_wheel 34 | 35 | # Cleanup. 36 | deactivate 37 | } 38 | 39 | main "$@" 40 | -------------------------------------------------------------------------------- /pip_tools/publish_empirical_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to publish the TensorFlow Privacy pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Publish the pip package. 32 | package="$(ls "dist/"*".whl" | head -n1)" 33 | pip install --upgrade twine 34 | twine check "${package}" 35 | twine upload "${package}" 36 | 37 | # Cleanup. 38 | deactivate 39 | } 40 | 41 | main "$@" 42 | -------------------------------------------------------------------------------- /pip_tools/publish_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to publish the TensorFlow Privacy pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Publish the pip package. 32 | package="$(ls "dist/"*".whl" | head -n1)" 33 | pip install --upgrade twine 34 | twine check "${package}" 35 | twine upload "${package}" 36 | 37 | # Cleanup. 38 | deactivate 39 | } 40 | 41 | main "$@" 42 | -------------------------------------------------------------------------------- /pip_tools/test_empirical_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to build the TensorFlow Privacy pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Test the pip package. 32 | package="$(ls "dist/"*".whl" | head -n1)" 33 | pip install --upgrade "${package}" 34 | pip freeze 35 | python -c "import tensorflow_privacy.privacy.privacy_tests as pt; print(pt.__version__)" 36 | 37 | # Cleanup. 38 | deactivate 39 | } 40 | 41 | main "$@" 42 | -------------------------------------------------------------------------------- /pip_tools/test_pip_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2020, The TensorFlow Privacy Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tool to build the TensorFlow Privacy pip package. 17 | set -e 18 | 19 | main() { 20 | # Create a working directory. 21 | local temp_dir="$(mktemp -d)" 22 | trap "rm -rf ${temp_dir}" EXIT 23 | 24 | # Create a virtual environment 25 | python3.11 -m venv "${temp_dir}/venv" 26 | source "${temp_dir}/venv/bin/activate" 27 | python --version 28 | pip install --upgrade pip 29 | pip --version 30 | 31 | # Test the pip package. 32 | package="$(ls "dist/"*".whl" | head -n1)" 33 | pip install --upgrade "${package}" 34 | pip freeze 35 | python -c "import tensorflow_privacy as tfp; print(tfp.__version__)" 36 | 37 | # Cleanup. 38 | deactivate 39 | } 40 | 41 | main "$@" 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for Tensorflow Privacy. 2 | # 3 | # If you add a *new* dependency and it is required by the TensorFlow Federated 4 | # package, also add the dependency to `setup.py`. 5 | # 6 | # If you update the version of an *existing* dependency and it is required by 7 | # the TensorFlow Federated package, also update the version of the dependency in 8 | # `setup.py`. 9 | # 10 | # * For packages that have a stable release, we use a version that is 11 | # compatible with that release (e.g. `~=x.y`). See 12 | # https://peps.python.org/pep-0440/#compatible-release for more information. 13 | # * For packages that do not have a stable release, we use a version that 14 | # matches a release that has been tested (e.g. `==x.y.z`). See 15 | # https://peps.python.org/pep-0440/#version-matching for more information. 16 | # 17 | # This assumes that the packages follows Semantic Versioning, see 18 | # https://semver.org/. If a package follows a different versioning scheme or 19 | # requires unique handling, we use a different version specifier and comment the 20 | # versioning scheme or reasoning. 21 | # 22 | # Note: As of 2022-08-17 there is bug in `pip` when multiple packages use the 23 | # compatible release operator `~=` to specify a version and one of those 24 | # versions ends in `0`. See https://github.com/pypa/pip/issues/9613 for more 25 | # information. In this case, use the equivalent clause `>=x.0,==x.*` instead of 26 | # `~=x.0`. 27 | 28 | absl-py>=1.0,==1.* 29 | dm-tree==0.1.8 30 | dp-accounting==0.4.4 31 | immutabledict~=2.2 32 | matplotlib~=3.3 33 | numpy~=1.21 34 | packaging~=22.0 35 | pandas~=1.4 36 | scikit-learn>=1.0,==1.* 37 | scipy~=1.9 38 | statsmodels==0.14.0 39 | tensorflow-datasets~=4.5 40 | tensorflow-estimator~=2.4 41 | tensorflow-probability~=0.22.0 42 | tensorflow>=2.4.0,<=2.15.0 43 | tf-models-official~=2.13 44 | -------------------------------------------------------------------------------- /research/GDP_2019/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | filegroup( 6 | name = "ignore_srcs", 7 | srcs = [ 8 | "adult_tutorial.py", 9 | "imdb_tutorial.py", 10 | ], 11 | tags = ["ignore_srcs"], 12 | ) 13 | -------------------------------------------------------------------------------- /research/README.md: -------------------------------------------------------------------------------- 1 | # Research 2 | 3 | This folder contains code to reproduce results from research papers. Currently, 4 | the following papers are included: 5 | 6 | * Semi-supervised Knowledge Transfer for Deep Learning from Private Training 7 | Data (ICLR 2017): `pate_2017` 8 | 9 | * Scalable Private Learning with PATE (ICLR 2018): `pate_2018` 10 | -------------------------------------------------------------------------------- /research/audit_2020/README.md: -------------------------------------------------------------------------------- 1 | # Auditing Private Machine Learning 2 | Code for "Auditing Differentially Private Machine Learning: How Private is Private SGD?": https://arxiv.org/abs/2006.07709. This implementation is simple but not easily parallelizable. For a parallelizable version which is harder to run, see https://github.com/jagielski/auditing-dpsgd. 3 | 4 | ## Usage 5 | This attack relies on the AuditAttack class found in audit.py. The class allows one to generate poisoning, run trials to compute membership scores for the poisoning, and then use the resulting membership scores to compute a lower bound on epsilon. 6 | 7 | ## Examples 8 | Two examples are provided, mean_audit.py and fmnist_audit.py. fmnist_audit.py attacks the FashionMNIST dataset. It allows the user to specify between standard bkdr attacks and clipping-aware attacks, and also allows the user to specify between multiple poisoning attack sizes, model types, and whether to load saved model weights to start training from. mean_audit.py audits a model which computes the mean of a dataset. This provides an example of user-provided poisoning samples, rather than those autogenerated from our attacks.py library. 9 | 10 | ## Requirements 11 | Requires scikit-learn=0.24.1, statsmodels=0.12.2, tensorflow=1.14.0 12 | -------------------------------------------------------------------------------- /research/dp_newton/README.md: -------------------------------------------------------------------------------- 1 | # Project Title 2 | 3 | Faster Differentially Private Convex Optimization via Second-Order Methods 4 | https://arxiv.org/abs/2112.03570
5 | by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, Abhradeep Thakurta. 6 | 7 | ## Description 8 | 9 | Implementation of the optimizatoin algorithms proposed in 10 | https://arxiv.org/abs/2112.03570
11 | 12 | ## Getting Started 13 | 14 | You will need to install fairly standard dependencies 15 | 16 | run 'run_privacy_utility' to compare the convergence speed and excess loss of 17 | different algorithms. 18 | 19 | ### Citation 20 | 21 | You can cite this paper with 22 | 23 | ``` 24 | @article{ganesh2023faster, 25 | title={Faster Differentially Private Convex Optimization 26 | via Second-Order Methods}, 27 | author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas 28 | and Thakurta, Abhradeep}, 29 | journal={arXiv preprint arXiv:2305.13209}, 30 | year={2023} 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /research/dp_newton/run_privacy_utility: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | 16 | rm -rf ./src/results 17 | mkdir -p ./src/results 18 | rm -rf ./src/datasets_directory 19 | mkdir -p ./src/datasets_directory 20 | dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset' 21 | privacy_budget="3.0" # epsilon in DP 22 | num_iteration_GD="100" # number of iterations for DP-GD 23 | num_iteration_NT="15" # number of iterations for damped newton 24 | num_iteration_our="15" # number of iterations for double noise (proposed method) 25 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD 26 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac $"0.7" 27 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac $"0.7" --trace_frac $"0.1" --trace_coeff $"0.5" 28 | $HOME/google-code/dpoptVenv/bin/python3 ./src/print_results.py 29 | -------------------------------------------------------------------------------- /research/dp_newton/src/print_results.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | 16 | """print the achievable error of different algorithms""" 17 | 18 | # pylint: skip-file 19 | # pyformat: disable 20 | 21 | import json 22 | import os 23 | import numpy as np 24 | 25 | RESULTS_PATH = './src/results/' 26 | excess_loss = {} 27 | opt_algs = [ 28 | 'DPGD', 29 | 'DN-Hess-add', 30 | 'DN-UB-add', 31 | 'DN-Hess-clip', 32 | 'DN-UB-clip', 33 | 'private-newton', 34 | ] 35 | for filename in os.listdir(RESULTS_PATH): 36 | f = os.path.join(RESULTS_PATH, filename) 37 | with open(f, encoding='utf-8') as json_file: 38 | data = json.load(json_file) 39 | for alg in data.keys(): 40 | if alg in opt_algs: 41 | loss_avg = np.array(data[alg]['loss_avg']) 42 | loss_std = np.array(data[alg]['loss_std']) 43 | clock_time = np.array(data[alg]['clock_time_avg']) 44 | print('optimization algorithm: ', alg) 45 | print('excess loss: ' + str(loss_avg[-1])) 46 | print('run time: ' + str(clock_time[-1]) + '(sec)') 47 | print('-----') 48 | -------------------------------------------------------------------------------- /research/hyperparameters_2022/README.md: -------------------------------------------------------------------------------- 1 | # Hyperparameter Tuning with Renyi Differential Privacy 2 | 3 | ### Nicolas Papernot and Thomas Steinke 4 | 5 | This repository contains the code used to reproduce some of the experiments in 6 | our 7 | [ICLR 2022 paper on hyperparameter tuning with differential privacy](https://openreview.net/forum?id=-70L8lpp9DF). 8 | 9 | You can reproduce Figure 7 in the paper by running `figure7.py`. It loads by 10 | default values used to plot the figure contained in the paper, and we also 11 | included a dictionary `lr_acc.json` containing the accuracy of a large number of 12 | ML models trained with different learning rates. If you'd like to try our 13 | approach to fine-tune your own parameters, you will have to modify the code that 14 | interacts with this dictionary (`lr_acc` in the code from `figure7.py`). 15 | 16 | ## Citing this work 17 | 18 | If you use this repository for academic research, you are highly encouraged 19 | (though not required) to cite our paper: 20 | 21 | ``` 22 | @inproceedings{ 23 | papernot2022hyperparameter, 24 | title={Hyperparameter Tuning with Renyi Differential Privacy}, 25 | author={Nicolas Papernot and Thomas Steinke}, 26 | booktitle={International Conference on Learning Representations}, 27 | year={2022}, 28 | url={https://openreview.net/forum?id=-70L8lpp9DF} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /research/hyperparameters_2022/figure7_default_values.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Default values for generating Figure 7.""" 15 | 16 | import json 17 | import numpy as np 18 | 19 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] + 20 | list(range(5, 64)) + [128, 256, 512]) 21 | rdp = [ 22 | 2.04459751e-01, 2.45818210e-01, 2.87335988e-01, 3.29014798e-01, 23 | 3.70856385e-01, 4.12862542e-01, 4.97375951e-01, 5.82570265e-01, 24 | 6.68461534e-01, 7.55066706e-01, 8.42403732e-01, 1.01935100e+00, 25 | 1.19947313e+00, 1.38297035e+00, 1.57009549e+00, 1.76124790e+00, 26 | 1.95794503e+00, 2.19017390e+00, 4.48407479e+00, 3.08305394e+02, 27 | 4.98610133e+03, 1.11363692e+04, 1.72590079e+04, 2.33487231e+04, 28 | 2.94091123e+04, 3.54439803e+04, 4.14567914e+04, 4.74505356e+04, 29 | 5.34277419e+04, 5.93905358e+04, 6.53407051e+04, 7.12797586e+04, 30 | 7.72089762e+04, 8.31294496e+04, 8.90421151e+04, 9.49477802e+04, 31 | 1.00847145e+05, 1.06740819e+05, 1.12629335e+05, 1.18513163e+05, 32 | 1.24392717e+05, 1.30268362e+05, 1.36140424e+05, 1.42009194e+05, 33 | 1.47874932e+05, 1.53737871e+05, 1.59598221e+05, 1.65456171e+05, 34 | 1.71311893e+05, 1.77165542e+05, 1.83017260e+05, 1.88867175e+05, 35 | 1.94715404e+05, 2.00562057e+05, 2.06407230e+05, 2.12251015e+05, 36 | 2.18093495e+05, 2.23934746e+05, 2.29774840e+05, 2.35613842e+05, 37 | 2.41451813e+05, 2.47288808e+05, 2.53124881e+05, 2.58960080e+05, 38 | 2.64794449e+05, 2.70628032e+05, 2.76460867e+05, 2.82292992e+05, 39 | 2.88124440e+05, 6.66483142e+05, 1.41061455e+06, 2.89842152e+06 40 | ] 41 | with open("lr_acc.json", "r") as dict_f: 42 | lr_acc = json.load(dict_f) 43 | num_trials = 1000 44 | lr_rates = np.logspace(np.log10(1e-4), np.log10(1.), num=1000)[-400:] 45 | gammas = np.asarray( 46 | [1e-07, 8e-06, 1e-04, 0.00024, 0.0015, 0.0035, 0.025, 0.05, 0.1, 0.2, 0.5]) 47 | non_private_acc = 0.9594 48 | -------------------------------------------------------------------------------- /research/instahide_attack_2020/README.md: -------------------------------------------------------------------------------- 1 | Implementation of our reconstruction attack on InstaHide. 2 | 3 | Is Private Learning Possible with Instance Encoding? 4 | Nicholas Carlini, Samuel Deng, Sanjam Garg, Somesh Jha, Saeed Mahloujifar, Mohammad Mahmoody, Shuang Song, Abhradeep Thakurta, Florian Tramer 5 | https://arxiv.org/abs/2011.05315 6 | 7 | 8 | ## Overview 9 | 10 | InstaHide is a recent privacy-preserving machine learning framework. 11 | It takes a (sensitive) dataset and generates encoded images that are privacy-preserving. 12 | Our attack breaks InstaHide and shows it does not offer meaningful privacy. 13 | Given the encoded dataset, we can recover a near-identical copy of the original images. 14 | 15 | This repository implements the attack described in our paper. It consists of a number of 16 | steps that shoul be run sequentially. It assumes access to pre-trained neural network 17 | classifiers that should be downloaded following the steps below. 18 | 19 | 20 | ### Requirements 21 | 22 | * Python, version ≥ 3.5 23 | * jax 24 | * jaxlib 25 | * objax (https://github.com/google/objax) 26 | * PIL 27 | * sklearn 28 | 29 | 30 | ### Running the attack 31 | 32 | To reproduce our results and run the attack, each of the files should be run in turn. 33 | 34 | 0. Download the necessary dependency files: 35 | - (encryption.npy)[https://www.dropbox.com/sh/8zdsr1sjftia4of/AAA-60TOjGKtGEZrRmbawwqGa?dl=0] and (labels.npy)[https://www.dropbox.com/sh/8zdsr1sjftia4of/AAA-60TOjGKtGEZrRmbawwqGa?dl=0] from the (InstaHide Challenge)[https://github.com/Hazelsuko07/InstaHide_Challenge] 36 | - The (saved models)[https://drive.google.com/file/d/1YfKzGRfnnzKfUKpLjIRXRto8iD4FdwGw/view?usp=sharing] used to run the attack 37 | - Set up all the requirements as above 38 | 39 | 1. Run `step_1_create_graph.py`. Produce the similarity graph to pair together encoded images that share an original image. 40 | 41 | 2. Run `step_2_color_graph.py`. Color the graph to find 50 dense cliques. 42 | 43 | 3. Run `step_3_second_graph.py`. Create a new bipartite similarity graph. 44 | 45 | 4. Run `step_4_final_graph.py`. Solve the matching problem to assign encoded images to original images. 46 | 47 | 5. Run `step_5_reconstruct.py`. Reconstruct the original images. 48 | 49 | 6. Run `step_6_adjust_color.py`. Adjust the color curves to match. 50 | 51 | 7. Run `step_7_visualize.py`. Show the final resulting images. 52 | 53 | ## Citation 54 | 55 | You can cite this attack at 56 | 57 | ``` 58 | @inproceedings{carlini2021private, 59 | title={Is Private Learning Possible with Instance Encoding?}, 60 | author={Carlini, Nicholas and Deng, Samuel and Garg, Sanjam and Jha, Somesh and Mahloujifar, Saeed and Mahmoody, Mohammad and Thakurta, Abhradeep and Tram{\`e}r, Florian}, 61 | booktitle={2021 IEEE Symposium on Security and Privacy (SP)}, 62 | pages={410--427}, 63 | year={2021}, 64 | organization={IEEE} 65 | } 66 | ``` -------------------------------------------------------------------------------- /research/instahide_attack_2020/step_1_create_graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """ 17 | Create the similarity graph given the encoded images by running the similarity 18 | neural network over all pairs of images. 19 | """ 20 | 21 | import objax 22 | import numpy as np 23 | import jax.numpy as jn 24 | import functools 25 | import os 26 | import random 27 | 28 | from objax.zoo import wide_resnet 29 | 30 | def setup(): 31 | global model 32 | class DoesUseSame(objax.Module): 33 | def __init__(self): 34 | fn = functools.partial(wide_resnet.WideResNet, depth=28, width=6) 35 | self.model = fn(6,2) 36 | 37 | model_vars = self.model.vars() 38 | self.ema = objax.optimizer.ExponentialMovingAverage(model_vars, momentum=0.999, debias=True) 39 | 40 | 41 | def predict_op(x,y): 42 | # The model takes the two images and checks if they correspond 43 | # to the same original image. 44 | xx = jn.concatenate([jn.abs(x), 45 | jn.abs(y)], 46 | axis=1) 47 | return self.model(xx, training=False) 48 | 49 | self.predict = objax.Jit(self.ema.replace_vars(predict_op), model_vars + self.ema.vars()) 50 | self.predict_fast = objax.Parallel(self.ema.replace_vars(predict_op), model_vars + self.ema.vars()) 51 | 52 | model = DoesUseSame() 53 | checkpoint = objax.io.Checkpoint("models/step1/", keep_ckpts=5, makedir=True) 54 | start_epoch, last_ckpt = checkpoint.restore(model.vars()) 55 | 56 | 57 | def doall(): 58 | global graph 59 | n = np.load("data/encryption.npy") 60 | n = np.transpose(n, (0,3,1,2)) 61 | 62 | # Compute the similarity between each encoded image and all others 63 | # This is n^2 work but should run fairly quickly, especially given 64 | # more than one GPU. Otherwise about an hour or so. 65 | graph = [] 66 | with model.vars().replicate(): 67 | for i in range(5000): 68 | print(i) 69 | v = model.predict_fast(np.tile(n[i:i+1], (5000,1,1,1)), n) 70 | graph.append(np.array(v[:,0]-v[:,1])) 71 | graph = np.array(graph) 72 | np.save("data/graph.npy", graph) 73 | 74 | 75 | if __name__ == "__main__": 76 | setup() 77 | doall() 78 | -------------------------------------------------------------------------------- /research/instahide_attack_2020/step_4_final_graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import multiprocessing as mp 17 | import pickle 18 | import random 19 | import numpy as np 20 | 21 | 22 | labels = np.load("data/label.npy") 23 | nextgraph = np.load("data/nextgraph.npy") 24 | 25 | assigned = [[] for _ in range(5000)] 26 | lambdas = [[] for _ in range(5000)] 27 | for i in range(100): 28 | order = (np.argsort(nextgraph[:,i])) 29 | correct = (labels[order[:20]]>0).sum(axis=0).argmax() 30 | 31 | # Let's create the final graph 32 | # Instead of doing a full bipartite matching, let's just greedily 33 | # choose the closest 80 candidates for each encoded image to pair 34 | # together can call it a day. 35 | # This is within a percent or two of doing that, and much easier. 36 | 37 | # Also record the lambdas based on which image it coresponds to, 38 | # but if they share a label then just guess it's an even 50/50 split. 39 | 40 | 41 | for x in order[:80]: 42 | if labels[x][correct] > 0 and len(assigned[x]) < 2: 43 | assigned[x].append(i) 44 | if np.sum(labels[x]>0) == 1: 45 | # the same label was mixed in twice. punt. 46 | lambdas[x].append(labels[x][correct]/2) 47 | else: 48 | lambdas[x].append(labels[x][correct]) 49 | 50 | np.save("data/predicted_pairings_80.npy", assigned) 51 | np.save("data/predicted_lambdas_80.npy", lambdas) 52 | -------------------------------------------------------------------------------- /research/instahide_attack_2020/step_6_adjust_color.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Fix the color curves. Use a pre-trained "neural network" with <100 weights. 17 | Visually this helps a lot, even if it's not doing much of anything in pactice. 18 | """ 19 | 20 | import random 21 | 22 | import os 23 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 24 | 25 | import numpy as np 26 | import jax.numpy as jn 27 | 28 | import objax 29 | 30 | # Our extremely complicated neural network to re-color the images. 31 | # Takes one pixel at a time and fixes the color of that pixel. 32 | model = objax.nn.Sequential([objax.nn.Linear(3, 10), 33 | objax.functional.relu, 34 | objax.nn.Linear(10, 3) 35 | ]) 36 | 37 | # These are the weights. 38 | weights = [[-0.09795442, -0.26434848, -0.24964345, -0.11450608, 0.6797288, -0.48435465, 39 | 0.45307165, -0.31196147, -0.33266315, 0.20486055], 40 | [[-0.9056427, 0.02872663, -1.5114126, -0.41024876, -0.98195165, 0.1143966, 41 | 0.6763464, -0.58654785, -1.797063, -0.2176538, ], 42 | [ 1.1941166, 0.15515928, 1.1691351, -0.7256186, 0.8046044, 1.3127686, 43 | -0.77297133, -1.1761239, 0.85841715, 0.95545965], 44 | [ 0.20092924, 0.57503146, 0.22809981, 1.5288007, -0.94781816, -0.68305916, 45 | -0.5245211, 1.4042739, -0.00527458, -1.1462274, ]], 46 | [0.15683544, 0.22086962, 0.33100453], 47 | [[ 7.7239674e-01, 4.0261227e-01, -9.6466336e-03], 48 | [-2.2159107e-01, 1.5123411e-01, 3.4485441e-01], 49 | [-1.7618114e+00, -7.1886492e-01, -4.6467595e-02], 50 | [ 6.9419539e-01, 6.2531930e-01, 7.2271496e-01], 51 | [-1.1913675e+00, -6.7755884e-01, -3.5114303e-01], 52 | [ 4.8022485e-01, 1.7145030e-01, 7.4849324e-04], 53 | [ 3.8332436e-02, -7.0614147e-01, -5.5127507e-01], 54 | [-1.0929481e+00, -1.0268525e+00, -7.0265180e-01], 55 | [ 1.4880739e+00, 7.1450096e-01, 2.9102692e-01], 56 | [ 7.2846663e-01, 7.1322352e-01, -1.7453632e-01]]] 57 | 58 | for i,(k,v) in enumerate(model.vars().items()): 59 | v.assign(jn.array(weights[i])) 60 | 61 | # Do all of the re-coloring 62 | predict = objax.Jit(lambda x: model(x, training=False), 63 | model.vars()) 64 | 65 | out = model(np.load("data/private_raw.npy")) 66 | np.save("data/private.npy", out) 67 | -------------------------------------------------------------------------------- /research/instahide_attack_2020/step_7_visualize.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | """ 18 | Given the private images, draw them in a 100x100 grid for visualization. 19 | """ 20 | 21 | import numpy as np 22 | from PIL import Image 23 | import matplotlib.pyplot as plt 24 | 25 | p = np.load("data/private.npy") 26 | 27 | def toimg(x): 28 | print(x.shape) 29 | img = (x+1)*127.5 30 | img = np.clip(img, 0, 255) 31 | img = np.reshape(img, (10, 10, 32, 32, 3)) 32 | img = np.concatenate(img, axis=2) 33 | img = np.concatenate(img, axis=0) 34 | img = Image.fromarray(np.array(img,dtype=np.uint8)) 35 | return img 36 | 37 | toimg(p).save("data/reconstructed.png") 38 | 39 | -------------------------------------------------------------------------------- /research/mi_lira_2021/fprtpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/research/mi_lira_2021/fprtpr.png -------------------------------------------------------------------------------- /research/mi_lira_2021/score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import numpy as np 17 | import os 18 | import multiprocessing as mp 19 | 20 | 21 | def load_one(base): 22 | """ 23 | This loads a logits and converts it to a scored prediction. 24 | """ 25 | root = os.path.join(logdir,base,'logits') 26 | if not os.path.exists(root): return None 27 | 28 | if not os.path.exists(os.path.join(logdir,base,'scores')): 29 | os.mkdir(os.path.join(logdir,base,'scores')) 30 | 31 | for f in os.listdir(root): 32 | try: 33 | opredictions = np.load(os.path.join(root,f)) 34 | except: 35 | print("Fail") 36 | continue 37 | 38 | ## Be exceptionally careful. 39 | ## Numerically stable everything, as described in the paper. 40 | predictions = opredictions - np.max(opredictions, axis=3, keepdims=True) 41 | predictions = np.array(np.exp(predictions), dtype=np.float64) 42 | predictions = predictions/np.sum(predictions,axis=3,keepdims=True) 43 | 44 | COUNT = predictions.shape[0] 45 | # x num_examples x num_augmentations x logits 46 | y_true = predictions[np.arange(COUNT),:,:,labels[:COUNT]] 47 | print(y_true.shape) 48 | 49 | print('mean acc',np.mean(predictions[:,0,0,:].argmax(1)==labels[:COUNT])) 50 | 51 | predictions[np.arange(COUNT),:,:,labels[:COUNT]] = 0 52 | y_wrong = np.sum(predictions, axis=3) 53 | 54 | logit = (np.log(y_true.mean((1))+1e-45) - np.log(y_wrong.mean((1))+1e-45)) 55 | 56 | np.save(os.path.join(logdir, base, 'scores', f), logit) 57 | 58 | 59 | def load_stats(): 60 | with mp.Pool(8) as p: 61 | p.map(load_one, [x for x in os.listdir(logdir) if 'exp' in x]) 62 | 63 | 64 | logdir = sys.argv[1] 65 | labels = np.load(os.path.join(logdir,"y_train.npy")) 66 | load_stats() 67 | -------------------------------------------------------------------------------- /research/mi_poison_2022/fprtpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/research/mi_poison_2022/fprtpr.png -------------------------------------------------------------------------------- /research/mi_poison_2022/logs/.keep: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /research/pate_2017/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | filegroup( 6 | name = "ignore_srcs", 7 | srcs = [ 8 | "aggregation.py", 9 | "analysis.py", 10 | "deep_cnn.py", 11 | "input.py", 12 | "metrics.py", 13 | "train_student.py", 14 | "train_teachers.py", 15 | "utils.py", 16 | ], 17 | tags = ["ignore_srcs"], 18 | ) 19 | -------------------------------------------------------------------------------- /research/pate_2017/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import numpy as np 21 | 22 | 23 | def accuracy(logits, labels): 24 | """ 25 | Return accuracy of the array of logits (or label predictions) wrt the labels 26 | :param logits: this can either be logits, probabilities, or a single label 27 | :param labels: the correct labels to match against 28 | :return: the accuracy as a float 29 | """ 30 | assert len(logits) == len(labels) 31 | 32 | if len(np.shape(logits)) > 1: 33 | # Predicted labels are the argmax over axis 1 34 | predicted_labels = np.argmax(logits, axis=1) 35 | else: 36 | # Input was already labels 37 | assert len(np.shape(logits)) == 1 38 | predicted_labels = logits 39 | 40 | # Check against correct labels to compute correct guesses 41 | correct = np.sum(predicted_labels == labels.reshape(len(labels))) 42 | 43 | # Divide by number of labels to obtain accuracy 44 | accuracy = float(correct) / len(labels) 45 | 46 | # Return float value 47 | return accuracy 48 | 49 | 50 | -------------------------------------------------------------------------------- /research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | # Be sure to clone https://github.com/openai/improved-gan 18 | # and add improved-gan/mnist_svhn_cifar10 to your PATH variable 19 | 20 | # Download labels used to train the student 21 | wget https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_student_labels_lap_20.npy 22 | 23 | # Train the student using improved-gan 24 | THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600 25 | 26 | -------------------------------------------------------------------------------- /research/pate_2017/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | def batch_indices(batch_nb, data_length, batch_size): 18 | """ 19 | This helper function computes a batch start and end index 20 | :param batch_nb: the batch number 21 | :param data_length: the total length of the data being parsed by batches 22 | :param batch_size: the number of inputs in each batch 23 | :return: pair of (start, end) indices 24 | """ 25 | # Batch start and end index 26 | start = int(batch_nb * batch_size) 27 | end = int((batch_nb + 1) * batch_size) 28 | 29 | # When there are not enough inputs left, we reuse some to complete the batch 30 | if end > data_length: 31 | shift = end - data_length 32 | start -= shift 33 | end -= shift 34 | 35 | return start, end 36 | -------------------------------------------------------------------------------- /research/pate_2018/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | filegroup( 6 | name = "ignore_srcs", 7 | srcs = [ 8 | "core.py", 9 | "core_test.py", 10 | "smooth_sensitivity.py", 11 | "smooth_sensitivity_test.py", 12 | ], 13 | tags = ["ignore_srcs"], 14 | ) 15 | -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | filegroup( 6 | name = "ignore_srcs", 7 | srcs = [ 8 | "download.py", 9 | "plot_ls_q.py", 10 | "plot_partition.py", 11 | "plots_for_slides.py", 12 | "rdp_bucketized.py", 13 | "rdp_cumulative.py", 14 | "smooth_sensitivity_table.py", 15 | "utility_queries_answered.py", 16 | ], 17 | tags = ["ignore_srcs"], 18 | ) 19 | -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/README.md: -------------------------------------------------------------------------------- 1 | Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas 2 | Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar 3 | Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908). 4 | 5 | 6 | ### Requirements 7 | 8 | * Python, version ≥ 2.7 9 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`) 10 | * matplotlib 11 | * numpy 12 | * scipy 13 | * sympy (for smooth sensitivity analysis) 14 | * write access to the current directory (otherwise, output directories in download.py and *.sh 15 | scripts must be changed) 16 | 17 | ## Reproducing Figures 1 and 5, and Table 2 18 | 19 | Before running any of the analysis scripts, create the data/ directory and download votes files by running\ 20 | `$ python download.py` 21 | 22 | To generate Figures 1 and 5 run\ 23 | `$ sh generate_figures.sh`\ 24 | The output is written to the figures/ directory. 25 | 26 | For Table 2 run (may take several hours)\ 27 | `$ sh generate_table.sh`\ 28 | The output is written to the console. 29 | 30 | For data-independent bounds (for comparison with Table 2), run\ 31 | `$ sh generate_table_data_independent.sh`\ 32 | The output is written to the console. 33 | 34 | ## Files in this directory 35 | 36 | * generate_figures.sh — Master script for generating Figures 1 and 5. 37 | 38 | * generate_table.sh — Master script for generating Table 2. 39 | 40 | * generate_table_data_independent.sh — Master script for computing data-independent 41 | bounds. 42 | 43 | * rdp_bucketized.py — Script for producing Figure 1 (right) and Figure 5 (right). 44 | 45 | * rdp_cumulative.py — Script for producing Figure 1 (middle) and Figure 5 (left). 46 | 47 | * smooth_sensitivity_table.py — Script for generating Table 2. 48 | 49 | * utility_queries_answered — Script for producing Figure 1 (left). 50 | 51 | * plot_partition.py — Script for producing partition.pdf, a detailed breakdown of privacy 52 | costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours). 53 | 54 | * plots_for_slides.py — Script for producing several plots for the slide deck. 55 | 56 | * download.py — Utility script for populating the data/ directory. 57 | 58 | * plot_ls_q.py is not used. 59 | 60 | 61 | All Python files take flags. Run script_name.py --help for help on flags. 62 | -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/download.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Script to download votes files to the data/ directory. 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from six.moves import urllib 23 | import os 24 | import tarfile 25 | 26 | FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz' 27 | DATA_DIR = 'data/' 28 | 29 | 30 | def download(): 31 | print('Downloading ' + FILE_URI) 32 | tar_filename, _ = urllib.request.urlretrieve(FILE_URI) 33 | print('Unpacking ' + tar_filename) 34 | with tarfile.open(tar_filename, "r:gz") as tar: 35 | tar.extractall(DATA_DIR) 36 | print('Done!') 37 | 38 | 39 | if __name__ == '__main__': 40 | if not os.path.exists(DATA_DIR): 41 | print('Data directory does not exist. Creating ' + DATA_DIR) 42 | os.makedirs(DATA_DIR) 43 | download() 44 | -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/generate_figures.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | 18 | counts_file="data/glyph_5000_teachers.npy" 19 | output_dir="figures/" 20 | 21 | mkdir -p $output_dir 22 | 23 | if [ ! -d "$output_dir" ]; then 24 | echo "Directory $output_dir does not exist." 25 | exit 1 26 | fi 27 | 28 | python rdp_bucketized.py \ 29 | --plot=small \ 30 | --counts_file=$counts_file \ 31 | --plot_file=$output_dir"noisy_thresholding_check_perf.pdf" 32 | 33 | python rdp_bucketized.py \ 34 | --plot=large \ 35 | --counts_file=$counts_file \ 36 | --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf" 37 | 38 | python rdp_cumulative.py \ 39 | --cache=False \ 40 | --counts_file=$counts_file \ 41 | --figures_dir=$output_dir 42 | 43 | python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf" -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/generate_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | 18 | echo "Reproducing Table 2. Takes a couple of hours." 19 | 20 | executable="python smooth_sensitivity_table.py" 21 | data_dir="data" 22 | 23 | echo 24 | echo "######## MNIST ########" 25 | echo 26 | 27 | $executable \ 28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \ 29 | --threshold=200 \ 30 | --sigma1=150 \ 31 | --sigma2=40 \ 32 | --queries=640 \ 33 | --delta=1e-5 34 | 35 | echo 36 | echo "######## SVHN ########" 37 | echo 38 | 39 | $executable \ 40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \ 41 | --threshold=300 \ 42 | --sigma1=200 \ 43 | --sigma2=40 \ 44 | --queries=8500 \ 45 | --delta=1e-6 46 | 47 | echo 48 | echo "######## Adult ########" 49 | echo 50 | 51 | $executable \ 52 | --counts_file=$data_dir"/adult_250_teachers.npy" \ 53 | --threshold=300 \ 54 | --sigma1=200 \ 55 | --sigma2=40 \ 56 | --queries=1500 \ 57 | --delta=1e-5 58 | 59 | echo 60 | echo "######## Glyph (Confident) ########" 61 | echo 62 | 63 | $executable \ 64 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \ 65 | --threshold=1000 \ 66 | --sigma1=500 \ 67 | --sigma2=100 \ 68 | --queries=12000 \ 69 | --delta=1e-8 70 | 71 | echo 72 | echo "######## Glyph (Interactive, Round 1) ########" 73 | echo 74 | 75 | $executable \ 76 | --counts_file=$data_dir"/glyph_round1.npy" \ 77 | --threshold=3500 \ 78 | --sigma1=1500 \ 79 | --sigma2=100 \ 80 | --delta=1e-8 81 | 82 | echo 83 | echo "######## Glyph (Interactive, Round 2) ########" 84 | echo 85 | 86 | $executable \ 87 | --counts_file=$data_dir"/glyph_round2.npy" \ 88 | --baseline_file=$data_dir"/glyph_round2_student.npy" \ 89 | --threshold=3500 \ 90 | --sigma1=2000 \ 91 | --sigma2=200 \ 92 | --teachers=5000 \ 93 | --delta=1e-8 94 | -------------------------------------------------------------------------------- /research/pate_2018/ICLR2018/generate_table_data_independent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | 18 | echo "Table 2 with data-independent analysis." 19 | 20 | executable="python smooth_sensitivity_table.py" 21 | data_dir="data" 22 | 23 | echo 24 | echo "######## MNIST ########" 25 | echo 26 | 27 | $executable \ 28 | --counts_file=$data_dir"/mnist_250_teachers.npy" \ 29 | --threshold=200 \ 30 | --sigma1=150 \ 31 | --sigma2=40 \ 32 | --queries=640 \ 33 | --delta=1e-5 \ 34 | --data_independent 35 | echo 36 | echo "######## SVHN ########" 37 | echo 38 | 39 | $executable \ 40 | --counts_file=$data_dir"/svhn_250_teachers.npy" \ 41 | --threshold=300 \ 42 | --sigma1=200 \ 43 | --sigma2=40 \ 44 | --queries=8500 \ 45 | --delta=1e-6 \ 46 | --data_independent 47 | 48 | echo 49 | echo "######## Adult ########" 50 | echo 51 | 52 | $executable \ 53 | --counts_file=$data_dir"/adult_250_teachers.npy" \ 54 | --threshold=300 \ 55 | --sigma1=200 \ 56 | --sigma2=40 \ 57 | --queries=1500 \ 58 | --delta=1e-5 \ 59 | --data_independent 60 | 61 | echo 62 | echo "######## Glyph (Confident) ########" 63 | echo 64 | 65 | $executable \ 66 | --counts_file=$data_dir"/glyph_5000_teachers.npy" \ 67 | --threshold=1000 \ 68 | --sigma1=500 \ 69 | --sigma2=100 \ 70 | --queries=12000 \ 71 | --delta=1e-8 \ 72 | --data_independent 73 | 74 | echo 75 | echo "######## Glyph (Interactive, Round 1) ########" 76 | echo 77 | 78 | $executable \ 79 | --counts_file=$data_dir"/glyph_round1.npy" \ 80 | --threshold=3500 \ 81 | --sigma1=1500 \ 82 | --sigma2=100 \ 83 | --delta=1e-8 \ 84 | --data_independent 85 | 86 | echo 87 | echo "######## Glyph (Interactive, Round 2) ########" 88 | echo 89 | 90 | $executable \ 91 | --counts_file=$data_dir"/glyph_round2.npy" \ 92 | --baseline_file=$data_dir"/glyph_round2_student.npy" \ 93 | --threshold=3500 \ 94 | --sigma1=2000 \ 95 | --sigma2=200 \ 96 | --teachers=5000 \ 97 | --delta=1e-8 \ 98 | --order=8.5 \ 99 | --data_independent 100 | -------------------------------------------------------------------------------- /research/pate_2018/README.md: -------------------------------------------------------------------------------- 1 | Implementation of an RDP privacy accountant and smooth sensitivity analysis for 2 | the PATE framework. The underlying theory and supporting experiments appear in 3 | "Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya 4 | Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018, 5 | https://arxiv.org/abs/1802.08908). 6 | 7 | ## Overview 8 | 9 | The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 10 | by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from 11 | Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 12 | framework enables model-agnostic training that provably provides [differential 13 | privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 14 | dataset. 15 | 16 | The framework consists of _teachers_, the _student_ model, and the _aggregator_. The 17 | teachers are models trained on disjoint subsets of the training datasets. The student 18 | model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by 19 | interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 20 | outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or 21 | refuses to answer. 22 | 23 | Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_, 24 | which means that they are solely the function of the aggregator's parameters. Alternatively, privacy 25 | analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on 26 | the input distribution, the final privacy guarantees can be improved relative to the data-independent 27 | analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 28 | therefore publishing these guarantees requires its own sanitization procedure. In our case 29 | sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis. 30 | 31 | The common machinery used for all privacy analyses in this repository is the 32 | Rényi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 33 | 34 | This repository contains implementations of privacy accountants and smooth 35 | sensitivity analysis for several data-independent and data-dependent mechanism that together 36 | comprise the PATE framework. 37 | 38 | 39 | ### Requirements 40 | 41 | * Python, version ≥ 2.7 42 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`) 43 | * numpy 44 | * scipy 45 | * sympy (for smooth sensitivity analysis) 46 | * unittest (for testing) 47 | 48 | 49 | ### Self-testing 50 | 51 | To verify the installation run 52 | ```bash 53 | $ python core_test.py 54 | $ python smooth_sensitivity_test.py 55 | ``` 56 | 57 | 58 | ## Files in this directory 59 | 60 | * core.py — RDP privacy accountant for several vote aggregators (GNMax, 61 | Threshold, Laplace). 62 | 63 | * smooth_sensitivity.py — Smooth sensitivity analysis for GNMax and 64 | Threshold mechanisms. 65 | 66 | * core_test.py and smooth_sensitivity_test.py — Unit tests for the 67 | files above. 68 | 69 | ## Contact information 70 | 71 | You may direct your comments to mironov@google.com and PR to @ilyamironov. 72 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Privacy/DP Training library setup file for pip.""" 15 | 16 | import setuptools 17 | 18 | with open('tensorflow_privacy/version.py') as file: 19 | globals_dict = {} 20 | exec(file.read(), globals_dict) # pylint: disable=exec-used 21 | VERSION = globals_dict['__version__'] 22 | 23 | README = ( 24 | 'A Python library that includes implementations of TensorFlow optimizers ' 25 | 'for training machine learning models with differential privacy.' 26 | ) 27 | 28 | setuptools.setup( 29 | name='tensorflow_privacy', 30 | version=VERSION, 31 | description='A privacy-focused machine learning framework', 32 | long_description=README, 33 | long_description_content_type='text/plain', 34 | url='https://github.com/tensorflow/privacy', 35 | license='Apache-2.0', 36 | packages=setuptools.find_packages(exclude=['*privacy.privacy_tests*']), 37 | install_requires=[ 38 | 'absl-py>=1.0,==1.*', 39 | 'dm-tree==0.1.8', 40 | 'dp-accounting==0.4.4', # TODO(b/364653784) 41 | 'numpy~=1.21', 42 | 'packaging~=22.0', 43 | 'scikit-learn>=1.0,==1.*', 44 | 'scipy~=1.9', 45 | 'tensorflow>=2.4.0,<=2.15.0', 46 | 'tensorflow-probability~=0.22.0', 47 | ], 48 | python_requires='>=3.9.0,<3.12', 49 | ) 50 | -------------------------------------------------------------------------------- /setup_empirical.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Privacy/Privacy Tests library setup file for pip.""" 15 | 16 | import setuptools 17 | 18 | with open('tensorflow_privacy/privacy/privacy_tests/version.py') as file: 19 | globals_dict = {} 20 | exec(file.read(), globals_dict) # pylint: disable=exec-used 21 | VERSION = globals_dict['__version__'] 22 | 23 | README = ( 24 | 'A Python library that includes implementations of tests for empirical ' 25 | 'privacy.' 26 | ) 27 | 28 | setuptools.setup( 29 | name='tensorflow_empirical_privacy', 30 | version=VERSION, 31 | description='Tests for empirical privacy.', 32 | long_description=README, 33 | long_description_content_type='text/plain', 34 | url='https://github.com/tensorflow/privacy', 35 | license='Apache-2.0', 36 | packages=setuptools.find_packages(include=['*privacy.privacy_tests*']), 37 | install_requires=[ 38 | 'absl-py>=1.0,==1.*', 39 | 'immutabledict~=2.2', 40 | 'matplotlib~=3.3', 41 | 'numpy~=1.21', 42 | 'pandas~=1.4', 43 | 'scikit-learn>=1.0,==1.*', 44 | 'scipy~=1.9', 45 | 'statsmodels==0.14.0', 46 | 'tensorflow>=2.4.0,<=2.15.0', 47 | 'tensorflow-privacy>=0.9.0', 48 | 'tf-models-official~=2.13', 49 | ], 50 | python_requires='>=3.9.0,<3.12', 51 | ) 52 | -------------------------------------------------------------------------------- /tensorflow_privacy/.bazelversion: -------------------------------------------------------------------------------- 1 | 5.1.1 2 | -------------------------------------------------------------------------------- /tensorflow_privacy/BUILD: -------------------------------------------------------------------------------- 1 | load("@bazel_skylib//rules:build_test.bzl", "build_test") 2 | 3 | package( 4 | default_visibility = ["//visibility:public"], 5 | ) 6 | 7 | licenses(["notice"]) 8 | 9 | exports_files([ 10 | "LICENSE", 11 | ]) 12 | 13 | py_library( 14 | name = "tensorflow_privacy", 15 | srcs = ["__init__.py"], 16 | deps = [ 17 | ":version", 18 | "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib", 19 | "//tensorflow_privacy/privacy/analysis:tree_aggregation_accountant", 20 | "//tensorflow_privacy/privacy/dp_query", 21 | "//tensorflow_privacy/privacy/dp_query:discrete_gaussian_query", 22 | "//tensorflow_privacy/privacy/dp_query:distributed_discrete_gaussian_query", 23 | "//tensorflow_privacy/privacy/dp_query:distributed_skellam_query", 24 | "//tensorflow_privacy/privacy/dp_query:gaussian_query", 25 | "//tensorflow_privacy/privacy/dp_query:nested_query", 26 | "//tensorflow_privacy/privacy/dp_query:no_privacy_query", 27 | "//tensorflow_privacy/privacy/dp_query:normalized_query", 28 | "//tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query", 29 | "//tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_tree_query", 30 | "//tensorflow_privacy/privacy/dp_query:quantile_estimator_query", 31 | "//tensorflow_privacy/privacy/dp_query:restart_query", 32 | "//tensorflow_privacy/privacy/dp_query:tree_aggregation", 33 | "//tensorflow_privacy/privacy/dp_query:tree_aggregation_query", 34 | "//tensorflow_privacy/privacy/dp_query:tree_range_query", 35 | "//tensorflow_privacy/privacy/estimators:dnn", 36 | "//tensorflow_privacy/privacy/keras_models:dp_keras_model", 37 | "//tensorflow_privacy/privacy/logistic_regression:datasets", 38 | "//tensorflow_privacy/privacy/logistic_regression:multinomial_logistic", 39 | "//tensorflow_privacy/privacy/logistic_regression:single_layer_softmax", 40 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 41 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras_vectorized", 42 | "//tensorflow_privacy/v1:tensorflow_privacy_v1", 43 | ], 44 | ) 45 | 46 | build_test( 47 | name = "tensorflow_privacy_build_test", 48 | targets = [":tensorflow_privacy"], 49 | ) 50 | 51 | py_library( 52 | name = "version", 53 | srcs = ["version.py"], 54 | ) 55 | 56 | filegroup( 57 | name = "ignore_srcs", 58 | tags = ["ignore_srcs"], 59 | ) 60 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "privacy", 7 | srcs = ["__init__.py"], 8 | ) 9 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "analysis", 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "compute_dp_sgd_privacy_lib", 12 | srcs = ["compute_dp_sgd_privacy_lib.py"], 13 | ) 14 | 15 | py_binary( 16 | name = "compute_dp_sgd_privacy", 17 | srcs = ["compute_dp_sgd_privacy.py"], 18 | deps = [":compute_dp_sgd_privacy_lib"], 19 | ) 20 | 21 | py_test( 22 | name = "compute_dp_sgd_privacy_test", 23 | size = "small", 24 | timeout = "moderate", 25 | srcs = ["compute_dp_sgd_privacy_test.py"], 26 | deps = [":compute_dp_sgd_privacy_lib"], 27 | ) 28 | 29 | py_binary( 30 | name = "compute_noise_from_budget", 31 | srcs = ["compute_noise_from_budget.py"], 32 | deps = [":compute_noise_from_budget_lib"], 33 | ) 34 | 35 | py_library( 36 | name = "compute_noise_from_budget_lib", 37 | srcs = ["compute_noise_from_budget_lib.py"], 38 | ) 39 | 40 | py_test( 41 | name = "compute_noise_from_budget_test", 42 | srcs = ["compute_noise_from_budget_test.py"], 43 | deps = [":compute_noise_from_budget_lib"], 44 | ) 45 | 46 | py_library( 47 | name = "gdp_accountant", 48 | srcs = ["gdp_accountant.py"], 49 | ) 50 | 51 | py_library( 52 | name = "tensor_buffer", 53 | srcs = ["tensor_buffer.py"], 54 | ) 55 | 56 | py_test( 57 | name = "tensor_buffer_eager_test", 58 | size = "small", 59 | srcs = ["tensor_buffer_eager_test.py"], 60 | deps = [":tensor_buffer"], 61 | ) 62 | 63 | py_test( 64 | name = "tensor_buffer_graph_test", 65 | size = "small", 66 | srcs = ["tensor_buffer_graph_test.py"], 67 | deps = [":tensor_buffer"], 68 | ) 69 | 70 | py_library( 71 | name = "tree_aggregation_accountant", 72 | srcs = ["tree_aggregation_accountant.py"], 73 | ) 74 | 75 | py_test( 76 | name = "tree_aggregation_accountant_test", 77 | srcs = ["tree_aggregation_accountant_test.py"], 78 | deps = [":tree_aggregation_accountant"], 79 | ) 80 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Command-line script for computing privacy of a model trained with DP-SGD. 16 | 17 | The script applies the RDP accountant to estimate privacy budget of an iterated 18 | Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags. 19 | 20 | Example: 21 | compute_noise_from_budget 22 | --N=60000 \ 23 | --batch_size=256 \ 24 | --epsilon=2.92 \ 25 | --epochs=60 \ 26 | --delta=1e-5 \ 27 | --min_noise=1e-6 28 | 29 | The output states that DP-SGD with these parameters should 30 | use a noise multiplier of 1.12. 31 | """ 32 | 33 | from absl import app 34 | from absl import flags 35 | 36 | from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise 37 | 38 | FLAGS = flags.FLAGS 39 | 40 | flags.DEFINE_integer('N', None, 'Total number of examples') 41 | flags.DEFINE_integer('batch_size', None, 'Batch size') 42 | flags.DEFINE_float('epsilon', None, 'Target epsilon for DP-SGD') 43 | flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)') 44 | flags.DEFINE_float('delta', 1e-6, 'Target delta') 45 | flags.DEFINE_float('min_noise', 1e-5, 'Minimum noise level for search.') 46 | 47 | 48 | def main(argv): 49 | del argv # argv is not used. 50 | 51 | assert FLAGS.N is not None, 'Flag N is missing.' 52 | assert FLAGS.batch_size is not None, 'Flag batch_size is missing.' 53 | assert FLAGS.epsilon is not None, 'Flag epsilon is missing.' 54 | assert FLAGS.epochs is not None, 'Flag epochs is missing.' 55 | compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs, 56 | FLAGS.delta, FLAGS.min_noise) 57 | 58 | 59 | if __name__ == '__main__': 60 | app.run(main) 61 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Library for computing privacy values for DP-SGD.""" 16 | 17 | import math 18 | 19 | from absl import app 20 | import dp_accounting 21 | 22 | 23 | def compute_noise(n, batch_size, target_epsilon, epochs, delta, noise_lbd): 24 | """Compute noise based on the given hyperparameters.""" 25 | q = batch_size / n # q - the sampling ratio. 26 | if q > 1: 27 | raise app.UsageError('n must be larger than the batch size.') 28 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] + 29 | list(range(5, 64)) + [128, 256, 512]) 30 | steps = int(math.ceil(epochs * n / batch_size)) 31 | 32 | def make_event_from_noise(sigma): 33 | return dp_accounting.SelfComposedDpEvent( 34 | dp_accounting.PoissonSampledDpEvent( 35 | q, dp_accounting.GaussianDpEvent(sigma)), steps) 36 | 37 | def make_accountant(): 38 | return dp_accounting.rdp.RdpAccountant(orders) 39 | 40 | accountant = make_accountant() 41 | accountant.compose(make_event_from_noise(noise_lbd)) 42 | init_epsilon = accountant.get_epsilon(delta) 43 | 44 | if init_epsilon < target_epsilon: # noise_lbd was an overestimate 45 | print('noise_lbd too large for target epsilon.') 46 | return 0 47 | 48 | target_noise = dp_accounting.calibrate_dp_mechanism( 49 | make_accountant, make_event_from_noise, target_epsilon, delta, 50 | dp_accounting.LowerEndpointAndGuess(noise_lbd, noise_lbd * 2)) 51 | 52 | print( 53 | 'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' 54 | ' over {} steps satisfies'.format(100 * q, target_noise, steps), 55 | end=' ') 56 | print('differential privacy with eps = {:.3g} and delta = {}.'.format( 57 | target_epsilon, delta)) 58 | return target_noise 59 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from absl.testing import absltest 17 | from absl.testing import parameterized 18 | 19 | from tensorflow_privacy.privacy.analysis import compute_noise_from_budget_lib 20 | 21 | 22 | class ComputeNoiseFromBudgetTest(parameterized.TestCase): 23 | 24 | @parameterized.named_parameters( 25 | ('Test0', 60000, 150, 0.941870567, 15, 1e-5, 1e-5, 1.3), 26 | ('Test1', 100000, 100, 1.70928734, 30, 1e-7, 1e-6, 1.0), 27 | ('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1), 28 | ('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0), 29 | ) 30 | def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta, 31 | min_noise, expected_noise): 32 | self.skipTest('Disable test.') 33 | target_noise = compute_noise_from_budget_lib.compute_noise( 34 | n, batch_size, target_epsilon, epochs, delta, min_noise) 35 | self.assertAlmostEqual(target_noise, expected_noise) 36 | 37 | 38 | if __name__ == '__main__': 39 | absltest.main() 40 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/gdp_accountant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | r"""Implements privacy accounting for Gaussian Differential Privacy. 16 | 17 | Applies the Dual and Central Limit Theorem (CLT) to estimate privacy budget of 18 | an iterated subsampled Gaussian Mechanism (by either uniform or Poisson 19 | subsampling). 20 | """ 21 | 22 | import numpy as np 23 | from scipy import optimize 24 | from scipy import stats 25 | 26 | 27 | def compute_mu_uniform(epoch, noise_multi, n, batch_size): 28 | """Compute mu from uniform subsampling.""" 29 | 30 | t = epoch * n / batch_size 31 | c = batch_size * np.sqrt(t) / n 32 | return np.sqrt(2) * c * np.sqrt( 33 | np.exp(noise_multi**(-2)) * stats.norm.cdf(1.5 / noise_multi) + 34 | 3 * stats.norm.cdf(-0.5 / noise_multi) - 2) 35 | 36 | 37 | def compute_mu_poisson(epoch, noise_multi, n, batch_size): 38 | """Compute mu from Poisson subsampling.""" 39 | 40 | t = epoch * n / batch_size 41 | return np.sqrt(np.exp(noise_multi**(-2)) - 1) * np.sqrt(t) * batch_size / n 42 | 43 | 44 | def delta_eps_mu(eps, mu): 45 | """Compute dual between mu-GDP and (epsilon, delta)-DP.""" 46 | return stats.norm.cdf(-eps / mu + mu / 47 | 2) - np.exp(eps) * stats.norm.cdf(-eps / mu - mu / 2) 48 | 49 | 50 | def eps_from_mu(mu, delta): 51 | """Compute epsilon from mu given delta via inverse dual.""" 52 | 53 | def f(x): 54 | """Reversely solve dual by matching delta.""" 55 | return delta_eps_mu(x, mu) - delta 56 | 57 | return optimize.root_scalar(f, bracket=[0, 500], method='brentq').root 58 | 59 | 60 | def compute_eps_uniform(epoch, noise_multi, n, batch_size, delta): 61 | """Compute epsilon given delta from inverse dual of uniform subsampling.""" 62 | 63 | return eps_from_mu( 64 | compute_mu_uniform(epoch, noise_multi, n, batch_size), delta) 65 | 66 | 67 | def compute_eps_poisson(epoch, noise_multi, n, batch_size, delta): 68 | """Compute epsilon given delta from inverse dual of Poisson subsampling.""" 69 | 70 | return eps_from_mu( 71 | compute_mu_poisson(epoch, noise_multi, n, batch_size), delta) 72 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | 17 | from tensorflow_privacy.privacy.analysis import tensor_buffer 18 | 19 | tf.compat.v1.enable_eager_execution() 20 | 21 | 22 | class TensorBufferTest(tf.test.TestCase): 23 | """Tests for TensorBuffer in eager mode.""" 24 | 25 | def test_basic(self): 26 | size, shape = 2, [2, 3] 27 | 28 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 29 | 30 | value1 = [[1, 2, 3], [4, 5, 6]] 31 | my_buffer.append(value1) 32 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 33 | 34 | value2 = [[4, 5, 6], [7, 8, 9]] 35 | my_buffer.append(value2) 36 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 37 | 38 | def test_fail_on_scalar(self): 39 | with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'): 40 | tensor_buffer.TensorBuffer(1, ()) 41 | 42 | def test_fail_on_inconsistent_shape(self): 43 | size, shape = 1, [2, 3] 44 | 45 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 46 | 47 | with self.assertRaisesRegex(tf.errors.InvalidArgumentError, 48 | 'Appending value of inconsistent shape.'): 49 | my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) 50 | 51 | def test_resize(self): 52 | size, shape = 2, [2, 3] 53 | 54 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 55 | 56 | # Append three buffers. Third one should succeed after resizing. 57 | value1 = [[1, 2, 3], [4, 5, 6]] 58 | my_buffer.append(value1) 59 | self.assertAllEqual(my_buffer.values.numpy(), [value1]) 60 | self.assertAllEqual(my_buffer.current_size.numpy(), 1) 61 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 62 | 63 | value2 = [[4, 5, 6], [7, 8, 9]] 64 | my_buffer.append(value2) 65 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) 66 | self.assertAllEqual(my_buffer.current_size.numpy(), 2) 67 | self.assertAllEqual(my_buffer.capacity.numpy(), 2) 68 | 69 | value3 = [[7, 8, 9], [10, 11, 12]] 70 | my_buffer.append(value3) 71 | self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3]) 72 | self.assertAllEqual(my_buffer.current_size.numpy(), 3) 73 | # Capacity should have doubled. 74 | self.assertAllEqual(my_buffer.capacity.numpy(), 4) 75 | 76 | 77 | if __name__ == '__main__': 78 | tf.test.main() 79 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/analysis/tensor_buffer_graph_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.analysis import tensor_buffer 17 | 18 | 19 | class TensorBufferTest(tf.test.TestCase): 20 | """Tests for TensorBuffer in graph mode.""" 21 | 22 | def test_noresize(self): 23 | """Test buffer does not resize if capacity is not exceeded.""" 24 | with self.cached_session() as sess: 25 | size, shape = 2, [2, 3] 26 | 27 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 28 | value1 = [[1, 2, 3], [4, 5, 6]] 29 | with tf.control_dependencies([my_buffer.append(value1)]): 30 | value2 = [[7, 8, 9], [10, 11, 12]] 31 | with tf.control_dependencies([my_buffer.append(value2)]): 32 | values = my_buffer.values 33 | current_size = my_buffer.current_size 34 | capacity = my_buffer.capacity 35 | self.evaluate(tf.compat.v1.global_variables_initializer()) 36 | 37 | v, cs, cap = sess.run([values, current_size, capacity]) 38 | self.assertAllEqual(v, [value1, value2]) 39 | self.assertEqual(cs, 2) 40 | self.assertEqual(cap, 2) 41 | 42 | def test_resize(self): 43 | """Test buffer resizes if capacity is exceeded.""" 44 | with self.cached_session() as sess: 45 | size, shape = 2, [2, 3] 46 | 47 | my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') 48 | value1 = [[1, 2, 3], [4, 5, 6]] 49 | with tf.control_dependencies([my_buffer.append(value1)]): 50 | value2 = [[7, 8, 9], [10, 11, 12]] 51 | with tf.control_dependencies([my_buffer.append(value2)]): 52 | value3 = [[13, 14, 15], [16, 17, 18]] 53 | with tf.control_dependencies([my_buffer.append(value3)]): 54 | values = my_buffer.values 55 | current_size = my_buffer.current_size 56 | capacity = my_buffer.capacity 57 | self.evaluate(tf.compat.v1.global_variables_initializer()) 58 | 59 | v, cs, cap = sess.run([values, current_size, capacity]) 60 | self.assertAllEqual(v, [value1, value2, value3]) 61 | self.assertEqual(cs, 3) 62 | self.assertEqual(cap, 4) 63 | 64 | 65 | if __name__ == '__main__': 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/bolt_on/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:private"]) 2 | 3 | licenses(["notice"]) 4 | 5 | filegroup( 6 | name = "ignore_srcs", 7 | srcs = [ 8 | "__init__.py", 9 | "losses.py", 10 | "losses_test.py", 11 | "models.py", 12 | "models_test.py", 13 | "optimizers.py", 14 | "optimizers_test.py", 15 | ], 16 | tags = ["ignore_srcs"], 17 | ) 18 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/bolt_on/README.md: -------------------------------------------------------------------------------- 1 | # BoltOn Subpackage 2 | 3 | This package contains source code for the BoltOn method, a particular 4 | differential-privacy (DP) technique that uses output perturbations and 5 | leverages additional assumptions to provide a new way of approaching the 6 | privacy guarantees. 7 | 8 | ## BoltOn Description 9 | 10 | This method uses 4 key steps to achieve privacy guarantees: 11 | 1. Adds noise to weights after training (output perturbation). 12 | 2. Projects weights to R, the radius of the hypothesis space, 13 | after each batch. This value is configurable by the user. 14 | 3. Limits learning rate 15 | 4. Uses a strongly convex loss function (see compile) 16 | 17 | For more details on the strong convexity requirements, see: 18 | Bolt-on Differential Privacy for Scalable Stochastic Gradient 19 | Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf 20 | 21 | ## Why BoltOn? 22 | 23 | The major difference for the BoltOn method is that it injects noise post model 24 | convergence, rather than noising gradients or weights during training. This 25 | approach requires some additional constraints listed in the Description. 26 | Should the use-case and model satisfy these constraints, this is another 27 | approach that can be trained to maximize utility while maintaining the privacy. 28 | The paper describes in detail the advantages and disadvantages of this approach 29 | and its results compared to some other methods, namely noising at each iteration 30 | and no noising. 31 | 32 | ## Tutorials 33 | 34 | This package has a tutorial that can be found in the root tutorials directory, 35 | under `bolton_tutorial.py`. 36 | 37 | ## Contribution 38 | 39 | This package was initially contributed by Georgian Partners with the hope of 40 | growing the tensorflow/privacy library. There are several rich use cases for 41 | delta-epsilon privacy in machine learning, some of which can be explored here: 42 | https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e 43 | https://arxiv.org/pdf/1811.04911.pdf 44 | 45 | ## Stability 46 | 47 | As we are pegged on tensorflow2.0, this package may encounter stability 48 | issues in the ongoing development of tensorflow2.0. 49 | 50 | This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0.b1 If you 51 | would like to use this subpackage, please do use one of these versions as we 52 | cannot guarantee it will work for all latest releases. If you do find issues, 53 | feel free to raise an issue to the contributors listed below. 54 | 55 | ## Contacts 56 | 57 | In addition to the maintainers of tensorflow/privacy listed in the root 58 | README.md, please feel free to contact members of Georgian Partners. In 59 | particular, 60 | 61 | * Georgian Partners(@georgianpartners) 62 | * Ji Chao Zhang(@Jichaogp) 63 | * Christopher Choquette(@cchoquette) 64 | 65 | ## Copyright 66 | 67 | Copyright 2019 - Google LLC 68 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/bolt_on/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """BoltOn Method for privacy.""" 15 | 16 | import sys 17 | 18 | from packaging import version 19 | import tensorflow.compat.v1 as tf 20 | 21 | if version.Version(tf.__version__) < version.Version("2.0.0"): 22 | raise ImportError("Please upgrade your version " 23 | "of tensorflow from: {0} to at least 2.0.0 to " 24 | "use privacy/bolt_on".format( 25 | version.Version(tf.__version__))) 26 | if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. 27 | pass 28 | else: 29 | from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel # pylint: disable=g-import-not-at-top 30 | from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn # pylint: disable=g-import-not-at-top 31 | from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top 32 | from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top 33 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/dp_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from absl.testing import parameterized 16 | import tensorflow as tf 17 | from tensorflow_privacy.privacy.dp_query import no_privacy_query 18 | 19 | 20 | class SumAggregationQueryTest(tf.test.TestCase, parameterized.TestCase): 21 | 22 | def test_initial_sample_state_works_on_tensorspecs(self): 23 | query = no_privacy_query.NoPrivacySumQuery() 24 | template = tf.TensorSpec.from_tensor(tf.constant([1.0, 2.0])) 25 | sample_state = query.initial_sample_state(template) 26 | expected = [0.0, 0.0] 27 | self.assertAllClose(sample_state, expected) 28 | 29 | 30 | if __name__ == '__main__': 31 | tf.test.main() 32 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from absl.testing import parameterized 16 | import tensorflow as tf 17 | from tensorflow_privacy.privacy.dp_query import no_privacy_query 18 | from tensorflow_privacy.privacy.dp_query import test_utils 19 | 20 | 21 | class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase): 22 | 23 | def test_sum(self): 24 | record1 = tf.constant([2.0, 0.0]) 25 | record2 = tf.constant([-1.0, 1.0]) 26 | 27 | query = no_privacy_query.NoPrivacySumQuery() 28 | query_result, _ = test_utils.run_query(query, [record1, record2]) 29 | expected = [1.0, 1.0] 30 | self.assertAllClose(query_result, expected) 31 | 32 | def test_no_privacy_average(self): 33 | record1 = tf.constant([5.0, 0.0]) 34 | record2 = tf.constant([-1.0, 2.0]) 35 | 36 | query = no_privacy_query.NoPrivacyAverageQuery() 37 | query_result, _ = test_utils.run_query(query, [record1, record2]) 38 | expected = [2.0, 1.0] 39 | self.assertAllClose(query_result, expected) 40 | 41 | def test_no_privacy_weighted_average(self): 42 | record1 = tf.constant([4.0, 0.0]) 43 | record2 = tf.constant([-1.0, 1.0]) 44 | 45 | weights = [1, 3] 46 | 47 | query = no_privacy_query.NoPrivacyAverageQuery() 48 | query_result, _ = test_utils.run_query( 49 | query, [record1, record2], weights=weights) 50 | expected = [0.25, 0.75] 51 | self.assertAllClose(query_result, expected) 52 | 53 | @parameterized.named_parameters( 54 | ('type_mismatch', [1.0], (1.0,), TypeError), 55 | ('too_few_on_left', [1.0], [1.0, 1.0], ValueError), 56 | ('too_few_on_right', [1.0, 1.0], [1.0], ValueError)) 57 | def test_incompatible_records(self, record1, record2, error_type): 58 | query = no_privacy_query.NoPrivacySumQuery() 59 | with self.assertRaises(error_type): 60 | test_utils.run_query(query, [record1, record2]) 61 | 62 | 63 | if __name__ == '__main__': 64 | tf.test.main() 65 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/normalized_query_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.dp_query import gaussian_query 17 | from tensorflow_privacy.privacy.dp_query import normalized_query 18 | from tensorflow_privacy.privacy.dp_query import test_utils 19 | 20 | 21 | class NormalizedQueryTest(tf.test.TestCase): 22 | 23 | def test_normalization(self): 24 | record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. 25 | record2 = tf.constant([4.0, -3.0]) # Not clipped. 26 | 27 | sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0) 28 | query = normalized_query.NormalizedQuery( 29 | numerator_query=sum_query, denominator=2.0) 30 | 31 | query_result, _ = test_utils.run_query(query, [record1, record2]) 32 | expected = [0.5, 0.5] 33 | self.assertAllClose(query_result, expected) 34 | 35 | 36 | if __name__ == '__main__': 37 | tf.test.main() 38 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/dp_query/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utility methods for testing private queries. 15 | 16 | Utility methods for testing private queries. 17 | """ 18 | 19 | 20 | def run_query(query, records, global_state=None, weights=None): 21 | """Executes query on the given set of records as a single sample. 22 | 23 | Args: 24 | query: A PrivateQuery to run. 25 | records: An iterable containing records to pass to the query. 26 | global_state: The current global state. If None, an initial global state is 27 | generated. 28 | weights: An optional iterable containing the weights of the records. 29 | 30 | Returns: 31 | A tuple (result, new_global_state) where "result" is the result of the 32 | query and "new_global_state" is the updated global state. 33 | """ 34 | if not global_state: 35 | global_state = query.initial_global_state() 36 | params = query.derive_sample_params(global_state) 37 | sample_state = query.initial_sample_state(next(iter(records))) 38 | if weights is None: 39 | for record in records: 40 | sample_state = query.accumulate_record(params, sample_state, record) 41 | else: 42 | for weight, record in zip(weights, records): 43 | sample_state = query.accumulate_record(params, sample_state, record, 44 | weight) 45 | result, global_state, _ = query.get_noised_result(sample_state, global_state) 46 | return result, global_state 47 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_python//python:defs.bzl", "py_library") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) 6 | 7 | py_library( 8 | name = "estimators", 9 | srcs = ["__init__.py"], 10 | ) 11 | 12 | py_library( 13 | name = "head_utils", 14 | srcs = [ 15 | "head_utils.py", 16 | ], 17 | deps = [ 18 | ":binary_class_head", 19 | ":multi_class_head", 20 | ], 21 | ) 22 | 23 | py_library( 24 | name = "binary_class_head", 25 | srcs = [ 26 | "binary_class_head.py", 27 | ], 28 | ) 29 | 30 | py_library( 31 | name = "multi_class_head", 32 | srcs = [ 33 | "multi_class_head.py", 34 | ], 35 | ) 36 | 37 | py_library( 38 | name = "multi_label_head", 39 | srcs = [ 40 | "multi_label_head.py", 41 | ], 42 | ) 43 | 44 | py_library( 45 | name = "dnn", 46 | srcs = [ 47 | "dnn.py", 48 | ], 49 | deps = [":head_utils"], 50 | ) 51 | 52 | py_library( 53 | name = "test_utils", 54 | srcs = [ 55 | "test_utils.py", 56 | ], 57 | ) 58 | 59 | py_test( 60 | name = "binary_class_head_test", 61 | timeout = "long", 62 | srcs = ["binary_class_head_test.py"], 63 | deps = [ 64 | ":binary_class_head", 65 | ":test_utils", 66 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 67 | ], 68 | ) 69 | 70 | py_test( 71 | name = "multi_class_head_test", 72 | timeout = "long", 73 | srcs = ["multi_class_head_test.py"], 74 | deps = [ 75 | ":multi_class_head", 76 | ":test_utils", 77 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 78 | ], 79 | ) 80 | 81 | py_test( 82 | name = "multi_label_head_test", 83 | timeout = "long", 84 | srcs = ["multi_label_head_test.py"], 85 | deps = [ 86 | ":multi_label_head", 87 | ":test_utils", 88 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 89 | ], 90 | ) 91 | 92 | py_test( 93 | name = "dnn_test", 94 | timeout = "long", 95 | srcs = ["dnn_test.py"], 96 | deps = [ 97 | ":dnn", 98 | ":test_utils", 99 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 100 | ], 101 | ) 102 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/binary_class_head_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | from tensorflow_privacy.privacy.estimators import binary_class_head 18 | from tensorflow_privacy.privacy.estimators import test_utils 19 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer 20 | from tensorflow_estimator.python.estimator import estimator 21 | 22 | 23 | class DPBinaryClassHeadTest(tf.test.TestCase): 24 | """Tests for DP-enabled binary class heads.""" 25 | 26 | def testLoss(self): 27 | """Tests loss() returns per-example losses.""" 28 | 29 | head = binary_class_head.DPBinaryClassHead() 30 | features = {'feature_a': np.full((4), 1.0)} 31 | labels = np.array([[1.0], [1.0], [1.0], [0.0]]) 32 | logits = np.full((4, 1), 0.5) 33 | 34 | actual_loss = head.loss(labels, logits, features) 35 | expected_loss = tf.nn.sigmoid_cross_entropy_with_logits( 36 | labels=labels, logits=logits) 37 | 38 | self.assertEqual(actual_loss.shape, [4, 1]) 39 | 40 | if tf.executing_eagerly(): 41 | self.assertEqual(actual_loss.shape, [4, 1]) 42 | self.assertAllClose(actual_loss, expected_loss) 43 | return 44 | 45 | self.assertAllClose(expected_loss, self.evaluate(actual_loss)) 46 | 47 | def testCreateTPUEstimatorSpec(self): 48 | """Tests that an Estimator built with this head works.""" 49 | 50 | train_features, train_labels = test_utils.make_input_data(256, 2) 51 | feature_columns = [] 52 | for key in train_features: 53 | feature_columns.append(tf.feature_column.numeric_column(key=key)) 54 | 55 | head = binary_class_head.DPBinaryClassHead() 56 | optimizer = DPKerasSGDOptimizer( 57 | learning_rate=0.5, 58 | l2_norm_clip=1.0, 59 | noise_multiplier=0.0, 60 | num_microbatches=2) 61 | model_fn = test_utils.make_model_fn(head, optimizer, feature_columns) 62 | classifier = estimator.Estimator(model_fn=model_fn) 63 | 64 | classifier.train( 65 | input_fn=test_utils.make_input_fn(train_features, train_labels, True), 66 | steps=4) 67 | 68 | test_features, test_labels = test_utils.make_input_data(64, 2) 69 | classifier.evaluate( 70 | input_fn=test_utils.make_input_fn(test_features, test_labels, False), 71 | steps=4) 72 | 73 | predict_features, predict_labels_ = test_utils.make_input_data(64, 2) 74 | classifier.predict( 75 | input_fn=test_utils.make_input_fn(predict_features, predict_labels_, 76 | False)) 77 | 78 | 79 | if __name__ == '__main__': 80 | tf.test.main() 81 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/dnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Estimator heads that allow integration with TF Privacy.""" 15 | 16 | import tensorflow as tf 17 | 18 | from tensorflow_privacy.privacy.estimators import head_utils 19 | from tensorflow_estimator.python.estimator import estimator 20 | from tensorflow_estimator.python.estimator.canned import dnn 21 | 22 | 23 | class DNNClassifier(estimator.Estimator): 24 | """DP version of `tf.estimator.DNNClassifier`.""" 25 | 26 | def __init__( 27 | self, 28 | hidden_units, 29 | feature_columns, 30 | model_dir=None, 31 | n_classes=2, 32 | weight_column=None, 33 | label_vocabulary=None, 34 | optimizer=None, 35 | activation_fn=tf.nn.relu, 36 | dropout=None, 37 | config=None, 38 | warm_start_from=None, 39 | loss_reduction=tf.keras.losses.Reduction.NONE, 40 | batch_norm=False, 41 | ): 42 | """See `tf.estimator.DNNClassifier`.""" 43 | head = head_utils.binary_or_multi_class_head( 44 | n_classes, 45 | weight_column=weight_column, 46 | label_vocabulary=label_vocabulary, 47 | loss_reduction=loss_reduction) 48 | estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN') 49 | 50 | def _model_fn(features, labels, mode, config): 51 | return dnn.dnn_model_fn_v2( 52 | features=features, 53 | labels=labels, 54 | mode=mode, 55 | head=head, 56 | hidden_units=hidden_units, 57 | feature_columns=tuple(feature_columns or []), 58 | optimizer=optimizer, 59 | activation_fn=activation_fn, 60 | dropout=dropout, 61 | config=config, 62 | batch_norm=batch_norm) 63 | 64 | super().__init__( 65 | model_fn=_model_fn, 66 | model_dir=model_dir, 67 | config=config, 68 | warm_start_from=warm_start_from) 69 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/dnn_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import functools 16 | 17 | from absl.testing import parameterized 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.estimators import dnn 20 | from tensorflow_privacy.privacy.estimators import test_utils 21 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer 22 | 23 | 24 | class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase): 25 | """Tests for DP-enabled DNNClassifier.""" 26 | 27 | @parameterized.named_parameters( 28 | ('BinaryClassDNN 1', 2), 29 | ('MultiClassDNN 1', 3), 30 | ) 31 | def testDNN(self, classes): 32 | train_features, train_labels = test_utils.make_input_data(256, classes) 33 | feature_columns = [] 34 | for key in train_features: 35 | feature_columns.append(tf.feature_column.numeric_column(key=key)) 36 | 37 | optimizer = functools.partial( 38 | DPKerasSGDOptimizer, 39 | learning_rate=0.5, 40 | l2_norm_clip=1.0, 41 | noise_multiplier=0.0, 42 | num_microbatches=1) 43 | 44 | classifier = dnn.DNNClassifier( 45 | hidden_units=[10], 46 | activation_fn='relu', 47 | feature_columns=feature_columns, 48 | n_classes=classes, 49 | optimizer=optimizer, 50 | loss_reduction=tf.losses.Reduction.NONE) 51 | 52 | classifier.train( 53 | input_fn=test_utils.make_input_fn(train_features, train_labels, True, 54 | 16)) 55 | 56 | test_features, test_labels = test_utils.make_input_data(64, classes) 57 | classifier.evaluate( 58 | input_fn=test_utils.make_input_fn(test_features, test_labels, False, 59 | 16)) 60 | 61 | predict_features, predict_labels = test_utils.make_input_data(64, classes) 62 | classifier.predict( 63 | input_fn=test_utils.make_input_fn(predict_features, predict_labels, 64 | False)) 65 | 66 | 67 | if __name__ == '__main__': 68 | tf.test.main() 69 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/head_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Estimator heads that allow integration with TF Privacy.""" 15 | 16 | from tensorflow_privacy.privacy.estimators.binary_class_head import DPBinaryClassHead 17 | from tensorflow_privacy.privacy.estimators.multi_class_head import DPMultiClassHead 18 | 19 | 20 | def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary, 21 | loss_reduction): 22 | """Creates either binary or multi-class head. 23 | 24 | Args: 25 | n_classes: Number of label classes. 26 | weight_column: A string or a `NumericColumn` created by 27 | `tf.feature_column.numeric_column` defining feature column representing 28 | weights. It is used to down weight or boost examples during training. It 29 | will be multiplied by the loss of the example. If it is a string, it is 30 | used as a key to fetch weight tensor from the `features`. If it is a 31 | `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then 32 | weight_column.normalizer_fn is applied on it to get weight tensor. 33 | label_vocabulary: A list of strings represents possible label values. If 34 | given, labels must be string type and have any value in 35 | `label_vocabulary`. If it is not given, that means labels are already 36 | encoded as integer or float within [0, 1] for `n_classes=2` and encoded as 37 | integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there 38 | will be errors if vocabulary is not provided and labels are string. 39 | loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how to 40 | reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. 41 | 42 | Returns: 43 | A `Head` instance. 44 | """ 45 | if n_classes == 2: 46 | head = DPBinaryClassHead( 47 | weight_column=weight_column, 48 | label_vocabulary=label_vocabulary, 49 | loss_reduction=loss_reduction) 50 | else: 51 | head = DPMultiClassHead( 52 | n_classes, 53 | weight_column=weight_column, 54 | label_vocabulary=label_vocabulary, 55 | loss_reduction=loss_reduction) 56 | return head 57 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "v1", 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "head", 12 | srcs = [ 13 | "head.py", 14 | ], 15 | ) 16 | 17 | py_library( 18 | name = "dnn", 19 | srcs = [ 20 | "dnn.py", 21 | ], 22 | deps = [":head"], 23 | ) 24 | 25 | py_library( 26 | name = "linear", 27 | srcs = [ 28 | "linear.py", 29 | ], 30 | deps = [":head"], 31 | ) 32 | 33 | py_test( 34 | name = "head_test", 35 | timeout = "long", 36 | srcs = ["head_test.py"], 37 | deps = [ 38 | ":head", 39 | "//tensorflow_privacy/privacy/estimators:test_utils", 40 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 41 | ], 42 | ) 43 | 44 | py_test( 45 | name = "dnn_test", 46 | timeout = "long", 47 | srcs = ["dnn_test.py"], 48 | deps = [ 49 | ":dnn", 50 | "//tensorflow_privacy/privacy/estimators:test_utils", 51 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 52 | ], 53 | ) 54 | 55 | py_test( 56 | name = "linear_test", 57 | timeout = "long", 58 | srcs = ["linear_test.py"], 59 | deps = [ 60 | ":linear", 61 | "//tensorflow_privacy/privacy/estimators:test_utils", 62 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 63 | ], 64 | ) 65 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/dnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """DP version of DNNClassifiers v1.""" 16 | 17 | import tensorflow as tf 18 | 19 | from tensorflow_privacy.privacy.estimators.v1 import head as head_lib 20 | from tensorflow_estimator.python.estimator import estimator 21 | from tensorflow_estimator.python.estimator.canned import dnn 22 | 23 | 24 | class DNNClassifier(estimator.Estimator): 25 | """DP version of `tf.compat.v1.estimator.DNNClassifier`.""" 26 | 27 | def __init__( 28 | self, 29 | hidden_units, 30 | feature_columns, 31 | model_dir=None, 32 | n_classes=2, 33 | weight_column=None, 34 | label_vocabulary=None, 35 | optimizer='Adagrad', 36 | activation_fn=tf.nn.relu, 37 | dropout=None, 38 | input_layer_partitioner=None, 39 | config=None, 40 | warm_start_from=None, 41 | loss_reduction=tf.compat.v1.losses.Reduction.SUM, # For scalar summary. 42 | batch_norm=False, 43 | ): 44 | """See `tf.compat.v1.estimator.DNNClassifier`.""" 45 | head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access 46 | n_classes, weight_column, label_vocabulary, loss_reduction) 47 | estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN') 48 | 49 | def _model_fn(features, labels, mode, config): 50 | """Call the defined shared dnn_model_fn.""" 51 | return dnn._dnn_model_fn( # pylint: disable=protected-access 52 | features=features, 53 | labels=labels, 54 | mode=mode, 55 | head=head, 56 | hidden_units=hidden_units, 57 | feature_columns=tuple(feature_columns or []), 58 | optimizer=optimizer, 59 | activation_fn=activation_fn, 60 | dropout=dropout, 61 | input_layer_partitioner=input_layer_partitioner, 62 | config=config, 63 | batch_norm=batch_norm) 64 | 65 | super().__init__( 66 | model_fn=_model_fn, 67 | model_dir=model_dir, 68 | config=config, 69 | warm_start_from=warm_start_from) 70 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/dnn_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import functools 16 | 17 | from absl.testing import parameterized 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.estimators import test_utils 20 | from tensorflow_privacy.privacy.estimators.v1 import dnn 21 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer 22 | 23 | # pylint: disable=g-deprecated-tf-checker 24 | 25 | 26 | class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase): 27 | """Tests for DP-enabled DNNClassifier.""" 28 | 29 | @parameterized.named_parameters( 30 | ('BinaryClassDNN', 2, 1), 31 | ('BinaryClassDNN 4', 2, 4), 32 | ('MultiClassDNN 3', 3, 1), 33 | ('MultiClassDNN 4', 4, 1), 34 | ('MultiClassDNN 4 4', 4, 4), 35 | ) 36 | def testDNN(self, n_classes, num_microbatches): 37 | train_features, train_labels = test_utils.make_input_data(256, n_classes) 38 | feature_columns = [] 39 | for key in train_features: 40 | feature_columns.append(tf.feature_column.numeric_column(key=key)) 41 | 42 | optimizer = functools.partial( 43 | DPGradientDescentGaussianOptimizer, 44 | learning_rate=0.5, 45 | l2_norm_clip=1.0, 46 | noise_multiplier=0.0, 47 | num_microbatches=num_microbatches, 48 | ) 49 | 50 | classifier = dnn.DNNClassifier( 51 | hidden_units=[10], 52 | activation_fn='relu', 53 | feature_columns=feature_columns, 54 | n_classes=n_classes, 55 | optimizer=optimizer, 56 | loss_reduction=tf.losses.Reduction.NONE) 57 | 58 | classifier.train( 59 | input_fn=test_utils.make_input_fn(train_features, train_labels, True, 60 | 16)) 61 | 62 | test_features, test_labels = test_utils.make_input_data(64, n_classes) 63 | classifier.evaluate( 64 | input_fn=test_utils.make_input_fn(test_features, test_labels, False, 65 | 16)) 66 | 67 | predict_features, predict_labels = test_utils.make_input_data(64, n_classes) 68 | classifier.predict( 69 | input_fn=test_utils.make_input_fn(predict_features, predict_labels, 70 | False)) 71 | 72 | 73 | if __name__ == '__main__': 74 | tf.test.main() 75 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """DP version of LinearClassifier v1.""" 16 | 17 | import tensorflow as tf 18 | from tensorflow_privacy.privacy.estimators.v1 import head as head_lib 19 | from tensorflow_estimator.python.estimator import estimator # pylint: disable=g-deprecated-tf-checker 20 | from tensorflow_estimator.python.estimator.canned import linear # pylint: disable=g-deprecated-tf-checker 21 | 22 | 23 | class LinearClassifier(estimator.Estimator): 24 | """DP version of `tf.compat.v1.estimator.LinearClassifier`.""" 25 | 26 | def __init__( 27 | self, 28 | feature_columns, 29 | model_dir=None, 30 | n_classes=2, 31 | weight_column=None, 32 | label_vocabulary=None, 33 | optimizer='Ftrl', 34 | config=None, 35 | partitioner=None, 36 | warm_start_from=None, 37 | loss_reduction=tf.compat.v1.losses.Reduction.SUM, # For scalar summary. 38 | sparse_combiner='sum', 39 | ): 40 | """See `tf.compat.v1.estimator.LinearClassifier`.""" 41 | linear._validate_linear_sdca_optimizer_for_linear_classifier( # pylint: disable=protected-access 42 | feature_columns=feature_columns, 43 | n_classes=n_classes, 44 | optimizer=optimizer, 45 | sparse_combiner=sparse_combiner, 46 | ) 47 | estimator._canned_estimator_api_gauge.get_cell('Classifier').set('Linear') # pylint: disable=protected-access 48 | 49 | head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access 50 | n_classes, weight_column, label_vocabulary, loss_reduction 51 | ) 52 | 53 | def _model_fn(features, labels, mode, config): 54 | """Call the defined shared _linear_model_fn.""" 55 | return linear._linear_model_fn( # pylint: disable=protected-access 56 | features=features, 57 | labels=labels, 58 | mode=mode, 59 | head=head, 60 | feature_columns=tuple(feature_columns or []), 61 | optimizer=optimizer, 62 | partitioner=partitioner, 63 | config=config, 64 | sparse_combiner=sparse_combiner, 65 | ) 66 | 67 | super(LinearClassifier, self).__init__( 68 | model_fn=_model_fn, 69 | model_dir=model_dir, 70 | config=config, 71 | warm_start_from=warm_start_from, 72 | ) 73 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/estimators/v1/linear_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for LinearClassifier.""" 15 | 16 | import functools 17 | 18 | from absl.testing import parameterized 19 | import tensorflow as tf 20 | from tensorflow_privacy.privacy.estimators import test_utils 21 | from tensorflow_privacy.privacy.estimators.v1 import linear 22 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer 23 | 24 | # pylint: disable=g-deprecated-tf-checker 25 | 26 | 27 | class DPLinearClassifierClassifierTest( 28 | tf.test.TestCase, parameterized.TestCase 29 | ): 30 | """Tests for DP-enabled LinearClassifier.""" 31 | 32 | @parameterized.named_parameters( 33 | ('BinaryClassLinear 1', 2, 1), 34 | ('BinaryClassLinear 4', 2, 4), 35 | ('MultiClassLinear 3', 3, 1), 36 | ('MultiClassLinear 4', 4, 1), 37 | ('MultiClassLinear 4 1', 4, 2), 38 | ) 39 | def testRunsWithoutErrors(self, n_classes, num_microbatches): 40 | train_features, train_labels = test_utils.make_input_data(256, n_classes) 41 | feature_columns = [] 42 | for key in train_features: 43 | feature_columns.append(tf.feature_column.numeric_column(key=key)) # pylint: disable=g-deprecated-tf-checker 44 | 45 | optimizer = functools.partial( 46 | DPGradientDescentGaussianOptimizer, 47 | learning_rate=0.5, 48 | l2_norm_clip=1.0, 49 | noise_multiplier=0.0, 50 | num_microbatches=num_microbatches, 51 | ) 52 | 53 | classifier = linear.LinearClassifier( 54 | feature_columns=feature_columns, 55 | n_classes=n_classes, 56 | optimizer=optimizer, 57 | loss_reduction=tf.compat.v1.losses.Reduction.SUM, 58 | ) 59 | 60 | classifier.train( 61 | input_fn=test_utils.make_input_fn( 62 | train_features, train_labels, True, 16 63 | ) 64 | ) 65 | 66 | test_features, test_labels = test_utils.make_input_data(64, n_classes) 67 | classifier.evaluate( 68 | input_fn=test_utils.make_input_fn(test_features, test_labels, False, 16) 69 | ) 70 | 71 | predict_features, predict_labels = test_utils.make_input_data(64, n_classes) 72 | classifier.predict( 73 | input_fn=test_utils.make_input_fn( 74 | predict_features, predict_labels, False 75 | ) 76 | ) 77 | 78 | 79 | if __name__ == '__main__': 80 | tf.test.main() 81 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | py_library( 4 | name = "type_aliases", 5 | srcs = ["type_aliases.py"], 6 | ) 7 | 8 | py_library( 9 | name = "bert_encoder_utils", 10 | srcs = ["bert_encoder_utils.py"], 11 | deps = [":gradient_clipping_utils"], 12 | ) 13 | 14 | py_test( 15 | name = "bert_encoder_utils_test", 16 | srcs = ["bert_encoder_utils_test.py"], 17 | deps = [":bert_encoder_utils"], 18 | ) 19 | 20 | py_library( 21 | name = "common_manip_utils", 22 | srcs = ["common_manip_utils.py"], 23 | deps = [":type_aliases"], 24 | ) 25 | 26 | py_library( 27 | name = "common_test_utils", 28 | srcs = ["common_test_utils.py"], 29 | deps = [ 30 | ":clip_grads", 31 | ":layer_registry", 32 | ":type_aliases", 33 | ], 34 | ) 35 | 36 | py_library( 37 | name = "gradient_clipping_utils", 38 | srcs = ["gradient_clipping_utils.py"], 39 | deps = [ 40 | ":common_manip_utils", 41 | ":layer_registry", 42 | ":type_aliases", 43 | "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry", 44 | "//tensorflow_privacy/privacy/sparsity_preserving_noise:type_aliases", 45 | ], 46 | ) 47 | 48 | py_test( 49 | name = "gradient_clipping_utils_test", 50 | srcs = ["gradient_clipping_utils_test.py"], 51 | shard_count = 8, 52 | deps = [ 53 | ":gradient_clipping_utils", 54 | ":layer_registry", 55 | "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry", 56 | ], 57 | ) 58 | 59 | py_library( 60 | name = "layer_registry", 61 | srcs = ["layer_registry.py"], 62 | deps = [ 63 | ":type_aliases", 64 | "//tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions:dense", 65 | "//tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions:embedding", 66 | ], 67 | ) 68 | 69 | py_library( 70 | name = "clip_grads", 71 | srcs = ["clip_grads.py"], 72 | deps = [ 73 | ":common_manip_utils", 74 | ":gradient_clipping_utils", 75 | ":layer_registry", 76 | ":type_aliases", 77 | ], 78 | ) 79 | 80 | py_library( 81 | name = "noise_utils", 82 | srcs = ["noise_utils.py"], 83 | deps = ["//tensorflow_privacy/privacy/sparsity_preserving_noise:sparse_noise_utils"], 84 | ) 85 | 86 | py_test( 87 | name = "clip_grads_test", 88 | srcs = ["clip_grads_test.py"], 89 | shard_count = 8, 90 | deps = [ 91 | ":clip_grads", 92 | ":common_test_utils", 93 | ":gradient_clipping_utils", 94 | ":layer_registry", 95 | ":type_aliases", 96 | ], 97 | ) 98 | 99 | py_test( 100 | name = "noise_utils_test", 101 | srcs = ["noise_utils_test.py"], 102 | deps = [":noise_utils"], 103 | ) 104 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/common_manip_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A collection of common utility functions for tensor/data manipulation.""" 15 | 16 | from typing import Optional 17 | 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases 20 | 21 | 22 | def maybe_add_microbatch_axis( 23 | x: type_aliases.PackedTensors, 24 | num_microbatches: Optional[type_aliases.BatchSize], 25 | ) -> type_aliases.PackedTensors: 26 | """Adds the microbatch axis to a collection of tensors. 27 | 28 | Args: 29 | x: Model output or input tensors. 30 | num_microbatches: If None, x is returned unchanged. Otherwise, must divide 31 | the batch size. 32 | 33 | Returns: 34 | The input tensor x, reshaped from [batch_size, ...] to 35 | [num_microbatches, batch_size / num_microbatches, ...]. 36 | """ 37 | if num_microbatches is None: 38 | return x 39 | 40 | def _expand(t): 41 | with tf.control_dependencies( 42 | [tf.assert_equal(tf.math.floormod(tf.shape(t)[0], num_microbatches), 0)] 43 | ): 44 | return tf.reshape( 45 | t, tf.concat([[num_microbatches, -1], tf.shape(t)[1:]], axis=0) 46 | ) 47 | 48 | return tf.nest.map_structure(_expand, x) 49 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/dense_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils as ctu 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import dense_test 18 | 19 | 20 | class GradNormTpuTest(dense_test.GradNormTest): 21 | 22 | def setUp(self): 23 | super(dense_test.GradNormTest, self).setUp() 24 | self.strategy = ctu.create_tpu_strategy() 25 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 26 | self.using_tpu = True 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/einsum_dense.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fast clipping function for `tfm.nlp.layers.EinsumDense`.""" 15 | 16 | from collections.abc import Mapping, Sequence 17 | from typing import Any, Optional 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases 20 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import einsum_utils 21 | 22 | 23 | def einsum_layer_computation( 24 | layer_instance: tf.keras.layers.EinsumDense, 25 | input_args: Sequence[Any], 26 | input_kwargs: Mapping[str, Any], 27 | tape: tf.GradientTape, 28 | num_microbatches: Optional[tf.Tensor] = None, 29 | ) -> type_aliases.RegistryFunctionOutput: 30 | """Registry function for `tf.keras.layers.EinsumDense`. 31 | 32 | For the technical details, see the documentation of 33 | `einsum_utils.compute_fast_einsum_gradient_norm()`. 34 | 35 | Args: 36 | layer_instance: A `tf.keras.layers.EinsumDense` instance. 37 | input_args: See `dense_layer_computation()` in `dense.py`. 38 | input_kwargs: See `dense_layer_computation()` in `dense.py`. 39 | tape: See `dense_layer_computation()` in `dense.py`. 40 | num_microbatches: See `dense_layer_computation()` in `dense.py`. 41 | 42 | Returns: 43 | See `dense_layer_computation()` in `dense.py`. 44 | """ 45 | if input_kwargs: 46 | raise ValueError("EinsumDense layer calls should not receive kwargs.") 47 | del input_kwargs 48 | if len(input_args) != 1: 49 | raise ValueError("Only layer inputs of length 1 are permitted.") 50 | orig_activation = layer_instance.activation 51 | # Some activation functions may not apply a transform to the elements of the 52 | # output individually (which is needed for the fast clipping trick to work). 53 | # To avoid this case, we watch the variables that are only generated by the 54 | # linear transformation of the `EinsumDense` layer instance. 55 | layer_instance.activation = None 56 | base_vars = layer_instance(*input_args) 57 | tape.watch(base_vars) 58 | layer_instance.activation = orig_activation 59 | outputs = orig_activation(base_vars) if orig_activation else base_vars 60 | 61 | def sqr_norm_fn(grads): 62 | return einsum_utils.compute_fast_einsum_squared_gradient_norm( 63 | layer_instance.equation, 64 | input_args[0], 65 | grads, 66 | layer_instance.bias_axes, 67 | num_microbatches, 68 | ) 69 | 70 | return base_vars, outputs, sqr_norm_fn 71 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/einsum_dense_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import einsum_dense_test 18 | 19 | 20 | class GradNormTpuTest(einsum_dense_test.GradNormTest): 21 | 22 | def setUp(self): 23 | super(einsum_dense_test.GradNormTest, self).setUp() 24 | self.strategy = common_test_utils.create_tpu_strategy() 25 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 26 | self.using_tpu = True 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/embedding_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import embedding_test 18 | 19 | 20 | class GradNormTpuTest(embedding_test.GradNormTest): 21 | 22 | def setUp(self): 23 | tf.config.experimental.disable_mlir_bridge() 24 | super(embedding_test.GradNormTest, self).setUp() 25 | self.strategy = common_test_utils.create_tpu_strategy() 26 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 27 | self.using_tpu = True 28 | 29 | 30 | if __name__ == '__main__': 31 | tf.test.main() 32 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/layer_normalization_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import layer_normalization_test 18 | 19 | 20 | class GradNormTpuTest(layer_normalization_test.GradNormTest): 21 | 22 | def setUp(self): 23 | super(layer_normalization_test.GradNormTest, self).setUp() 24 | self.strategy = common_test_utils.create_tpu_strategy() 25 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 26 | self.using_tpu = True 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/multi_head_attention_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils as ctu 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import multi_head_attention_test 18 | 19 | 20 | class GradNormTpuTest(multi_head_attention_test.GradNormTest): 21 | 22 | def setUp(self): 23 | super(multi_head_attention_test.GradNormTest, self).setUp() 24 | self.strategy = ctu.create_tpu_strategy() 25 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 26 | self.using_tpu = True 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_on_device_embedding.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fast clipping function for `tfm.nlp.layers.OnDeviceEmbedding`.""" 15 | 16 | from collections.abc import Mapping, Sequence 17 | from typing import Any, Optional 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases 20 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import registry_function_utils 21 | 22 | 23 | def nlp_on_device_embedding_layer_computation( 24 | layer_instance: tf.keras.layers.Layer, 25 | input_args: Sequence[Any], 26 | input_kwargs: Mapping[str, Any], 27 | tape: tf.GradientTape, 28 | num_microbatches: Optional[tf.Tensor] = None, 29 | ) -> type_aliases.RegistryFunctionOutput: 30 | """Registry function for `tfm.nlp.layers.OnDeviceEmbedding`. 31 | 32 | Args: 33 | layer_instance: A `tfm.nlp.layers.OnDeviceEmbedding` instance. 34 | input_args: See `dense_layer_computation()` in `dense.py`. 35 | input_kwargs: See `dense_layer_computation()` in `dense.py`. 36 | tape: See `dense_layer_computation()` in `dense.py`. 37 | num_microbatches: See `dense_layer_computation()` in `dense.py`. 38 | 39 | Returns: 40 | See `dense_layer_computation()` in `dense.py`. 41 | """ 42 | if input_kwargs: 43 | raise ValueError("Embedding layer calls should not receive kwargs.") 44 | del input_kwargs 45 | if len(input_args) != 1: 46 | raise ValueError("Only layer inputs of length 1 are permitted.") 47 | if hasattr(layer_instance, "_use_one_hot"): 48 | if layer_instance._use_one_hot: # pylint: disable=protected-access 49 | raise NotImplementedError( 50 | "The embedding feature '_use_one_hot' is not supported." 51 | ) 52 | # NOTE: Since the implementation of `tfm.nlp.layers.OnDeviceEmbedding` uses 53 | # `.set_shape()`, we can assume that inputs are not ragged. 54 | input_ids = tf.cast(*input_args, tf.int32) 55 | if len(layer_instance.trainable_variables) != 1: 56 | raise ValueError( 57 | "Only layer instances with only one set of trainable variables" 58 | "are permitted." 59 | ) 60 | base_vars = layer_instance.trainable_variables[0] 61 | tape.watch(base_vars) 62 | outputs = layer_instance(input_ids) 63 | 64 | def sqr_norm_fn(base_vars_grads: tf.IndexedSlices): 65 | return registry_function_utils.embedding_sqr_norm_fn( 66 | base_vars_grads.values, 67 | input_ids, 68 | num_microbatches, 69 | ) 70 | 71 | return base_vars, outputs, sqr_norm_fn 72 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_on_device_embedding_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import nlp_on_device_embedding_test 18 | 19 | 20 | class GradNormTpuTest(nlp_on_device_embedding_test.GradNormTest): 21 | 22 | def setUp(self): 23 | tf.config.experimental.disable_mlir_bridge() 24 | super(nlp_on_device_embedding_test.GradNormTest, self).setUp() 25 | self.strategy = common_test_utils.create_tpu_strategy() 26 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 27 | self.using_tpu = True 28 | 29 | 30 | if __name__ == '__main__': 31 | tf.test.main() 32 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_position_embedding.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fast clipping function for `tfm.nlp.layers.OnDeviceEmbedding`.""" 15 | 16 | from collections.abc import Mapping, Sequence 17 | from typing import Any, Optional 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_manip_utils 20 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases 21 | 22 | 23 | def nlp_position_embedding_layer_computation( 24 | layer_instance: tf.keras.layers.Layer, 25 | input_args: Sequence[Any], 26 | input_kwargs: Mapping[str, Any], 27 | tape: tf.GradientTape, 28 | num_microbatches: Optional[tf.Tensor] = None, 29 | ) -> type_aliases.RegistryFunctionOutput: 30 | """Registry function for `tfm.nlp.layers.PositionEmbedding`. 31 | 32 | Args: 33 | layer_instance: A `tfm.nlp.layers.PositionEmbedding` instance. 34 | input_args: See `dense_layer_computation()` in `dense.py`. 35 | input_kwargs: See `dense_layer_computation()` in `dense.py`. 36 | tape: See `dense_layer_computation()` in `dense.py`. 37 | num_microbatches: See `dense_layer_computation()` in `dense.py`. 38 | 39 | Returns: 40 | See `dense_layer_computation()` in `dense.py`. 41 | """ 42 | if input_kwargs: 43 | raise ValueError("Embedding layer calls should not receive kwargs.") 44 | del input_kwargs 45 | if len(input_args) != 1: 46 | raise ValueError("Only layer inputs of length 1 are permitted.") 47 | input_ids = tf.cast(*input_args, tf.int32) 48 | base_vars = layer_instance(input_ids) 49 | tape.watch(base_vars) 50 | 51 | def sqr_norm_fn(grads): 52 | broadcast_axes = list(range(len(grads.shape))) 53 | del broadcast_axes[layer_instance._seq_axis] # pylint: disable=protected-access 54 | del broadcast_axes[-1], broadcast_axes[0] 55 | reduced_grads = tf.reduce_sum(grads, axis=broadcast_axes) 56 | if num_microbatches is not None: 57 | reduced_grads = common_manip_utils.maybe_add_microbatch_axis( 58 | reduced_grads, 59 | num_microbatches, 60 | ) 61 | reduced_grads = tf.reduce_sum(reduced_grads, axis=1) 62 | reduction_axes = tf.range(1, tf.rank(reduced_grads)) 63 | return tf.reduce_sum(tf.square(reduced_grads), axis=reduction_axes) 64 | 65 | return base_vars, base_vars, sqr_norm_fn 66 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_position_embedding_tpu_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import tensorflow as tf 16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils 17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import nlp_position_embedding_test 18 | 19 | 20 | class GradNormTpuTest(nlp_position_embedding_test.GradNormTest): 21 | 22 | def setUp(self): 23 | super(nlp_position_embedding_test.GradNormTest, self).setUp() 24 | self.strategy = common_test_utils.create_tpu_strategy() 25 | self.assertIn('TPU', self.strategy.extended.worker_devices[0]) 26 | self.using_tpu = True 27 | 28 | 29 | if __name__ == '__main__': 30 | tf.test.main() 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/fast_gradient_clipping/type_aliases.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A collection of type aliases used throughout the clipping library.""" 15 | 16 | from collections.abc import Callable, Iterable, Mapping, Sequence 17 | from typing import Any, Optional, Union 18 | import tensorflow as tf 19 | 20 | 21 | # Tensorflow aliases. 22 | Tensor = Union[tf.Tensor, tf.IndexedSlices, tf.SparseTensor, tf.RaggedTensor] 23 | 24 | PackedTensors = Union[Tensor, Iterable[Tensor], Mapping[str, Tensor]] 25 | 26 | InputTensors = PackedTensors 27 | 28 | OutputTensors = Union[Tensor, Iterable[Tensor]] 29 | 30 | BatchSize = Union[int, tf.Tensor] 31 | 32 | LossFn = Callable[..., tf.Tensor] 33 | 34 | # Layer Registry aliases. 35 | SquareNormFunction = Callable[[OutputTensors], tf.Tensor] 36 | 37 | RegistryFunctionOutput = tuple[Any, OutputTensors, SquareNormFunction] 38 | 39 | RegistryFunction = Callable[ 40 | [ 41 | Any, 42 | tuple[Any, ...], 43 | Mapping[str, Any], 44 | tf.GradientTape, 45 | Union[tf.Tensor, None], 46 | ], 47 | RegistryFunctionOutput, 48 | ] 49 | 50 | # Clipping aliases. 51 | GeneratorFunction = Optional[Callable[[Any, tuple, Mapping], tuple[Any, Any]]] 52 | 53 | # Testing aliases. 54 | LayerGenerator = Callable[[int, int], tf.keras.layers.Layer] 55 | 56 | ModelGenerator = Callable[ 57 | [LayerGenerator, Sequence[int], Sequence[int]], tf.keras.Model 58 | ] 59 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/keras_models/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "keras_models", 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "dp_keras_model", 12 | srcs = [ 13 | "dp_keras_model.py", 14 | ], 15 | deps = [ 16 | "//tensorflow_privacy/privacy/fast_gradient_clipping:clip_grads", 17 | "//tensorflow_privacy/privacy/fast_gradient_clipping:common_manip_utils", 18 | "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils", 19 | "//tensorflow_privacy/privacy/fast_gradient_clipping:noise_utils", 20 | "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry", 21 | "//tensorflow_privacy/privacy/sparsity_preserving_noise:sparse_noise_utils", 22 | ], 23 | ) 24 | 25 | py_test( 26 | name = "dp_keras_model_test", 27 | srcs = ["dp_keras_model_test.py"], 28 | shard_count = 16, 29 | deps = [ 30 | "//tensorflow_privacy/privacy/fast_gradient_clipping:layer_registry", 31 | "//tensorflow_privacy/privacy/keras_models:dp_keras_model", 32 | ], 33 | ) 34 | 35 | py_test( 36 | name = "dp_keras_model_distributed_test", 37 | timeout = "long", 38 | srcs = ["dp_keras_model_distributed_test.py"], 39 | tags = [ 40 | "manual", 41 | ], 42 | deps = [ 43 | ":dp_keras_model", 44 | "//tensorflow_privacy/privacy/fast_gradient_clipping:layer_registry", 45 | ], 46 | ) 47 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/keras_models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/logistic_regression/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "logistic_regression", 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "multinomial_logistic", 12 | srcs = ["multinomial_logistic.py"], 13 | deps = [ 14 | ":datasets", 15 | ":single_layer_softmax", 16 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras", 17 | ], 18 | ) 19 | 20 | py_test( 21 | name = "multinomial_logistic_test", 22 | size = "large", 23 | srcs = ["multinomial_logistic_test.py"], 24 | local = True, 25 | deps = [ 26 | ":datasets", 27 | ":multinomial_logistic", 28 | "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib", 29 | ], 30 | ) 31 | 32 | py_library( 33 | name = "datasets", 34 | srcs = ["datasets.py"], 35 | ) 36 | 37 | py_test( 38 | name = "datasets_test", 39 | size = "small", 40 | srcs = ["datasets_test.py"], 41 | tags = ["requires-net:external"], 42 | deps = [":datasets"], 43 | ) 44 | 45 | py_library( 46 | name = "single_layer_softmax", 47 | srcs = ["single_layer_softmax.py"], 48 | deps = [":datasets"], 49 | ) 50 | 51 | py_test( 52 | name = "single_layer_softmax_test", 53 | size = "medium", 54 | srcs = ["single_layer_softmax_test.py"], 55 | deps = [ 56 | ":datasets", 57 | ":single_layer_softmax", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/logistic_regression/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Implementation of a single-layer softmax classifier.""" 15 | 16 | from typing import List, Optional, Union, Tuple, Any 17 | 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.logistic_regression import datasets 20 | 21 | 22 | def single_layer_softmax_classifier( 23 | train_dataset: datasets.RegressionDataset, 24 | test_dataset: datasets.RegressionDataset, 25 | epochs: int, 26 | num_classes: int, 27 | optimizer: tf.keras.optimizers.Optimizer, 28 | loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy', 29 | batch_size: int = 32, 30 | kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None 31 | ) -> Tuple[Any, List[float]]: 32 | """Trains a single layer neural network classifier with softmax activation. 33 | 34 | Args: 35 | train_dataset: consists of num_train many labeled examples, where the labels 36 | are in {0,1,...,num_classes-1}. 37 | test_dataset: consists of num_test many labeled examples, where the labels 38 | are in {0,1,...,num_classes-1}. 39 | epochs: the number of epochs. 40 | num_classes: the number of classes. 41 | optimizer: a tf.keras optimizer. 42 | loss: a tf.keras loss function. 43 | batch_size: a positive integer. 44 | kernel_regularizer: a regularization function. 45 | 46 | Returns: 47 | List of test accuracies (one for each epoch) on test_dataset of model 48 | trained on train_dataset. 49 | """ 50 | one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes) 51 | one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes) 52 | model = tf.keras.Sequential() 53 | model.add( 54 | tf.keras.layers.Dense( 55 | units=num_classes, 56 | activation='softmax', 57 | kernel_regularizer=kernel_regularizer)) 58 | model.compile(optimizer, loss=loss, metrics=['accuracy']) 59 | history = model.fit( 60 | train_dataset.points, 61 | one_hot_train_labels, 62 | batch_size=batch_size, 63 | epochs=epochs, 64 | validation_data=(test_dataset.points, one_hot_test_labels), 65 | verbose=0) 66 | weights = model.layers[0].weights 67 | return weights, history.history['val_accuracy'] 68 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from absl.testing import parameterized 18 | from tensorflow_privacy.privacy.logistic_regression import datasets 19 | from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax 20 | 21 | 22 | class SingleLayerSoftmaxTest(parameterized.TestCase): 23 | 24 | @parameterized.parameters( 25 | (5000, 500, 3, 40, 2, 0.05), 26 | (5000, 500, 4, 40, 2, 0.05), 27 | (10000, 1000, 3, 40, 4, 0.1), 28 | (10000, 1000, 4, 40, 4, 0.1), 29 | ) 30 | def test_single_layer_softmax(self, num_train, num_test, dimension, epochs, 31 | num_classes, tolerance): 32 | (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( 33 | num_train, num_test, dimension, num_classes) 34 | _, accuracy = single_layer_softmax.single_layer_softmax_classifier( 35 | train_dataset, test_dataset, epochs, num_classes, 'sgd') 36 | self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "__init__", # There is already a membership_inference_attack. 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "keras_evaluation", 12 | srcs = ["keras_evaluation.py"], 13 | deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:keras_evaluation"], 14 | ) 15 | 16 | py_library( 17 | name = "plotting", 18 | srcs = ["plotting.py"], 19 | deps = [ 20 | "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:plotting", 21 | ], 22 | ) 23 | 24 | py_library( 25 | name = "membership_inference_attack", 26 | srcs = [ 27 | "data_structures.py", 28 | "dataset_slicing.py", 29 | "membership_inference_attack.py", 30 | "models.py", 31 | "plotting.py", 32 | "seq2seq_mia.py", 33 | ], 34 | deps = [ 35 | "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack", 36 | ], 37 | ) 38 | 39 | py_library( 40 | name = "privacy_report", 41 | srcs = ["privacy_report.py"], 42 | deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack"], 43 | ) 44 | 45 | py_library( 46 | name = "tf_estimator_evaluation", 47 | srcs = ["tf_estimator_evaluation.py"], 48 | deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:tf_estimator_evaluation"], 49 | ) 50 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/README.md: -------------------------------------------------------------------------------- 1 | The sources from this folder were moved to 2 | privacy/privacy_tests/membership_inference_attack. 3 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """The old location of Membership Inference Attack sources.""" 15 | 16 | import warnings 17 | 18 | warnings.warn( 19 | "\nMembership inference attack sources were moved. Please replace" 20 | "\nimport tensorflow_privacy.privacy.membership_inference_attack\n" 21 | "\nwith" 22 | "\nimport tensorflow_privacy.privacy.privacy_tests.membership_inference_attack" 23 | ) 24 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/data_structures.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.membership_inference_attack import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/plotting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Moved to privacy_attack/membership_inference_attack.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import * # pylint: disable=wildcard-import 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "privacy_tests", 7 | srcs = ["__init__.py"], 8 | deps = [ 9 | ":version", 10 | ], 11 | ) 12 | 13 | py_test( 14 | name = "utils_test", 15 | timeout = "long", 16 | srcs = ["utils_test.py"], 17 | deps = [":utils"], 18 | ) 19 | 20 | py_test( 21 | name = "epsilon_lower_bound_test", 22 | srcs = ["epsilon_lower_bound_test.py"], 23 | deps = [":epsilon_lower_bound"], 24 | ) 25 | 26 | py_library( 27 | name = "utils", 28 | srcs = ["utils.py"], 29 | ) 30 | 31 | py_library( 32 | name = "epsilon_lower_bound", 33 | srcs = ["epsilon_lower_bound.py"], 34 | ) 35 | 36 | py_library( 37 | name = "version", 38 | srcs = ["version.py"], 39 | ) 40 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/README.md: -------------------------------------------------------------------------------- 1 | # Privacy tests 2 | 3 | A good privacy-preserving model learns from the training data, but 4 | doesn't memorize individual samples. Excessive memorization is not only harmful 5 | for the model predictive power, but also presents a privacy risk. 6 | 7 | This library provides empirical tests for measuring potential memorization. 8 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Empirical Privacy.""" 15 | 16 | from tensorflow_privacy.privacy.privacy_tests.version import __version__ # pylint: disable=g-bad-import-order 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We are happy to accept contributions to this project under the research folder. 4 | The research folder is intended for the attacks that are not yet generic enough 5 | to be included into the main library. 6 | 7 | We are happy to accept contributions to the primary codebase, see below for more 8 | details. 9 | 10 | Please follow these guidelines when sending us a pull request. 11 | 12 | ## Contributor License Agreement 13 | 14 | Contributions to this project must be accompanied by a Contributor License 15 | Agreement. You (or your employer) retain the copyright to your contribution, 16 | this simply gives us permission to use and redistribute your contributions as 17 | part of the project. Head over to to see 18 | your current agreements on file or to sign a new one. 19 | 20 | You generally only need to submit a CLA once, so if you've already submitted 21 | one (even if it was for a different project), you probably don't need to do it 22 | again. 23 | 24 | ## Community Guidelines 25 | 26 | This project follows 27 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 28 | 29 | ## Does my new feature belong here? 30 | 31 | ### Research folder 32 | 33 | We use the following principles to guide what we add to our libraries. If your 34 | contribution doesn't align with these principles, we're likely to decline. 35 | 36 | * **Novelty:** The code should provide new attacks to the library. We will not 37 | accept code that duplicates existing attacks. 38 | * **Appropriate context and explanation:** The code should contain a README.md 39 | file based on the provided template.This template should explain the code's functionality, and provide basic steps on how to use it. 40 | * **Experiment-driven:** The code should contain an runnable example or a colab (e.g. on a toy model such as MNIST or CIFAR-10). 41 | * **Quality requirements:** (1) The code should adhere to the 42 | [Google Python style guide](https://google.github.io/styleguide/pyguide). 43 | (2) The public API of the attack should have clear code documentation (expected inputs/outputs) 44 | (3) The code should have reasonable unit test coverage (>60%); 45 | 46 | ### Primary codebase 47 | 48 | The primary codebase should include attacks that are of general interest and 49 | have a wide range of applications. For example, the standard membership 50 | inference test is applicable to virtually any classification model. 51 | 52 | The code contributed to the primary codebase should have a production style API 53 | which is consistent with the API of other attacks. Most likely, Google and the 54 | contributing team will need to meet and discuss the API before starting the 55 | contribution. 56 | 57 | 58 | If you're uncertain whether a planned contribution fits with these principles, 59 | [open an issue](https://github.com/tensorflow/privacy/issues/new) 60 | and describe what you want to add. We'll let you know whether it's something we 61 | want to include and will help you figure out the best way to implement it. 62 | 63 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_binary( 6 | name = "example", 7 | srcs = ["example.py"], 8 | deps = [ 9 | "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack", 10 | "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:privacy_report", 11 | ], 12 | ) 13 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/README.md: -------------------------------------------------------------------------------- 1 | # Membership inference attack examples 2 | 3 | ## Introductory codelab 4 | 5 | The easiest way to get started is to go through [the introductory codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb). 6 | This trains a simple image classification model and tests it against a series 7 | of membership inference attacks. 8 | 9 | For a more detailed overview of the library, please check the sections below. 10 | 11 | ## End to end example 12 | As an alternative to the introductory codelab, we also have a standalone 13 | [example.py](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py). 14 | 15 | ## Sequence to sequence models 16 | 17 | If you're interested in sequence to sequence model attacks, please see the 18 | [seq2seq colab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb). 19 | 20 | ## Membership probability score 21 | 22 | If you're interested in the membership probability score (also called privacy 23 | risk score) developed by Song and Mittal, please see their 24 | [membership probability codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb). 25 | 26 | The accompanying paper is on [arXiv](https://arxiv.org/abs/2003.10595). 27 | 28 | ## Word2Vec models 29 | 30 | If you're interested in word2vec models, please see the 31 | [word2vec codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/word2vec_codelab.ipynb). 32 | 33 | This notebook implements advanced membership inference, as well as a secret 34 | sharer attack. Based on [this paper](https://arxiv.org/abs/2004.00053) and 35 | [this code](https://github.com/google/embedding-tests). 36 | 37 | ## Copyright 38 | 39 | Copyright 2020 - Google LLC 40 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Congzheng Song 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Plotting functionality for membership inference attack analysis. 15 | 16 | Functions to plot ROC curves and histograms as well as functionality to store 17 | figures to colossus. 18 | """ 19 | 20 | from typing import Text, Iterable, Optional 21 | 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | from sklearn import metrics 25 | 26 | 27 | def save_plot(figure: plt.Figure, path: Text, outformat='png'): 28 | """Store a figure to disk.""" 29 | if path is not None: 30 | with open(path, 'wb') as f: 31 | figure.savefig(f, bbox_inches='tight', format=outformat) 32 | plt.close(figure) 33 | 34 | 35 | def plot_curve_with_area(x: Iterable[float], 36 | y: Iterable[float], 37 | xlabel: Text = 'x', 38 | ylabel: Text = 'y') -> plt.Figure: 39 | """Plot the curve defined by inputs and the area under the curve. 40 | 41 | All entries of x and y are required to lie between 0 and 1. 42 | For example, x could be recall and y precision, or x is fpr and y is tpr. 43 | 44 | Args: 45 | x: Values on x-axis (1d) 46 | y: Values on y-axis (must be same length as x) 47 | xlabel: Label for x axis 48 | ylabel: Label for y axis 49 | 50 | Returns: 51 | The matplotlib figure handle 52 | """ 53 | fig = plt.figure() 54 | plt.plot([0, 1], [0, 1], 'k', lw=1.0) 55 | plt.plot(x, y, lw=2, label=f'AUC: {metrics.auc(x, y):.3f}') 56 | plt.xlabel(xlabel) 57 | plt.ylabel(ylabel) 58 | plt.gca().set_aspect('equal', adjustable='box') 59 | plt.legend() 60 | return fig 61 | 62 | 63 | def plot_histograms(train: Iterable[float], 64 | test: Iterable[float], 65 | xlabel: Text = 'x', 66 | thresh: Optional[float] = None) -> plt.Figure: 67 | """Plot histograms of training versus test metrics.""" 68 | xmin = min(np.min(train), np.min(test)) 69 | xmax = max(np.max(train), np.max(test)) 70 | bins = np.linspace(xmin, xmax, 100) 71 | fig = plt.figure() 72 | plt.hist(test, bins=bins, density=True, alpha=0.5, label='test', log='y') 73 | plt.hist(train, bins=bins, density=True, alpha=0.5, label='train', log='y') 74 | if thresh is not None: 75 | plt.axvline(thresh, c='r', label=f'threshold = {thresh:.3f}') 76 | plt.xlabel(xlabel) 77 | plt.ylabel('normalized counts (density)') 78 | plt.legend() 79 | return fig 80 | 81 | 82 | def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure: 83 | """Plot the ROC curve and the area under the curve.""" 84 | return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR') 85 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utility functions for writing attack results to tensorboard.""" 15 | 16 | from typing import List, Union 17 | 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults 20 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics 21 | 22 | 23 | def write_to_tensorboard_tf2(writers, tags, values, step): 24 | """Write metrics to tensorboard. 25 | 26 | Args: 27 | writers: a list of tensorboard writers or one writer to be used for metrics. 28 | If it's a list, it should be of the same length as tags 29 | tags: a list of tags of metrics 30 | values: a list of values of metrics with the same length as tags 31 | step: step for the tensorboard summary 32 | """ 33 | if writers is None or not writers: 34 | raise ValueError('write_to_tensorboard does not get any writer.') 35 | 36 | if not isinstance(writers, list): 37 | writers = [writers] * len(tags) 38 | 39 | assert len(writers) == len(tags) == len(values) 40 | 41 | for writer, tag, val in zip(writers, tags, values): 42 | with writer.as_default(): 43 | tf.summary.scalar(tag, val, step=step) 44 | writer.flush() 45 | 46 | for writer in set(writers): 47 | with writer.as_default(): 48 | writer.flush() 49 | 50 | 51 | def write_results_to_tensorboard_tf2( 52 | attack_results: AttackResults, 53 | writers: Union[tf.summary.SummaryWriter, List[tf.summary.SummaryWriter]], 54 | step: int, merge_classifiers: bool): 55 | """Write attack results to tensorboard. 56 | 57 | Args: 58 | attack_results: results from attack 59 | writers: a list of tensorboard writers or one writer to be used for metrics 60 | step: step for the tensorboard summary 61 | merge_classifiers: if true, plot different classifiers with the same 62 | slicing_spec and metric in the same figure 63 | """ 64 | if writers is None or not writers: 65 | raise ValueError('write_results_to_tensorboard does not get any writer.') 66 | 67 | att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( 68 | attack_results) 69 | if merge_classifiers: 70 | att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)] 71 | write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags, 72 | att_values, step) 73 | else: 74 | att_tags = [ 75 | 'attack/' + f'{s}_{t}_{m}' 76 | for t, s, m in zip(att_types, att_slices, att_metrics) 77 | ] 78 | write_to_tensorboard_tf2(writers, att_tags, att_values, step) 79 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/secret_sharer/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "secret_sharer", 7 | srcs = ["__init__.py"], 8 | ) 9 | 10 | py_library( 11 | name = "exposures", 12 | srcs = ["exposures.py"], 13 | ) 14 | 15 | py_test( 16 | name = "exposures_test", 17 | srcs = ["exposures_test.py"], 18 | deps = [":exposures"], 19 | ) 20 | 21 | py_library( 22 | name = "generate_secrets", 23 | srcs = ["generate_secrets.py"], 24 | ) 25 | 26 | py_test( 27 | name = "generate_secrets_test", 28 | srcs = ["generate_secrets_test.py"], 29 | deps = [":generate_secrets"], 30 | ) 31 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/secret_sharer/README.md: -------------------------------------------------------------------------------- 1 | # Secret Sharer Attack 2 | 3 | A good privacy-preserving model learns from the training data, but doesn't 4 | memorize it. This folder contains codes for conducting the Secret Sharer attack 5 | from [this paper](https://arxiv.org/abs/1802.08232). It is a method to test if a 6 | machine learning model memorizes its training data. 7 | 8 | The high level idea is to insert some random sequences as “secrets” into the 9 | training data, and then measure if the model has memorized those secrets. If 10 | there is significant memorization, it means that there can be potential privacy 11 | risk. 12 | 13 | ## How to Use 14 | 15 | ### Overview of the files 16 | 17 | - `generate_secrets.py` contains the code for generating secrets. 18 | - `exposures.py` contains code for evaluating exposures. 19 | - `secret_sharer_example.ipynb` is an example (character-level LSTM) for using 20 | the above code to conduct secret sharer attack. 21 | 22 | ### More Usage Examples 23 | 24 | ## Word2Vec models 25 | 26 | If you're interested in word2vec models, please see the 27 | [word2vec codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/word2vec_codelab.ipynb). 28 | 29 | In addition to secret sharer, this notebook also implements membership inference 30 | attacks. Based on [this paper](https://arxiv.org/abs/2004.00053) and 31 | [this code](https://github.com/google/embedding-tests). 32 | 33 | ### Contact / Feedback 34 | 35 | Fill out this 36 | [Google form](https://docs.google.com/forms/d/1DPwr3_OfMcqAOA6sdelTVjIZhKxMZkXvs94z16UCDa4/edit) 37 | or reach out to us at tf-privacy@google.com and let us know how you’re using 38 | this module. We’re keen on hearing your stories, feedback, and suggestions! 39 | 40 | ## Contributing 41 | 42 | If you wish to add novel attacks to the attack library, please check our 43 | [guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md). 44 | 45 | ## Copyright 46 | 47 | Copyright 2021 - Google LLC 48 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/secret_sharer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from absl.testing import absltest 16 | import numpy as np 17 | from scipy import stats 18 | 19 | from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures 20 | 21 | 22 | class UtilsTest(absltest.TestCase): 23 | 24 | def __init__(self, methodname): 25 | """Initialize the test class.""" 26 | super().__init__(methodname) 27 | 28 | def test_exposure_interpolation(self): 29 | """Test exposure by interpolation.""" 30 | perplexities = { 31 | '1': [0, 0.1], # smallest perplexities 32 | '2': [20.0], # largest perplexities 33 | '5': [3.5], # rank = 4 34 | '8': [3.5], # rank = 4 35 | } 36 | perplexities_reference = [float(x) for x in range(1, 17)] 37 | resulted_exposures = exposures.compute_exposure_interpolation( 38 | perplexities, perplexities_reference) 39 | num_perplexities_reference = len(perplexities_reference) 40 | exposure_largest = np.log2(num_perplexities_reference) 41 | exposure_smallest = np.log2(num_perplexities_reference) - np.log2( 42 | num_perplexities_reference + 1) 43 | exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4) 44 | expected_exposures = { 45 | '1': np.array([exposure_largest] * 2), 46 | '2': np.array([exposure_smallest]), 47 | '5': np.array([exposure_rank4]), 48 | '8': np.array([exposure_rank4]) 49 | } 50 | 51 | self.assertEqual(resulted_exposures.keys(), expected_exposures.keys()) 52 | for r in resulted_exposures.keys(): 53 | np.testing.assert_almost_equal(expected_exposures[r], 54 | resulted_exposures[r]) 55 | 56 | def test_exposure_extrapolation(self): 57 | parameters = (4, 0, 1) 58 | perplexities = { 59 | '1': stats.skewnorm.rvs(*parameters, size=(2,)), 60 | '10': stats.skewnorm.rvs(*parameters, size=(5,)) 61 | } 62 | perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,)) 63 | resulted_exposures = exposures.compute_exposure_extrapolation( 64 | perplexities, perplexities_reference) 65 | fitted_parameters = stats.skewnorm.fit(perplexities_reference) 66 | 67 | self.assertEqual(resulted_exposures.keys(), perplexities.keys()) 68 | for r in resulted_exposures.keys(): 69 | np.testing.assert_almost_equal( 70 | resulted_exposures[r], 71 | -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters))) 72 | 73 | 74 | if __name__ == '__main__': 75 | absltest.main() 76 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/privacy_tests/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Privacy/Privacy Tests version.""" 15 | 16 | __version__ = '0.1.0' 17 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/sparsity_preserving_noise/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "sparse_noise_utils", 7 | srcs = ["sparse_noise_utils.py"], 8 | deps = [ 9 | ":type_aliases", 10 | "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils", 11 | ], 12 | ) 13 | 14 | py_test( 15 | name = "sparse_noise_utils_test", 16 | srcs = ["sparse_noise_utils_test.py"], 17 | deps = [ 18 | ":sparse_noise_utils", 19 | "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils", 20 | ], 21 | ) 22 | 23 | py_library( 24 | name = "type_aliases", 25 | srcs = ["type_aliases.py"], 26 | ) 27 | 28 | py_library( 29 | name = "layer_registry", 30 | srcs = ["layer_registry.py"], 31 | deps = [ 32 | ":type_aliases", 33 | "//tensorflow_privacy/privacy/sparsity_preserving_noise/registry_functions:embedding", 34 | ], 35 | ) 36 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/sparsity_preserving_noise/layer_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Registry of layer classes to their contribution histogram functions.""" 15 | 16 | from typing import Type 17 | 18 | import tensorflow as tf 19 | from tensorflow_privacy.privacy.sparsity_preserving_noise import type_aliases 20 | from tensorflow_privacy.privacy.sparsity_preserving_noise.registry_functions import embedding 21 | 22 | 23 | # ============================================================================== 24 | # Main class 25 | # ============================================================================== 26 | class LayerRegistry: 27 | """Custom container for layer registry functions.""" 28 | 29 | def __init__(self): 30 | """Basic initialization of various internal dictionaries.""" 31 | self._layer_class_dict = {} 32 | self._registry = {} 33 | 34 | def is_elem(self, layer_instance: tf.keras.layers.Layer) -> bool: 35 | """Checks if a layer instance's class is in the registry.""" 36 | return hash(layer_instance.__class__) in self._registry 37 | 38 | def lookup( 39 | self, layer_instance: tf.keras.layers.Layer 40 | ) -> type_aliases.SparsityPreservingNoiseLayerRegistryFunction: 41 | """Returns the layer registry function for a given layer instance.""" 42 | return self._registry[hash(layer_instance.__class__)] 43 | 44 | def insert( 45 | self, 46 | layer_class: Type[tf.keras.layers.Layer], 47 | layer_registry_function: type_aliases.SparsityPreservingNoiseLayerRegistryFunction, 48 | ): 49 | """Inserts a layer registry function into the internal dictionaries.""" 50 | layer_key = hash(layer_class) 51 | self._layer_class_dict[layer_key] = layer_class 52 | self._registry[layer_key] = layer_registry_function 53 | 54 | 55 | # ============================================================================== 56 | # Main factory methods 57 | # ============================================================================== 58 | def make_default_layer_registry() -> LayerRegistry: 59 | registry = LayerRegistry() 60 | registry.insert( 61 | tf.keras.layers.Embedding, 62 | embedding.embedding_layer_contribution_histogram, 63 | ) 64 | return registry 65 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/sparsity_preserving_noise/registry_functions/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) 4 | 5 | py_library( 6 | name = "embedding", 7 | srcs = ["embedding.py"], 8 | deps = ["//tensorflow_privacy/privacy/sparsity_preserving_noise:type_aliases"], 9 | ) 10 | 11 | py_test( 12 | name = "embedding_test", 13 | srcs = ["embedding_test.py"], 14 | deps = [":embedding"], 15 | ) 16 | -------------------------------------------------------------------------------- /tensorflow_privacy/privacy/sparsity_preserving_noise/type_aliases.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Type aliases for sparsity preserving noise.""" 15 | 16 | from collections.abc import Callable, Mapping, Sequence 17 | from typing import Any 18 | import tensorflow as tf 19 | 20 | InputArgs = Sequence[Any] 21 | InputKwargs = Mapping[str, Any] 22 | SparseGradient = tf.IndexedSlices | tf.SparseTensor 23 | ContributionCountHistogram = tf.SparseTensor 24 | ContributionCountHistogramFn = Callable[ 25 | [SparseGradient], ContributionCountHistogram 26 | ] 27 | NumMicrobatches = int | tf.Tensor 28 | SparsityPreservingNoiseLayerRegistryFunction = Callable[ 29 | [tf.keras.layers.Layer, InputArgs, InputKwargs, NumMicrobatches | None], 30 | dict[str, ContributionCountHistogramFn], 31 | ] 32 | -------------------------------------------------------------------------------- /tensorflow_privacy/v1/BUILD: -------------------------------------------------------------------------------- 1 | load("@bazel_skylib//rules:build_test.bzl", "build_test") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) 6 | 7 | py_library( 8 | name = "tensorflow_privacy_v1", 9 | srcs = ["__init__.py"], 10 | deps = [ 11 | "//tensorflow_privacy/privacy/estimators/v1:dnn", 12 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 13 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer_vectorized", 14 | ], 15 | ) 16 | 17 | build_test( 18 | name = "tensorflow_privacy_build_test", 19 | targets = [":tensorflow_privacy_v1"], 20 | ) 21 | -------------------------------------------------------------------------------- /tensorflow_privacy/v1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Privacy library v1 imports. 15 | 16 | This module includes classes designed to be compatible with TF1, based on 17 | `tf.compat.v1.train.Optimizer` and `tf.estimator.Estimator`. 18 | """ 19 | 20 | import sys 21 | 22 | # pylint: disable=g-import-not-at-top 23 | 24 | if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. 25 | pass 26 | else: 27 | # Estimators 28 | from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 29 | 30 | # Optimizers 31 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer 32 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer 33 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer 34 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer 35 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer 36 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer 37 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class 38 | 39 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer 40 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer 41 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer 42 | 43 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad 44 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam 45 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD 46 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class 47 | -------------------------------------------------------------------------------- /tensorflow_privacy/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Privacy Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """TensorFlow Privacy version.""" 15 | 16 | __version__ = '0.9.0' 17 | -------------------------------------------------------------------------------- /tutorials/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) 2 | 3 | py_library( 4 | name = "mnist_dpsgd_tutorial_common", 5 | srcs = ["mnist_dpsgd_tutorial_common.py"], 6 | ) 7 | 8 | py_binary( 9 | name = "mnist_dpsgd_tutorial", 10 | srcs = ["mnist_dpsgd_tutorial.py"], 11 | deps = [ 12 | ":mnist_dpsgd_tutorial_common", 13 | "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib", 14 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 15 | ], 16 | ) 17 | 18 | py_binary( 19 | name = "mnist_dpsgd_tutorial_eager", 20 | srcs = ["mnist_dpsgd_tutorial_eager.py"], 21 | deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"], 22 | ) 23 | 24 | py_binary( 25 | name = "mnist_dpsgd_tutorial_keras", 26 | srcs = ["mnist_dpsgd_tutorial_keras.py"], 27 | deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras"], 28 | ) 29 | 30 | py_binary( 31 | name = "mnist_dpsgd_tutorial_keras_model", 32 | srcs = ["mnist_dpsgd_tutorial_keras_model.py"], 33 | deps = ["//tensorflow_privacy/privacy/keras_models:dp_keras_model"], 34 | ) 35 | 36 | py_binary( 37 | name = "mnist_dpsgd_tutorial_vectorized", 38 | srcs = ["mnist_dpsgd_tutorial_vectorized.py"], 39 | deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer_vectorized"], 40 | ) 41 | 42 | py_binary( 43 | name = "mnist_dpsgd_tutorial_tpu", 44 | srcs = ["mnist_dpsgd_tutorial_tpu.py"], 45 | deps = [ 46 | ":mnist_dpsgd_tutorial_common", 47 | "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib", 48 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 49 | ], 50 | ) 51 | 52 | py_binary( 53 | name = "mnist_lr_tutorial", 54 | srcs = ["mnist_lr_tutorial.py"], 55 | deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"], 56 | ) 57 | 58 | py_binary( 59 | name = "lm_dpsgd_tutorial", 60 | srcs = ["lm_dpsgd_tutorial.py"], 61 | deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"], 62 | ) 63 | 64 | py_binary( 65 | name = "movielens_tutorial", 66 | srcs = ["movielens_tutorial.py"], 67 | deps = [ 68 | "//tensorflow_privacy/privacy/analysis:gdp_accountant", 69 | "//tensorflow_privacy/privacy/optimizers:dp_optimizer", 70 | ], 71 | ) 72 | 73 | filegroup( 74 | name = "ignore_srcs", 75 | srcs = ["bolton_tutorial.py"], 76 | tags = ["ignore_srcs"], 77 | ) 78 | -------------------------------------------------------------------------------- /tutorials/mnist_dpsgd_tutorial_common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020, The TensorFlow Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Common tools for DP-SGD MNIST tutorials.""" 15 | 16 | import tensorflow as tf 17 | import tensorflow_datasets as tfds 18 | 19 | 20 | def get_cnn_model(features): 21 | """Given input features, returns the logits from a simple CNN model.""" 22 | input_layer = tf.reshape(features, [-1, 28, 28, 1]) 23 | y = tf.keras.layers.Conv2D( 24 | 16, 8, strides=2, padding='same', activation='relu')( 25 | input_layer) 26 | y = tf.keras.layers.MaxPool2D(2, 1)(y) 27 | y = tf.keras.layers.Conv2D( 28 | 32, 4, strides=2, padding='valid', activation='relu')( 29 | y) 30 | y = tf.keras.layers.MaxPool2D(2, 1)(y) 31 | y = tf.keras.layers.Flatten()(y) 32 | y = tf.keras.layers.Dense(32, activation='relu')(y) 33 | logits = tf.keras.layers.Dense(10)(y) 34 | 35 | return logits 36 | 37 | 38 | def make_input_fn(split, input_batch_size=256, repetitions=-1, tpu=False): 39 | """Make input function on given MNIST split.""" 40 | 41 | def input_fn(params=None): 42 | """A simple input function.""" 43 | batch_size = params.get('batch_size', input_batch_size) 44 | 45 | def parser(example): 46 | image, label = example['image'], example['label'] 47 | image = tf.cast(image, tf.float32) 48 | image /= 255.0 49 | label = tf.cast(label, tf.int32) 50 | return image, label 51 | 52 | dataset = tfds.load(name='mnist', split=split) 53 | dataset = dataset.map(parser).shuffle(60000).repeat(repetitions).batch( 54 | batch_size) 55 | # If this input function is not meant for TPUs, we can stop here. 56 | # Otherwise, we need to explicitly set its shape. Note that for unknown 57 | # reasons, returning the latter format causes performance regression 58 | # on non-TPUs. 59 | if not tpu: 60 | return dataset 61 | 62 | # Give inputs statically known shapes; needed for TPUs. 63 | images, labels = tf.data.make_one_shot_iterator(dataset).get_next() 64 | # return images, labels 65 | images.set_shape([batch_size, 28, 28, 1]) 66 | labels.set_shape([ 67 | batch_size, 68 | ]) 69 | return images, labels 70 | 71 | return input_fn 72 | -------------------------------------------------------------------------------- /tutorials/walkthrough/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) 2 | 3 | py_binary( 4 | name = "mnist_scratch", 5 | srcs = ["mnist_scratch.py"], 6 | ) 7 | --------------------------------------------------------------------------------