├── .bazelrc
├── BUILD
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── WORKSPACE
├── g3doc
    ├── README.md
    ├── build_docs.py
    ├── guide
    │   ├── _index.yaml
    │   ├── _toc.yaml
    │   ├── get_started.md
    │   ├── images
    │   │   └── getting-started-img.png
    │   └── measure_privacy.md
    └── tutorials
    │   ├── _toc.yaml
    │   ├── classification_privacy.ipynb
    │   └── privacy_report.ipynb
├── pip_tools
    ├── build_empirical_pip_package.sh
    ├── build_pip_package.sh
    ├── publish_empirical_pip_package.sh
    ├── publish_pip_package.sh
    ├── test_empirical_pip_package.sh
    └── test_pip_package.sh
├── requirements.txt
├── research
    ├── GDP_2019
    │   ├── BUILD
    │   ├── adult_tutorial.py
    │   └── imdb_tutorial.py
    ├── README.md
    ├── audit_2020
    │   ├── README.md
    │   ├── attacks.py
    │   ├── audit.py
    │   ├── audit_test.py
    │   ├── fmnist_audit.py
    │   └── mean_audit.py
    ├── dp_newton
    │   ├── README.md
    │   ├── run_privacy_utility
    │   └── src
    │   │   ├── dataset_loader.py
    │   │   ├── my_logistic_regression.py
    │   │   ├── opt_algs.py
    │   │   ├── print_results.py
    │   │   └── run.py
    ├── hyperparameters_2022
    │   ├── README.md
    │   ├── figure7.py
    │   ├── figure7_default_values.py
    │   ├── lr_acc.json
    │   └── rdp_accountant.py
    ├── instahide_attack_2020
    │   ├── README.md
    │   ├── step_1_create_graph.py
    │   ├── step_2_color_graph.py
    │   ├── step_3_second_graph.py
    │   ├── step_4_final_graph.py
    │   ├── step_5_reconstruct.py
    │   ├── step_6_adjust_color.py
    │   └── step_7_visualize.py
    ├── mi_lira_2021
    │   ├── README.md
    │   ├── dataset.py
    │   ├── fprtpr.png
    │   ├── inference.py
    │   ├── plot.py
    │   ├── score.py
    │   ├── scripts
    │   │   ├── train_demo.sh
    │   │   └── train_demo_multigpu.sh
    │   └── train.py
    ├── mi_poison_2022
    │   ├── README.md
    │   ├── fprtpr.png
    │   ├── logs
    │   │   └── .keep
    │   ├── plot_poison.py
    │   ├── scripts
    │   │   ├── train_demo.sh
    │   │   └── train_demo_multigpu.sh
    │   └── train_poison.py
    ├── neuracrypt_attack_2021
    │   └── attack.py
    ├── pate_2017
    │   ├── BUILD
    │   ├── README.md
    │   ├── aggregation.py
    │   ├── analysis.py
    │   ├── deep_cnn.py
    │   ├── input.py
    │   ├── metrics.py
    │   ├── train_student.py
    │   ├── train_student_mnist_250_lap_20_count_50_epochs_600.sh
    │   ├── train_teachers.py
    │   └── utils.py
    └── pate_2018
    │   ├── BUILD
    │   ├── ICLR2018
    │       ├── BUILD
    │       ├── README.md
    │       ├── download.py
    │       ├── generate_figures.sh
    │       ├── generate_table.sh
    │       ├── generate_table_data_independent.sh
    │       ├── plot_ls_q.py
    │       ├── plot_partition.py
    │       ├── plots_for_slides.py
    │       ├── rdp_bucketized.py
    │       ├── rdp_cumulative.py
    │       ├── smooth_sensitivity_table.py
    │       └── utility_queries_answered.py
    │   ├── README.md
    │   ├── core.py
    │   ├── core_test.py
    │   ├── smooth_sensitivity.py
    │   └── smooth_sensitivity_test.py
├── setup.py
├── setup_empirical.py
├── tensorflow_privacy
    ├── .bazelversion
    ├── BUILD
    ├── __init__.py
    ├── privacy
    │   ├── BUILD
    │   ├── __init__.py
    │   ├── analysis
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── compute_dp_sgd_privacy.py
    │   │   ├── compute_dp_sgd_privacy_lib.py
    │   │   ├── compute_dp_sgd_privacy_test.py
    │   │   ├── compute_noise_from_budget.py
    │   │   ├── compute_noise_from_budget_lib.py
    │   │   ├── compute_noise_from_budget_test.py
    │   │   ├── gdp_accountant.py
    │   │   ├── tensor_buffer.py
    │   │   ├── tensor_buffer_eager_test.py
    │   │   ├── tensor_buffer_graph_test.py
    │   │   ├── tree_aggregation_accountant.py
    │   │   └── tree_aggregation_accountant_test.py
    │   ├── bolt_on
    │   │   ├── BUILD
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── losses.py
    │   │   ├── losses_test.py
    │   │   ├── models.py
    │   │   ├── models_test.py
    │   │   ├── optimizers.py
    │   │   └── optimizers_test.py
    │   ├── dp_query
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── discrete_gaussian_query.py
    │   │   ├── discrete_gaussian_query_test.py
    │   │   ├── discrete_gaussian_utils.py
    │   │   ├── discrete_gaussian_utils_test.py
    │   │   ├── distributed_discrete_gaussian_query.py
    │   │   ├── distributed_discrete_gaussian_query_test.py
    │   │   ├── distributed_skellam_query.py
    │   │   ├── distributed_skellam_query_test.py
    │   │   ├── dp_query.py
    │   │   ├── dp_query_test.py
    │   │   ├── gaussian_query.py
    │   │   ├── gaussian_query_test.py
    │   │   ├── nested_query.py
    │   │   ├── nested_query_test.py
    │   │   ├── no_privacy_query.py
    │   │   ├── no_privacy_query_test.py
    │   │   ├── normalized_query.py
    │   │   ├── normalized_query_test.py
    │   │   ├── quantile_adaptive_clip_sum_query.py
    │   │   ├── quantile_adaptive_clip_sum_query_test.py
    │   │   ├── quantile_adaptive_clip_tree_query.py
    │   │   ├── quantile_adaptive_clip_tree_query_test.py
    │   │   ├── quantile_estimator_query.py
    │   │   ├── quantile_estimator_query_test.py
    │   │   ├── restart_query.py
    │   │   ├── restart_query_test.py
    │   │   ├── test_utils.py
    │   │   ├── tree_aggregation.py
    │   │   ├── tree_aggregation_query.py
    │   │   ├── tree_aggregation_query_test.py
    │   │   ├── tree_aggregation_test.py
    │   │   ├── tree_range_query.py
    │   │   └── tree_range_query_test.py
    │   ├── estimators
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── binary_class_head.py
    │   │   ├── binary_class_head_test.py
    │   │   ├── dnn.py
    │   │   ├── dnn_test.py
    │   │   ├── head_utils.py
    │   │   ├── multi_class_head.py
    │   │   ├── multi_class_head_test.py
    │   │   ├── multi_label_head.py
    │   │   ├── multi_label_head_test.py
    │   │   ├── test_utils.py
    │   │   └── v1
    │   │   │   ├── BUILD
    │   │   │   ├── __init__.py
    │   │   │   ├── dnn.py
    │   │   │   ├── dnn_test.py
    │   │   │   ├── head.py
    │   │   │   ├── head_test.py
    │   │   │   ├── linear.py
    │   │   │   └── linear_test.py
    │   ├── fast_gradient_clipping
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── bert_encoder_utils.py
    │   │   ├── bert_encoder_utils_test.py
    │   │   ├── clip_grads.py
    │   │   ├── clip_grads_test.py
    │   │   ├── common_manip_utils.py
    │   │   ├── common_test_utils.py
    │   │   ├── gradient_clipping_utils.py
    │   │   ├── gradient_clipping_utils_test.py
    │   │   ├── layer_registry.py
    │   │   ├── noise_utils.py
    │   │   ├── noise_utils_test.py
    │   │   ├── registry_functions
    │   │   │   ├── BUILD
    │   │   │   ├── __init__.py
    │   │   │   ├── dense.py
    │   │   │   ├── dense_test.py
    │   │   │   ├── dense_tpu_test.py
    │   │   │   ├── einsum_dense.py
    │   │   │   ├── einsum_dense_test.py
    │   │   │   ├── einsum_dense_tpu_test.py
    │   │   │   ├── einsum_utils.py
    │   │   │   ├── einsum_utils_test.py
    │   │   │   ├── embedding.py
    │   │   │   ├── embedding_test.py
    │   │   │   ├── embedding_tpu_test.py
    │   │   │   ├── layer_normalization.py
    │   │   │   ├── layer_normalization_test.py
    │   │   │   ├── layer_normalization_tpu_test.py
    │   │   │   ├── multi_head_attention.py
    │   │   │   ├── multi_head_attention_test.py
    │   │   │   ├── multi_head_attention_tpu_test.py
    │   │   │   ├── nlp_on_device_embedding.py
    │   │   │   ├── nlp_on_device_embedding_test.py
    │   │   │   ├── nlp_on_device_embedding_tpu_test.py
    │   │   │   ├── nlp_position_embedding.py
    │   │   │   ├── nlp_position_embedding_test.py
    │   │   │   ├── nlp_position_embedding_tpu_test.py
    │   │   │   └── registry_function_utils.py
    │   │   └── type_aliases.py
    │   ├── keras_models
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── dp_keras_model.py
    │   │   ├── dp_keras_model_distributed_test.py
    │   │   └── dp_keras_model_test.py
    │   ├── logistic_regression
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── datasets.py
    │   │   ├── datasets_test.py
    │   │   ├── multinomial_logistic.py
    │   │   ├── multinomial_logistic_test.py
    │   │   ├── single_layer_softmax.py
    │   │   └── single_layer_softmax_test.py
    │   ├── membership_inference_attack
    │   │   ├── BUILD
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── data_structures.py
    │   │   ├── dataset_slicing.py
    │   │   ├── keras_evaluation.py
    │   │   ├── membership_inference_attack.py
    │   │   ├── models.py
    │   │   ├── plotting.py
    │   │   ├── privacy_report.py
    │   │   ├── seq2seq_mia.py
    │   │   └── tf_estimator_evaluation.py
    │   ├── optimizers
    │   │   ├── BUILD
    │   │   ├── __init__.py
    │   │   ├── clip_and_aggregate_gradients.py
    │   │   ├── clip_and_aggregate_gradients_test.py
    │   │   ├── dp_optimizer.py
    │   │   ├── dp_optimizer_eager_test.py
    │   │   ├── dp_optimizer_keras.py
    │   │   ├── dp_optimizer_keras_sparse.py
    │   │   ├── dp_optimizer_keras_sparse_distributed_test.py
    │   │   ├── dp_optimizer_keras_sparse_test.py
    │   │   ├── dp_optimizer_keras_test.py
    │   │   ├── dp_optimizer_keras_vectorized.py
    │   │   ├── dp_optimizer_test.py
    │   │   ├── dp_optimizer_vectorized.py
    │   │   └── dp_optimizer_vectorized_test.py
    │   ├── privacy_tests
    │   │   ├── BUILD
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── epsilon_lower_bound.py
    │   │   ├── epsilon_lower_bound_test.py
    │   │   ├── membership_inference_attack
    │   │   │   ├── BUILD
    │   │   │   ├── CONTRIBUTING.md
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── advanced_mia.py
    │   │   │   ├── advanced_mia_example.py
    │   │   │   ├── advanced_mia_test.py
    │   │   │   ├── codelab_roc_fig.png
    │   │   │   ├── codelabs
    │   │   │   │   ├── BUILD
    │   │   │   │   ├── README.md
    │   │   │   │   ├── codelab.ipynb
    │   │   │   │   ├── example.py
    │   │   │   │   ├── membership_probability_codelab.ipynb
    │   │   │   │   ├── third_party
    │   │   │   │   │   └── seq2seq_membership_inference
    │   │   │   │   │   │   ├── BUILD
    │   │   │   │   │   │   ├── LICENSE
    │   │   │   │   │   │   └── seq2seq_membership_inference_codelab.ipynb
    │   │   │   │   └── word2vec_codelab.ipynb
    │   │   │   ├── data_structures.py
    │   │   │   ├── data_structures_test.py
    │   │   │   ├── dataset_slicing.py
    │   │   │   ├── dataset_slicing_test.py
    │   │   │   ├── keras_evaluation.py
    │   │   │   ├── keras_evaluation_example.py
    │   │   │   ├── keras_evaluation_test.py
    │   │   │   ├── membership_inference_attack.py
    │   │   │   ├── membership_inference_attack_test.py
    │   │   │   ├── models.py
    │   │   │   ├── models_test.py
    │   │   │   ├── plotting.py
    │   │   │   ├── privacy_report.py
    │   │   │   ├── privacy_report_test.py
    │   │   │   ├── seq2seq_mia.py
    │   │   │   ├── seq2seq_mia_test.py
    │   │   │   ├── tf_estimator_evaluation.py
    │   │   │   ├── tf_estimator_evaluation_example.py
    │   │   │   ├── tf_estimator_evaluation_test.py
    │   │   │   └── utils_tensorboard.py
    │   │   ├── secret_sharer
    │   │   │   ├── BUILD
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── exposures.py
    │   │   │   ├── exposures_test.py
    │   │   │   ├── generate_secrets.py
    │   │   │   ├── generate_secrets_test.py
    │   │   │   ├── secret_sharer_example.ipynb
    │   │   │   └── secret_sharer_image_example.ipynb
    │   │   ├── utils.py
    │   │   ├── utils_test.py
    │   │   └── version.py
    │   └── sparsity_preserving_noise
    │   │   ├── BUILD
    │   │   ├── layer_registry.py
    │   │   ├── registry_functions
    │   │       ├── BUILD
    │   │       ├── embedding.py
    │   │       └── embedding_test.py
    │   │   ├── sparse_noise_utils.py
    │   │   ├── sparse_noise_utils_test.py
    │   │   └── type_aliases.py
    ├── v1
    │   ├── BUILD
    │   └── __init__.py
    └── version.py
└── tutorials
    ├── BUILD
    ├── README.md
    ├── bolton_tutorial.py
    ├── lm_dpsgd_tutorial.py
    ├── mnist_dpsgd_tutorial.py
    ├── mnist_dpsgd_tutorial_common.py
    ├── mnist_dpsgd_tutorial_eager.py
    ├── mnist_dpsgd_tutorial_keras.py
    ├── mnist_dpsgd_tutorial_keras_model.py
    ├── mnist_dpsgd_tutorial_tpu.py
    ├── mnist_dpsgd_tutorial_vectorized.py
    ├── mnist_lr_tutorial.py
    ├── movielens_tutorial.py
    └── walkthrough
        ├── BUILD
        ├── README.md
        └── mnist_scratch.py


/.bazelrc:
--------------------------------------------------------------------------------
 1 | # TensorFlow Privacy Bazel configuration
 2 | #
 3 | # See https://docs.bazel.build/versions/master/user-manual.html#config for
 4 | # details on the various configuration options.
 5 | 
 6 | # Enable verbose failures.
 7 | build --verbose_failures
 8 | 
 9 | # Enable logging rc options.
10 | common --announce_rc
11 | 
12 | # Enable platform-specific configs from bazelrc files.
13 | common --enable_platform_specific_config
14 | 
15 | # Enable logging error output.
16 | test --test_output=errors
17 | test --test_summary=detailed
18 | 
19 | # Execute commands as local subprocesses
20 | build --spawn_strategy=local
21 | 


--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:private"])
2 | 
3 | licenses(["notice"])
4 | 
5 | exports_files(["LICENSE"])
6 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 
25 | ## Attack library
26 | 
27 | If you wish to add novel attacks to the attack library, please check our
28 | [guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md)
29 | 
30 | ## Community Guidelines
31 | 
32 | This project follows Google's 
33 | [Open Source Community Guidelines](https://opensource.google.com/conduct/).
34 | 


--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
 1 | workspace(name = "org_tensorflow_privacy")
 2 | 
 3 | load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 4 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 5 | 
 6 | git_repository(
 7 |     name = "bazel_skylib",
 8 |     remote = "https://github.com/bazelbuild/bazel-skylib.git",
 9 |     tag = "1.0.3",
10 | )
11 | 
12 | git_repository(
13 |     name = "rules_python",
14 |     remote = "https://github.com/bazelbuild/rules_python.git",
15 |     tag = "0.5.0",
16 | )
17 | 


--------------------------------------------------------------------------------
/g3doc/README.md:
--------------------------------------------------------------------------------
1 | # Under construction
2 | 


--------------------------------------------------------------------------------
/g3doc/guide/_toc.yaml:
--------------------------------------------------------------------------------
1 | toc:
2 | - title: Overview
3 |   path: /responsible_ai/privacy/guide/
4 | - title: Get Started
5 |   path: /responsible_ai/privacy/guide/get_started
6 | - title: Measure Privacy
7 |   path: /responsible_ai/privacy/guide/measure_privacy
8 | 


--------------------------------------------------------------------------------
/g3doc/guide/images/getting-started-img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/g3doc/guide/images/getting-started-img.png


--------------------------------------------------------------------------------
/g3doc/guide/measure_privacy.md:
--------------------------------------------------------------------------------
 1 | # Measure Privacy
 2 | 
 3 | Differential privacy is a framework for measuring the privacy guarantees
 4 | provided by an algorithm and can be expressed using the values ε (epsilon) and δ
 5 | (delta). Of the two, ε is more important and more sensitive to the choice of
 6 | hyperparameters. Roughly speaking, they mean the following:
 7 | 
 8 | *   ε gives a ceiling on how much the probability of a particular output can
 9 |     increase by including (or removing) a single training example. You usually
10 |     want it to be a small constant (less than 10, or for more stringent privacy
11 |     guarantees, less than 1). However, this is only an upper bound, and a large
12 |     value of epsilon may still mean good practical privacy.
13 | *   δ bounds the probability of an arbitrary change in model behavior. You can
14 |     usually set this to a very small number (1e-7 or so) without compromising
15 |     utility. A rule of thumb is to set it to be less than the inverse of the
16 |     training data size.
17 | 
18 | The relationship between training hyperparameters and the resulting privacy in
19 | terms of (ε, δ) is complicated and tricky to state explicitly. Our current
20 | recommended approach is at the bottom of the [Get Started page](get_started.md),
21 | which involves finding the maximum noise multiplier one can use while still
22 | having reasonable utility, and then scaling the noise multiplier and number of
23 | microbatches. TensorFlow Privacy provides a tool, `compute_dp_sgd_privacy` to
24 | compute (ε, δ) based on the noise multiplier σ, the number of training steps
25 | taken, and the fraction of input data consumed at each step. The amount of
26 | privacy increases with the noise multiplier σ and decreases the more times the
27 | data is used on training. Generally, in order to achieve an epsilon of at most
28 | 10.0, we need to set the noise multiplier to around 0.3 to 0.5, depending on the
29 | dataset size and number of epochs. See the
30 | [classification privacy tutorial](../tutorials/classification_privacy.ipynb) to
31 | see the approach.
32 | 
33 | For more detail, see
34 | [the original DP-SGD paper](https://arxiv.org/pdf/1607.00133.pdf).
35 | 
36 | You can use `compute_dp_sgd_privacy` to find out the epsilon given a fixed delta
37 | value for your model [../tutorials/classification_privacy.ipynb]:
38 | 
39 | *   `q` : the sampling ratio - the probability of an individual training point
40 |     being included in a mini batch (`batch_size/number_of_examples`).
41 | *   `noise_multiplier` : A float that governs the amount of noise added during
42 |     training. Generally, more noise results in better privacy and lower utility.
43 | *   `steps` : The number of global steps taken.
44 | 
45 | A detailed writeup of the theory behind the computation of epsilon and delta is
46 | available at
47 | [Differential Privacy of the Sampled Gaussian Mechanism](https://arxiv.org/abs/1908.10530).
48 | 


--------------------------------------------------------------------------------
/g3doc/tutorials/_toc.yaml:
--------------------------------------------------------------------------------
1 | toc:
2 | - title: Compute privacy
3 |   path: /responsible_ai/privacy/tutorials/classification_privacy
4 | - title: Assess privacy risk
5 |   path: /responsible_ai/privacy/tutorials/privacy_report
6 | 


--------------------------------------------------------------------------------
/pip_tools/build_empirical_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to build the TensorFlow Privacy/Privacy Tests pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Build the pip package
32 |   pip install --upgrade setuptools wheel
33 |   python "setup_empirical.py" sdist bdist_wheel
34 | 
35 |   # Cleanup.
36 |   deactivate
37 | }
38 | 
39 | main "$@"
40 | 


--------------------------------------------------------------------------------
/pip_tools/build_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to build the TensorFlow Privacy pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Build the pip package
32 |   pip install --upgrade setuptools wheel
33 |   python "setup.py" sdist bdist_wheel
34 | 
35 |   # Cleanup.
36 |   deactivate
37 | }
38 | 
39 | main "$@"
40 | 


--------------------------------------------------------------------------------
/pip_tools/publish_empirical_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to publish the TensorFlow Privacy pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Publish the pip package.
32 |   package="$(ls "dist/"*".whl" | head -n1)"
33 |   pip install --upgrade twine
34 |   twine check "${package}"
35 |   twine upload "${package}"
36 | 
37 |   # Cleanup.
38 |   deactivate
39 | }
40 | 
41 | main "$@"
42 | 


--------------------------------------------------------------------------------
/pip_tools/publish_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to publish the TensorFlow Privacy pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Publish the pip package.
32 |   package="$(ls "dist/"*".whl" | head -n1)"
33 |   pip install --upgrade twine
34 |   twine check "${package}"
35 |   twine upload "${package}"
36 | 
37 |   # Cleanup.
38 |   deactivate
39 | }
40 | 
41 | main "$@"
42 | 


--------------------------------------------------------------------------------
/pip_tools/test_empirical_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to build the TensorFlow Privacy pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Test the pip package.
32 |   package="$(ls "dist/"*".whl" | head -n1)"
33 |   pip install --upgrade "${package}"
34 |   pip freeze
35 |   python -c "import tensorflow_privacy.privacy.privacy_tests as pt; print(pt.__version__)"
36 | 
37 |   # Cleanup.
38 |   deactivate
39 | }
40 | 
41 | main "$@"
42 | 


--------------------------------------------------------------------------------
/pip_tools/test_pip_package.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright 2020, The TensorFlow Privacy Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | # Tool to build the TensorFlow Privacy pip package.
17 | set -e
18 | 
19 | main() {
20 |   # Create a working directory.
21 |   local temp_dir="$(mktemp -d)"
22 |   trap "rm -rf ${temp_dir}" EXIT
23 | 
24 |   # Create a virtual environment
25 |   python3.11 -m venv "${temp_dir}/venv"
26 |   source "${temp_dir}/venv/bin/activate"
27 |   python --version
28 |   pip install --upgrade pip
29 |   pip --version
30 | 
31 |   # Test the pip package.
32 |   package="$(ls "dist/"*".whl" | head -n1)"
33 |   pip install --upgrade "${package}"
34 |   pip freeze
35 |   python -c "import tensorflow_privacy as tfp; print(tfp.__version__)"
36 | 
37 |   # Cleanup.
38 |   deactivate
39 | }
40 | 
41 | main "$@"
42 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Requirements for Tensorflow Privacy.
 2 | #
 3 | # If you add a *new* dependency and it is required by the TensorFlow Federated
 4 | # package, also add the dependency to `setup.py`.
 5 | #
 6 | # If you update the version of an *existing* dependency and it is required by
 7 | # the TensorFlow Federated package, also update the version of the dependency in
 8 | # `setup.py`.
 9 | #
10 | # *   For packages that have a stable release, we use a version that is
11 | #     compatible with that release (e.g. `~=x.y`). See
12 | #     https://peps.python.org/pep-0440/#compatible-release for more information.
13 | # *   For packages that do not have a stable release, we use a version that
14 | #     matches a release that has been tested (e.g. `==x.y.z`). See
15 | #     https://peps.python.org/pep-0440/#version-matching for more information.
16 | #
17 | # This assumes that the packages follows Semantic Versioning, see
18 | # https://semver.org/. If a package follows a different versioning scheme or
19 | # requires unique handling, we use a different version specifier and comment the
20 | # versioning scheme or reasoning.
21 | #
22 | # Note: As of 2022-08-17 there is bug in `pip` when multiple packages use the
23 | # compatible release operator `~=` to specify a version and one of those
24 | # versions ends in `0`. See https://github.com/pypa/pip/issues/9613 for more
25 | # information. In this case, use the equivalent clause `>=x.0,==x.*` instead of
26 | # `~=x.0`.
27 | 
28 | absl-py>=1.0,==1.*
29 | dm-tree==0.1.8
30 | dp-accounting==0.4.4
31 | immutabledict~=2.2
32 | matplotlib~=3.3
33 | numpy~=1.21
34 | packaging~=22.0
35 | pandas~=1.4
36 | scikit-learn>=1.0,==1.*
37 | scipy~=1.9
38 | statsmodels==0.14.0
39 | tensorflow-datasets~=4.5
40 | tensorflow-estimator~=2.4
41 | tensorflow-probability~=0.22.0
42 | tensorflow>=2.4.0,<=2.15.0
43 | tf-models-official~=2.13
44 | 


--------------------------------------------------------------------------------
/research/GDP_2019/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:private"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | filegroup(
 6 |     name = "ignore_srcs",
 7 |     srcs = [
 8 |         "adult_tutorial.py",
 9 |         "imdb_tutorial.py",
10 |     ],
11 |     tags = ["ignore_srcs"],
12 | )
13 | 


--------------------------------------------------------------------------------
/research/README.md:
--------------------------------------------------------------------------------
 1 | # Research
 2 | 
 3 | This folder contains code to reproduce results from research papers. Currently,
 4 | the following papers are included: 
 5 | 
 6 | * Semi-supervised Knowledge Transfer for Deep Learning from Private Training
 7 |   Data (ICLR 2017): `pate_2017`
 8 | 
 9 | * Scalable Private Learning with PATE (ICLR 2018): `pate_2018`
10 | 


--------------------------------------------------------------------------------
/research/audit_2020/README.md:
--------------------------------------------------------------------------------
 1 | # Auditing Private Machine Learning
 2 | Code for "Auditing Differentially Private Machine Learning: How Private is Private SGD?": https://arxiv.org/abs/2006.07709. This implementation is simple but not easily parallelizable. For a parallelizable version which is harder to run, see https://github.com/jagielski/auditing-dpsgd.
 3 | 
 4 | ## Usage
 5 | This attack relies on the AuditAttack class found in audit.py. The class allows one to generate poisoning, run trials to compute membership scores for the poisoning, and then use the resulting membership scores to compute a lower bound on epsilon.
 6 | 
 7 | ## Examples
 8 | Two examples are provided, mean_audit.py and fmnist_audit.py. fmnist_audit.py attacks the FashionMNIST dataset. It allows the user to specify between standard bkdr attacks and clipping-aware attacks, and also allows the user to specify between multiple poisoning attack sizes, model types, and whether to load saved model weights to start training from. mean_audit.py audits a model which computes the mean of a dataset. This provides an example of user-provided poisoning samples, rather than those autogenerated from our attacks.py library.
 9 | 
10 | ## Requirements
11 | Requires scikit-learn=0.24.1, statsmodels=0.12.2, tensorflow=1.14.0
12 | 


--------------------------------------------------------------------------------
/research/dp_newton/README.md:
--------------------------------------------------------------------------------
 1 | # Project Title
 2 | 
 3 | Faster Differentially Private Convex Optimization via Second-Order Methods
 4 | https://arxiv.org/abs/2112.03570 <br>
 5 | by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, Abhradeep Thakurta.
 6 | 
 7 | ## Description
 8 | 
 9 | Implementation of the optimizatoin algorithms proposed in
10 | https://arxiv.org/abs/2112.03570 <br>
11 | 
12 | ## Getting Started
13 | 
14 | You will need to install fairly standard dependencies
15 | 
16 | run 'run_privacy_utility' to compare the convergence speed and excess loss of
17 | different algorithms.
18 | 
19 | ### Citation
20 | 
21 | You can cite this paper with
22 | 
23 | ```
24 | @article{ganesh2023faster,
25 |   title={Faster Differentially Private Convex Optimization
26 |     via Second-Order Methods},
27 |   author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
28 |     and Thakurta, Abhradeep},
29 |   journal={arXiv preprint arXiv:2305.13209},
30 |   year={2023}
31 | }
32 | ```
33 | 


--------------------------------------------------------------------------------
/research/dp_newton/run_privacy_utility:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # =============================================================================
15 | 
16 | rm -rf ./src/results
17 | mkdir -p ./src/results
18 | rm -rf ./src/datasets_directory
19 | mkdir -p ./src/datasets_directory
20 | dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
21 | privacy_budget="3.0" # epsilon in DP
22 | num_iteration_GD="100" # number of iterations for DP-GD
23 | num_iteration_NT="15" # number of iterations for damped newton
24 | num_iteration_our="15" # number of iterations for double noise (proposed method)
25 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
26 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac $"0.7"
27 | $HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac $"0.7" --trace_frac $"0.1" --trace_coeff $"0.5"
28 | $HOME/google-code/dpoptVenv/bin/python3 ./src/print_results.py
29 | 


--------------------------------------------------------------------------------
/research/dp_newton/src/print_results.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # =============================================================================
15 | 
16 | """print the achievable error of different algorithms"""
17 | 
18 | # pylint: skip-file
19 | # pyformat: disable
20 | 
21 | import json
22 | import os
23 | import numpy as np
24 | 
25 | RESULTS_PATH = './src/results/'
26 | excess_loss = {}
27 | opt_algs = [
28 |     'DPGD',
29 |     'DN-Hess-add',
30 |     'DN-UB-add',
31 |     'DN-Hess-clip',
32 |     'DN-UB-clip',
33 |     'private-newton',
34 | ]
35 | for filename in os.listdir(RESULTS_PATH):
36 |   f = os.path.join(RESULTS_PATH, filename)
37 |   with open(f, encoding='utf-8') as json_file:
38 |     data = json.load(json_file)
39 |     for alg in data.keys():
40 |       if alg in opt_algs:
41 |         loss_avg = np.array(data[alg]['loss_avg'])
42 |         loss_std = np.array(data[alg]['loss_std'])
43 |         clock_time = np.array(data[alg]['clock_time_avg'])
44 |         print('optimization algorithm: ', alg)
45 |         print('excess loss: ' + str(loss_avg[-1]))
46 |         print('run time: ' + str(clock_time[-1]) + '(sec)')
47 |         print('-----')
48 | 


--------------------------------------------------------------------------------
/research/hyperparameters_2022/README.md:
--------------------------------------------------------------------------------
 1 | # Hyperparameter Tuning with Renyi Differential Privacy
 2 | 
 3 | ### Nicolas Papernot and Thomas Steinke
 4 | 
 5 | This repository contains the code used to reproduce some of the experiments in
 6 | our
 7 | [ICLR 2022 paper on hyperparameter tuning with differential privacy](https://openreview.net/forum?id=-70L8lpp9DF).
 8 | 
 9 | You can reproduce Figure 7 in the paper by running `figure7.py`. It loads by
10 | default values used to plot the figure contained in the paper, and we also
11 | included a dictionary `lr_acc.json` containing the accuracy of a large number of
12 | ML models trained with different learning rates. If you'd like to try our
13 | approach to fine-tune your own parameters, you will have to modify the code that
14 | interacts with this dictionary (`lr_acc` in the code from `figure7.py`).
15 | 
16 | ## Citing this work
17 | 
18 | If you use this repository for academic research, you are highly encouraged
19 | (though not required) to cite our paper:
20 | 
21 | ```
22 | @inproceedings{
23 | papernot2022hyperparameter,
24 | title={Hyperparameter Tuning with Renyi Differential Privacy},
25 | author={Nicolas Papernot and Thomas Steinke},
26 | booktitle={International Conference on Learning Representations},
27 | year={2022},
28 | url={https://openreview.net/forum?id=-70L8lpp9DF}
29 | }
30 | ```
31 | 


--------------------------------------------------------------------------------
/research/hyperparameters_2022/figure7_default_values.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Default values for generating Figure 7."""
15 | 
16 | import json
17 | import numpy as np
18 | 
19 | orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
20 |           list(range(5, 64)) + [128, 256, 512])
21 | rdp = [
22 |     2.04459751e-01, 2.45818210e-01, 2.87335988e-01, 3.29014798e-01,
23 |     3.70856385e-01, 4.12862542e-01, 4.97375951e-01, 5.82570265e-01,
24 |     6.68461534e-01, 7.55066706e-01, 8.42403732e-01, 1.01935100e+00,
25 |     1.19947313e+00, 1.38297035e+00, 1.57009549e+00, 1.76124790e+00,
26 |     1.95794503e+00, 2.19017390e+00, 4.48407479e+00, 3.08305394e+02,
27 |     4.98610133e+03, 1.11363692e+04, 1.72590079e+04, 2.33487231e+04,
28 |     2.94091123e+04, 3.54439803e+04, 4.14567914e+04, 4.74505356e+04,
29 |     5.34277419e+04, 5.93905358e+04, 6.53407051e+04, 7.12797586e+04,
30 |     7.72089762e+04, 8.31294496e+04, 8.90421151e+04, 9.49477802e+04,
31 |     1.00847145e+05, 1.06740819e+05, 1.12629335e+05, 1.18513163e+05,
32 |     1.24392717e+05, 1.30268362e+05, 1.36140424e+05, 1.42009194e+05,
33 |     1.47874932e+05, 1.53737871e+05, 1.59598221e+05, 1.65456171e+05,
34 |     1.71311893e+05, 1.77165542e+05, 1.83017260e+05, 1.88867175e+05,
35 |     1.94715404e+05, 2.00562057e+05, 2.06407230e+05, 2.12251015e+05,
36 |     2.18093495e+05, 2.23934746e+05, 2.29774840e+05, 2.35613842e+05,
37 |     2.41451813e+05, 2.47288808e+05, 2.53124881e+05, 2.58960080e+05,
38 |     2.64794449e+05, 2.70628032e+05, 2.76460867e+05, 2.82292992e+05,
39 |     2.88124440e+05, 6.66483142e+05, 1.41061455e+06, 2.89842152e+06
40 | ]
41 | with open("lr_acc.json", "r") as dict_f:
42 |   lr_acc = json.load(dict_f)
43 | num_trials = 1000
44 | lr_rates = np.logspace(np.log10(1e-4), np.log10(1.), num=1000)[-400:]
45 | gammas = np.asarray(
46 |     [1e-07, 8e-06, 1e-04, 0.00024, 0.0015, 0.0035, 0.025, 0.05, 0.1, 0.2, 0.5])
47 | non_private_acc = 0.9594
48 | 


--------------------------------------------------------------------------------
/research/instahide_attack_2020/README.md:
--------------------------------------------------------------------------------
 1 | Implementation of our reconstruction attack on InstaHide.
 2 | 
 3 | Is Private Learning Possible with Instance Encoding?
 4 | Nicholas Carlini, Samuel Deng, Sanjam Garg, Somesh Jha, Saeed Mahloujifar, Mohammad Mahmoody, Shuang Song, Abhradeep Thakurta, Florian Tramer
 5 | https://arxiv.org/abs/2011.05315
 6 | 
 7 | 
 8 | ## Overview
 9 | 
10 | InstaHide is a recent privacy-preserving machine learning framework.
11 | It takes a (sensitive) dataset and generates encoded images that are privacy-preserving.
12 | Our attack breaks InstaHide and shows it does not offer meaningful privacy.
13 | Given the encoded dataset, we can recover a near-identical copy of the original images.
14 | 
15 | This repository implements the attack described in our paper. It consists of a number of
16 | steps that shoul be run sequentially. It assumes access to pre-trained neural network
17 | classifiers that should be downloaded following the steps below.
18 | 
19 | 
20 | ### Requirements
21 | 
22 | * Python, version &ge; 3.5
23 | * jax
24 | * jaxlib
25 | * objax (https://github.com/google/objax)
26 | * PIL
27 | * sklearn
28 | 
29 | 
30 | ### Running the attack
31 | 
32 | To reproduce our results and run the attack, each of the files should be run in turn.
33 | 
34 | 0. Download the necessary dependency files:
35 | - (encryption.npy)[https://www.dropbox.com/sh/8zdsr1sjftia4of/AAA-60TOjGKtGEZrRmbawwqGa?dl=0] and (labels.npy)[https://www.dropbox.com/sh/8zdsr1sjftia4of/AAA-60TOjGKtGEZrRmbawwqGa?dl=0] from the (InstaHide Challenge)[https://github.com/Hazelsuko07/InstaHide_Challenge]
36 | - The (saved models)[https://drive.google.com/file/d/1YfKzGRfnnzKfUKpLjIRXRto8iD4FdwGw/view?usp=sharing] used to run the attack
37 | - Set up all the requirements as above
38 | 
39 | 1. Run `step_1_create_graph.py`. Produce the similarity graph to pair together encoded images that share an original image.
40 | 
41 | 2. Run `step_2_color_graph.py`. Color the graph to find 50 dense cliques.
42 | 
43 | 3. Run `step_3_second_graph.py`. Create a new bipartite similarity graph.
44 | 
45 | 4. Run `step_4_final_graph.py`. Solve the matching problem to assign encoded images to original images.
46 | 
47 | 5. Run `step_5_reconstruct.py`. Reconstruct the original images.
48 | 
49 | 6. Run `step_6_adjust_color.py`. Adjust the color curves to match.
50 | 
51 | 7. Run `step_7_visualize.py`. Show the final resulting images.
52 | 
53 | ## Citation
54 | 
55 | You can cite this attack at
56 | 
57 | ```
58 | @inproceedings{carlini2021private,
59 |   title={Is Private Learning Possible with Instance Encoding?},
60 |   author={Carlini, Nicholas and Deng, Samuel and Garg, Sanjam and Jha, Somesh and Mahloujifar, Saeed and Mahmoody, Mohammad and Thakurta, Abhradeep and Tram{\`e}r, Florian},
61 |   booktitle={2021 IEEE Symposium on Security and Privacy (SP)},
62 |   pages={410--427},
63 |   year={2021},
64 |   organization={IEEE}
65 | }
66 | ```


--------------------------------------------------------------------------------
/research/instahide_attack_2020/step_1_create_graph.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """
17 | Create the similarity graph given the encoded images by running the similarity
18 | neural network over all pairs of images.
19 | """
20 | 
21 | import objax
22 | import numpy as np
23 | import jax.numpy as jn
24 | import functools
25 | import os
26 | import random
27 | 
28 | from objax.zoo import wide_resnet
29 | 
30 | def setup():
31 |     global model
32 |     class DoesUseSame(objax.Module):
33 |         def __init__(self):
34 |             fn = functools.partial(wide_resnet.WideResNet, depth=28, width=6)
35 |             self.model = fn(6,2)
36 |             
37 |             model_vars = self.model.vars()
38 |             self.ema = objax.optimizer.ExponentialMovingAverage(model_vars, momentum=0.999, debias=True)
39 |     
40 | 
41 |             def predict_op(x,y):
42 |                 # The model takes the two images and checks if they correspond
43 |                 # to the same original image.
44 |                 xx = jn.concatenate([jn.abs(x),
45 |                                      jn.abs(y)],
46 |                                     axis=1)
47 |                 return self.model(xx, training=False)
48 |             
49 |             self.predict = objax.Jit(self.ema.replace_vars(predict_op), model_vars + self.ema.vars())
50 |             self.predict_fast = objax.Parallel(self.ema.replace_vars(predict_op), model_vars + self.ema.vars())
51 |     
52 |     model = DoesUseSame()
53 |     checkpoint = objax.io.Checkpoint("models/step1/", keep_ckpts=5, makedir=True)
54 |     start_epoch, last_ckpt = checkpoint.restore(model.vars())
55 | 
56 | 
57 | def doall():
58 |     global graph
59 |     n = np.load("data/encryption.npy")
60 |     n = np.transpose(n, (0,3,1,2))
61 | 
62 |     # Compute the similarity between each encoded image and all others
63 |     # This is n^2 work but should run fairly quickly, especially given
64 |     # more than one GPU. Otherwise about an hour or so.
65 |     graph = []
66 |     with model.vars().replicate():
67 |         for i in range(5000):
68 |             print(i)
69 |             v = model.predict_fast(np.tile(n[i:i+1], (5000,1,1,1)), n)
70 |             graph.append(np.array(v[:,0]-v[:,1]))
71 |     graph = np.array(graph)
72 |     np.save("data/graph.npy", graph)
73 | 
74 |     
75 | if __name__ == "__main__":
76 |     setup()
77 |     doall()
78 | 


--------------------------------------------------------------------------------
/research/instahide_attack_2020/step_4_final_graph.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | import multiprocessing as mp
17 | import pickle
18 | import random
19 | import numpy as np
20 | 
21 | 
22 | labels = np.load("data/label.npy")
23 | nextgraph = np.load("data/nextgraph.npy")
24 | 
25 | assigned = [[] for _ in range(5000)]
26 | lambdas = [[] for _ in range(5000)]
27 | for i in range(100):
28 |     order = (np.argsort(nextgraph[:,i]))
29 |     correct = (labels[order[:20]]>0).sum(axis=0).argmax()
30 | 
31 |     # Let's create the final graph
32 |     # Instead of doing a full bipartite matching, let's just greedily
33 |     # choose the closest 80 candidates for each encoded image to pair
34 |     # together can call it a day.
35 |     # This is within a percent or two of doing that, and much easier.
36 | 
37 |     # Also record the lambdas based on which image it coresponds to,
38 |     # but if they share a label then just guess it's an even 50/50 split.
39 | 
40 |     
41 |     for x in order[:80]:
42 |         if labels[x][correct] > 0 and len(assigned[x]) < 2:
43 |             assigned[x].append(i)
44 |             if np.sum(labels[x]>0) == 1:
45 |                 # the same label was mixed in twice. punt.
46 |                 lambdas[x].append(labels[x][correct]/2)
47 |             else:
48 |                 lambdas[x].append(labels[x][correct])
49 | 
50 | np.save("data/predicted_pairings_80.npy", assigned)
51 | np.save("data/predicted_lambdas_80.npy", lambdas)
52 | 


--------------------------------------------------------------------------------
/research/instahide_attack_2020/step_6_adjust_color.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | Fix the color curves. Use a pre-trained "neural network" with <100 weights.
17 | Visually this helps a lot, even if it's not doing much of anything in pactice.
18 | """
19 | 
20 | import random
21 | 
22 | import os
23 | os.environ['CUDA_VISIBLE_DEVICES'] = ''
24 | 
25 | import numpy as np
26 | import jax.numpy as jn
27 | 
28 | import objax
29 | 
30 | # Our extremely complicated neural network to re-color the images.
31 | # Takes one pixel at a time and fixes the color of that pixel.
32 | model = objax.nn.Sequential([objax.nn.Linear(3, 10),
33 |                              objax.functional.relu,
34 |                              objax.nn.Linear(10, 3)
35 |                              ])
36 | 
37 | # These are the weights.
38 | weights = [[-0.09795442, -0.26434848, -0.24964345, -0.11450608, 0.6797288, -0.48435465,
39 |             0.45307165, -0.31196147, -0.33266315, 0.20486055],
40 |            [[-0.9056427, 0.02872663, -1.5114126, -0.41024876, -0.98195165, 0.1143966,
41 |              0.6763464, -0.58654785, -1.797063, -0.2176538, ],
42 |             [ 1.1941166, 0.15515928, 1.1691351, -0.7256186, 0.8046044, 1.3127686,
43 |               -0.77297133, -1.1761239, 0.85841715, 0.95545965],
44 |             [ 0.20092924, 0.57503146, 0.22809981, 1.5288007, -0.94781816, -0.68305916,
45 |               -0.5245211, 1.4042739, -0.00527458, -1.1462274, ]],
46 |            [0.15683544, 0.22086962, 0.33100453],
47 |            [[ 7.7239674e-01, 4.0261227e-01, -9.6466336e-03],
48 |             [-2.2159107e-01, 1.5123411e-01, 3.4485441e-01],
49 |             [-1.7618114e+00, -7.1886492e-01, -4.6467595e-02],
50 |             [ 6.9419539e-01, 6.2531930e-01, 7.2271496e-01],
51 |             [-1.1913675e+00, -6.7755884e-01, -3.5114303e-01],
52 |             [ 4.8022485e-01, 1.7145030e-01, 7.4849324e-04],
53 |             [ 3.8332436e-02, -7.0614147e-01, -5.5127507e-01],
54 |             [-1.0929481e+00, -1.0268525e+00, -7.0265180e-01],
55 |             [ 1.4880739e+00, 7.1450096e-01, 2.9102692e-01],
56 |             [ 7.2846663e-01,  7.1322352e-01, -1.7453632e-01]]]
57 |            
58 | for i,(k,v) in enumerate(model.vars().items()):
59 |     v.assign(jn.array(weights[i]))
60 | 
61 | # Do all of the re-coloring
62 | predict = objax.Jit(lambda x: model(x, training=False),
63 |                     model.vars())
64 | 
65 | out = model(np.load("data/private_raw.npy"))
66 | np.save("data/private.npy", out)
67 | 


--------------------------------------------------------------------------------
/research/instahide_attack_2020/step_7_visualize.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | 
17 | """
18 | Given the private images, draw them in a 100x100 grid for visualization.
19 | """
20 | 
21 | import numpy as np
22 | from PIL import Image
23 | import matplotlib.pyplot as plt
24 | 
25 | p = np.load("data/private.npy")
26 | 
27 | def toimg(x):
28 |     print(x.shape)
29 |     img = (x+1)*127.5
30 |     img = np.clip(img, 0, 255)
31 |     img = np.reshape(img, (10, 10, 32, 32, 3))
32 |     img = np.concatenate(img, axis=2)
33 |     img = np.concatenate(img, axis=0)
34 |     img = Image.fromarray(np.array(img,dtype=np.uint8))
35 |     return img
36 | 
37 | toimg(p).save("data/reconstructed.png")
38 | 
39 | 


--------------------------------------------------------------------------------
/research/mi_lira_2021/fprtpr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/research/mi_lira_2021/fprtpr.png


--------------------------------------------------------------------------------
/research/mi_lira_2021/score.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | import numpy as np
17 | import os
18 | import multiprocessing as mp
19 | 
20 | 
21 | def load_one(base):
22 |     """
23 |     This loads a  logits and converts it to a scored prediction.
24 |     """
25 |     root = os.path.join(logdir,base,'logits')
26 |     if not os.path.exists(root): return None
27 | 
28 |     if not os.path.exists(os.path.join(logdir,base,'scores')):
29 |         os.mkdir(os.path.join(logdir,base,'scores'))
30 |     
31 |     for f in os.listdir(root):
32 |         try:
33 |             opredictions = np.load(os.path.join(root,f))
34 |         except:
35 |             print("Fail")
36 |             continue
37 | 
38 |         ## Be exceptionally careful.
39 |         ## Numerically stable everything, as described in the paper.
40 |         predictions = opredictions - np.max(opredictions, axis=3, keepdims=True)
41 |         predictions = np.array(np.exp(predictions), dtype=np.float64)
42 |         predictions = predictions/np.sum(predictions,axis=3,keepdims=True)
43 | 
44 |         COUNT = predictions.shape[0]
45 |         #  x num_examples x num_augmentations x logits
46 |         y_true = predictions[np.arange(COUNT),:,:,labels[:COUNT]]
47 |         print(y_true.shape)
48 | 
49 |         print('mean acc',np.mean(predictions[:,0,0,:].argmax(1)==labels[:COUNT]))
50 |         
51 |         predictions[np.arange(COUNT),:,:,labels[:COUNT]] = 0
52 |         y_wrong = np.sum(predictions, axis=3)
53 | 
54 |         logit = (np.log(y_true.mean((1))+1e-45) - np.log(y_wrong.mean((1))+1e-45))
55 | 
56 |         np.save(os.path.join(logdir, base, 'scores', f), logit)
57 | 
58 | 
59 | def load_stats():
60 |     with mp.Pool(8) as p:
61 |         p.map(load_one, [x for x in os.listdir(logdir) if 'exp' in x])
62 | 
63 | 
64 | logdir = sys.argv[1]
65 | labels = np.load(os.path.join(logdir,"y_train.npy"))
66 | load_stats()
67 | 


--------------------------------------------------------------------------------
/research/mi_poison_2022/fprtpr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/research/mi_poison_2022/fprtpr.png


--------------------------------------------------------------------------------
/research/mi_poison_2022/logs/.keep:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     https://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/research/pate_2017/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:private"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | filegroup(
 6 |     name = "ignore_srcs",
 7 |     srcs = [
 8 |         "aggregation.py",
 9 |         "analysis.py",
10 |         "deep_cnn.py",
11 |         "input.py",
12 |         "metrics.py",
13 |         "train_student.py",
14 |         "train_teachers.py",
15 |         "utils.py",
16 |     ],
17 |     tags = ["ignore_srcs"],
18 | )
19 | 


--------------------------------------------------------------------------------
/research/pate_2017/metrics.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | 
20 | import numpy as np
21 | 
22 | 
23 | def accuracy(logits, labels):
24 |   """
25 |   Return accuracy of the array of logits (or label predictions) wrt the labels
26 |   :param logits: this can either be logits, probabilities, or a single label
27 |   :param labels: the correct labels to match against
28 |   :return: the accuracy as a float
29 |   """
30 |   assert len(logits) == len(labels)
31 | 
32 |   if len(np.shape(logits)) > 1:
33 |     # Predicted labels are the argmax over axis 1
34 |     predicted_labels = np.argmax(logits, axis=1)
35 |   else:
36 |     # Input was already labels
37 |     assert len(np.shape(logits)) == 1
38 |     predicted_labels = logits
39 | 
40 |   # Check against correct labels to compute correct guesses
41 |   correct = np.sum(predicted_labels == labels.reshape(len(labels)))
42 | 
43 |   # Divide by number of labels to obtain accuracy
44 |   accuracy = float(correct) / len(labels)
45 | 
46 |   # Return float value
47 |   return accuracy
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | 
17 | # Be sure to clone https://github.com/openai/improved-gan
18 | # and add improved-gan/mnist_svhn_cifar10 to your PATH variable
19 | 
20 | # Download labels used to train the student
21 | wget https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_student_labels_lap_20.npy
22 | 
23 | # Train the student using improved-gan 
24 | THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600
25 | 
26 | 


--------------------------------------------------------------------------------
/research/pate_2017/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | 
17 | def batch_indices(batch_nb, data_length, batch_size):
18 |   """
19 |   This helper function computes a batch start and end index
20 |   :param batch_nb: the batch number
21 |   :param data_length: the total length of the data being parsed by batches
22 |   :param batch_size: the number of inputs in each batch
23 |   :return: pair of (start, end) indices
24 |   """
25 |   # Batch start and end index
26 |   start = int(batch_nb * batch_size)
27 |   end = int((batch_nb + 1) * batch_size)
28 | 
29 |   # When there are not enough inputs left, we reuse some to complete the batch
30 |   if end > data_length:
31 |     shift = end - data_length
32 |     start -= shift
33 |     end -= shift
34 | 
35 |   return start, end
36 | 


--------------------------------------------------------------------------------
/research/pate_2018/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:private"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | filegroup(
 6 |     name = "ignore_srcs",
 7 |     srcs = [
 8 |         "core.py",
 9 |         "core_test.py",
10 |         "smooth_sensitivity.py",
11 |         "smooth_sensitivity_test.py",
12 |     ],
13 |     tags = ["ignore_srcs"],
14 | )
15 | 


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:private"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | filegroup(
 6 |     name = "ignore_srcs",
 7 |     srcs = [
 8 |         "download.py",
 9 |         "plot_ls_q.py",
10 |         "plot_partition.py",
11 |         "plots_for_slides.py",
12 |         "rdp_bucketized.py",
13 |         "rdp_cumulative.py",
14 |         "smooth_sensitivity_table.py",
15 |         "utility_queries_answered.py",
16 |     ],
17 |     tags = ["ignore_srcs"],
18 | )
19 | 


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/README.md:
--------------------------------------------------------------------------------
 1 | Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas
 2 | Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar
 3 | Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908).
 4 | 
 5 | 
 6 | ### Requirements
 7 | 
 8 | * Python, version &ge; 2.7
 9 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
10 | * matplotlib
11 | * numpy
12 | * scipy
13 | * sympy (for smooth sensitivity analysis)  
14 | * write access to the current directory (otherwise, output directories in download.py and *.sh
15 | scripts must be changed)
16 | 
17 | ## Reproducing Figures 1 and 5, and Table 2
18 | 
19 | Before running any of the analysis scripts, create the data/ directory and download votes files by running\
20 | `$ python download.py`
21 | 
22 | To generate Figures 1 and 5 run\
23 | `$ sh generate_figures.sh`\
24 | The output is written to the figures/ directory.
25 | 
26 | For Table 2 run (may take several hours)\
27 | `$ sh generate_table.sh`\
28 | The output is written to the console.
29 | 
30 | For data-independent bounds (for comparison with Table 2), run\
31 | `$ sh generate_table_data_independent.sh`\
32 | The output is written to the console.
33 | 
34 | ## Files in this directory
35 | 
36 | *   generate_figures.sh &mdash; Master script for generating Figures 1 and 5.
37 | 
38 | *   generate_table.sh &mdash; Master script for generating Table 2.
39 | 
40 | *   generate_table_data_independent.sh &mdash; Master script for computing data-independent
41 |     bounds.
42 | 
43 | *   rdp_bucketized.py &mdash; Script for producing Figure 1 (right) and Figure 5 (right).
44 | 
45 | *   rdp_cumulative.py &mdash; Script for producing Figure 1 (middle) and Figure 5 (left).
46 |    
47 | *   smooth_sensitivity_table.py &mdash; Script for generating Table 2.
48 | 
49 | *   utility_queries_answered &mdash; Script for producing Figure 1 (left).
50 | 
51 | *   plot_partition.py &mdash; Script for producing partition.pdf, a detailed breakdown of privacy
52 | costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).
53 | 
54 | *   plots_for_slides.py &mdash; Script for producing several plots for the slide deck. 
55 | 
56 | *   download.py &mdash; Utility script for populating the data/ directory.
57 | 
58 | *   plot_ls_q.py is not used.
59 | 
60 | 
61 | All Python files take flags. Run script_name.py --help for help on flags.
62 | 


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/download.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Script to download votes files to the data/ directory.
16 | """
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | from six.moves import urllib
23 | import os
24 | import tarfile
25 | 
26 | FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz'
27 | DATA_DIR = 'data/'
28 | 
29 | 
30 | def download():
31 |   print('Downloading ' + FILE_URI)
32 |   tar_filename, _ = urllib.request.urlretrieve(FILE_URI)
33 |   print('Unpacking ' + tar_filename)
34 |   with tarfile.open(tar_filename, "r:gz") as tar:
35 |     tar.extractall(DATA_DIR)
36 |   print('Done!')
37 | 
38 | 
39 | if __name__ == '__main__':
40 |   if not os.path.exists(DATA_DIR):
41 |     print('Data directory does not exist. Creating ' + DATA_DIR)
42 |     os.makedirs(DATA_DIR)
43 |   download()
44 | 


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/generate_figures.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 | 
17 | 
18 | counts_file="data/glyph_5000_teachers.npy"
19 | output_dir="figures/"
20 | 
21 | mkdir -p $output_dir
22 | 
23 | if [ ! -d "$output_dir" ]; then
24 |   echo "Directory $output_dir does not exist."
25 |   exit 1
26 | fi
27 | 
28 | python rdp_bucketized.py \
29 |   --plot=small \
30 |   --counts_file=$counts_file \
31 |   --plot_file=$output_dir"noisy_thresholding_check_perf.pdf"
32 | 
33 | python rdp_bucketized.py \
34 |   --plot=large \
35 |   --counts_file=$counts_file \
36 |   --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf"
37 | 
38 | python rdp_cumulative.py \
39 |   --cache=False \
40 |   --counts_file=$counts_file \
41 |   --figures_dir=$output_dir
42 | 
43 | python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf"


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/generate_table.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 | 
17 | 
18 | echo "Reproducing Table 2. Takes a couple of hours."
19 | 
20 | executable="python smooth_sensitivity_table.py"
21 | data_dir="data"
22 | 
23 | echo
24 | echo "######## MNIST ########"
25 | echo
26 | 
27 | $executable \
28 |   --counts_file=$data_dir"/mnist_250_teachers.npy" \
29 |   --threshold=200 \
30 |   --sigma1=150 \
31 |   --sigma2=40 \
32 |   --queries=640 \
33 |   --delta=1e-5
34 | 
35 | echo
36 | echo "######## SVHN ########"
37 | echo
38 | 
39 | $executable \
40 |   --counts_file=$data_dir"/svhn_250_teachers.npy" \
41 |   --threshold=300 \
42 |   --sigma1=200 \
43 |   --sigma2=40 \
44 |   --queries=8500 \
45 |   --delta=1e-6
46 | 
47 | echo
48 | echo "######## Adult ########"
49 | echo
50 | 
51 | $executable \
52 |   --counts_file=$data_dir"/adult_250_teachers.npy" \
53 |   --threshold=300 \
54 |   --sigma1=200 \
55 |   --sigma2=40 \
56 |   --queries=1500 \
57 |   --delta=1e-5
58 | 
59 | echo
60 | echo "######## Glyph (Confident) ########"
61 | echo
62 | 
63 | $executable \
64 |   --counts_file=$data_dir"/glyph_5000_teachers.npy" \
65 |   --threshold=1000 \
66 |   --sigma1=500 \
67 |   --sigma2=100 \
68 |   --queries=12000 \
69 |   --delta=1e-8
70 | 
71 | echo
72 | echo "######## Glyph (Interactive, Round 1) ########"
73 | echo
74 | 
75 | $executable \
76 |   --counts_file=$data_dir"/glyph_round1.npy" \
77 |   --threshold=3500 \
78 |   --sigma1=1500 \
79 |   --sigma2=100 \
80 |   --delta=1e-8
81 | 
82 | echo
83 | echo "######## Glyph (Interactive, Round 2) ########"
84 | echo
85 | 
86 | $executable \
87 |   --counts_file=$data_dir"/glyph_round2.npy" \
88 |   --baseline_file=$data_dir"/glyph_round2_student.npy" \
89 |   --threshold=3500 \
90 |   --sigma1=2000 \
91 |   --sigma2=200 \
92 |   --teachers=5000 \
93 |   --delta=1e-8
94 | 


--------------------------------------------------------------------------------
/research/pate_2018/ICLR2018/generate_table_data_independent.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ==============================================================================
 16 | 
 17 | 
 18 | echo "Table 2 with data-independent analysis."
 19 | 
 20 | executable="python smooth_sensitivity_table.py"
 21 | data_dir="data"
 22 | 
 23 | echo
 24 | echo "######## MNIST ########"
 25 | echo
 26 | 
 27 | $executable \
 28 |   --counts_file=$data_dir"/mnist_250_teachers.npy" \
 29 |   --threshold=200 \
 30 |   --sigma1=150 \
 31 |   --sigma2=40 \
 32 |   --queries=640 \
 33 |   --delta=1e-5 \
 34 |   --data_independent
 35 | echo
 36 | echo "######## SVHN ########"
 37 | echo
 38 | 
 39 | $executable \
 40 |   --counts_file=$data_dir"/svhn_250_teachers.npy" \
 41 |   --threshold=300 \
 42 |   --sigma1=200 \
 43 |   --sigma2=40 \
 44 |   --queries=8500 \
 45 |   --delta=1e-6 \
 46 |   --data_independent
 47 | 
 48 | echo
 49 | echo "######## Adult ########"
 50 | echo
 51 | 
 52 | $executable \
 53 |   --counts_file=$data_dir"/adult_250_teachers.npy" \
 54 |   --threshold=300 \
 55 |   --sigma1=200 \
 56 |   --sigma2=40 \
 57 |   --queries=1500 \
 58 |   --delta=1e-5 \
 59 |   --data_independent
 60 | 
 61 | echo
 62 | echo "######## Glyph (Confident) ########"
 63 | echo
 64 | 
 65 | $executable \
 66 |   --counts_file=$data_dir"/glyph_5000_teachers.npy" \
 67 |   --threshold=1000 \
 68 |   --sigma1=500 \
 69 |   --sigma2=100 \
 70 |   --queries=12000 \
 71 |   --delta=1e-8 \
 72 |   --data_independent
 73 | 
 74 | echo
 75 | echo "######## Glyph (Interactive, Round 1) ########"
 76 | echo
 77 | 
 78 | $executable \
 79 |   --counts_file=$data_dir"/glyph_round1.npy" \
 80 |   --threshold=3500 \
 81 |   --sigma1=1500 \
 82 |   --sigma2=100 \
 83 |   --delta=1e-8 \
 84 |   --data_independent
 85 | 
 86 | echo
 87 | echo "######## Glyph (Interactive, Round 2) ########"
 88 | echo
 89 | 
 90 | $executable \
 91 |   --counts_file=$data_dir"/glyph_round2.npy" \
 92 |   --baseline_file=$data_dir"/glyph_round2_student.npy" \
 93 |   --threshold=3500 \
 94 |   --sigma1=2000 \
 95 |   --sigma2=200 \
 96 |   --teachers=5000 \
 97 |   --delta=1e-8 \
 98 |   --order=8.5 \
 99 |   --data_independent
100 | 


--------------------------------------------------------------------------------
/research/pate_2018/README.md:
--------------------------------------------------------------------------------
 1 | Implementation of an RDP privacy accountant and smooth sensitivity analysis for
 2 | the PATE framework. The underlying theory and supporting experiments appear in
 3 | "Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya
 4 | Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018,
 5 | https://arxiv.org/abs/1802.08908).
 6 | 
 7 | ## Overview
 8 | 
 9 | The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 
10 | by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from
11 | Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 
12 | framework enables model-agnostic training that provably provides [differential
13 | privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 
14 | dataset. 
15 | 
16 | The framework consists of _teachers_, the _student_ model, and the _aggregator_. The 
17 | teachers are models trained on disjoint subsets of the training datasets. The student
18 | model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by 
19 | interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 
20 | outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
21 | refuses to answer.
22 | 
23 | Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_,
24 | which means that they are solely the function of the aggregator's parameters. Alternatively, privacy 
25 | analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on
26 | the input distribution, the final privacy guarantees can be improved relative to the data-independent
27 | analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 
28 | therefore publishing these guarantees requires its own sanitization procedure. In our case 
29 | sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis.
30 | 
31 | The common machinery used for all privacy analyses in this repository is the 
32 | R&eacute;nyi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 
33 | 
34 | This repository contains implementations of privacy accountants and smooth 
35 | sensitivity analysis for several data-independent and data-dependent mechanism that together
36 | comprise the PATE framework.
37 | 
38 | 
39 | ### Requirements
40 | 
41 | * Python, version &ge; 2.7
42 | * absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
43 | * numpy
44 | * scipy
45 | * sympy (for smooth sensitivity analysis)
46 | * unittest (for testing)
47 | 
48 | 
49 | ### Self-testing
50 | 
51 | To verify the installation run
52 | ```bash
53 | $ python core_test.py
54 | $ python smooth_sensitivity_test.py
55 | ```
56 | 
57 | 
58 | ## Files in this directory
59 | 
60 | *   core.py &mdash; RDP privacy accountant for several vote aggregators (GNMax,
61 |     Threshold, Laplace).
62 | 
63 | *   smooth_sensitivity.py &mdash; Smooth sensitivity analysis for GNMax and
64 |     Threshold mechanisms.
65 | 
66 | *   core_test.py and smooth_sensitivity_test.py &mdash; Unit tests for the
67 |     files above.
68 | 
69 | ## Contact information
70 | 
71 | You may direct your comments to mironov@google.com and PR to @ilyamironov.
72 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy/DP Training library setup file for pip."""
15 | 
16 | import setuptools
17 | 
18 | with open('tensorflow_privacy/version.py') as file:
19 |   globals_dict = {}
20 |   exec(file.read(), globals_dict)  # pylint: disable=exec-used
21 |   VERSION = globals_dict['__version__']
22 | 
23 | README = (
24 |     'A Python library that includes implementations of TensorFlow optimizers '
25 |     'for training machine learning models with differential privacy.'
26 | )
27 | 
28 | setuptools.setup(
29 |     name='tensorflow_privacy',
30 |     version=VERSION,
31 |     description='A privacy-focused machine learning framework',
32 |     long_description=README,
33 |     long_description_content_type='text/plain',
34 |     url='https://github.com/tensorflow/privacy',
35 |     license='Apache-2.0',
36 |     packages=setuptools.find_packages(exclude=['*privacy.privacy_tests*']),
37 |     install_requires=[
38 |         'absl-py>=1.0,==1.*',
39 |         'dm-tree==0.1.8',
40 |         'dp-accounting==0.4.4',  # TODO(b/364653784)
41 |         'numpy~=1.21',
42 |         'packaging~=22.0',
43 |         'scikit-learn>=1.0,==1.*',
44 |         'scipy~=1.9',
45 |         'tensorflow>=2.4.0,<=2.15.0',
46 |         'tensorflow-probability~=0.22.0',
47 |     ],
48 |     python_requires='>=3.9.0,<3.12',
49 | )
50 | 


--------------------------------------------------------------------------------
/setup_empirical.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy/Privacy Tests library setup file for pip."""
15 | 
16 | import setuptools
17 | 
18 | with open('tensorflow_privacy/privacy/privacy_tests/version.py') as file:
19 |   globals_dict = {}
20 |   exec(file.read(), globals_dict)  # pylint: disable=exec-used
21 |   VERSION = globals_dict['__version__']
22 | 
23 | README = (
24 |     'A Python library that includes implementations of tests for empirical '
25 |     'privacy.'
26 | )
27 | 
28 | setuptools.setup(
29 |     name='tensorflow_empirical_privacy',
30 |     version=VERSION,
31 |     description='Tests for empirical privacy.',
32 |     long_description=README,
33 |     long_description_content_type='text/plain',
34 |     url='https://github.com/tensorflow/privacy',
35 |     license='Apache-2.0',
36 |     packages=setuptools.find_packages(include=['*privacy.privacy_tests*']),
37 |     install_requires=[
38 |         'absl-py>=1.0,==1.*',
39 |         'immutabledict~=2.2',
40 |         'matplotlib~=3.3',
41 |         'numpy~=1.21',
42 |         'pandas~=1.4',
43 |         'scikit-learn>=1.0,==1.*',
44 |         'scipy~=1.9',
45 |         'statsmodels==0.14.0',
46 |         'tensorflow>=2.4.0,<=2.15.0',
47 |         'tensorflow-privacy>=0.9.0',
48 |         'tf-models-official~=2.13',
49 |     ],
50 |     python_requires='>=3.9.0,<3.12',
51 | )
52 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/.bazelversion:
--------------------------------------------------------------------------------
1 | 5.1.1
2 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/BUILD:
--------------------------------------------------------------------------------
 1 | load("@bazel_skylib//rules:build_test.bzl", "build_test")
 2 | 
 3 | package(
 4 |     default_visibility = ["//visibility:public"],
 5 | )
 6 | 
 7 | licenses(["notice"])
 8 | 
 9 | exports_files([
10 |     "LICENSE",
11 | ])
12 | 
13 | py_library(
14 |     name = "tensorflow_privacy",
15 |     srcs = ["__init__.py"],
16 |     deps = [
17 |         ":version",
18 |         "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib",
19 |         "//tensorflow_privacy/privacy/analysis:tree_aggregation_accountant",
20 |         "//tensorflow_privacy/privacy/dp_query",
21 |         "//tensorflow_privacy/privacy/dp_query:discrete_gaussian_query",
22 |         "//tensorflow_privacy/privacy/dp_query:distributed_discrete_gaussian_query",
23 |         "//tensorflow_privacy/privacy/dp_query:distributed_skellam_query",
24 |         "//tensorflow_privacy/privacy/dp_query:gaussian_query",
25 |         "//tensorflow_privacy/privacy/dp_query:nested_query",
26 |         "//tensorflow_privacy/privacy/dp_query:no_privacy_query",
27 |         "//tensorflow_privacy/privacy/dp_query:normalized_query",
28 |         "//tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query",
29 |         "//tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_tree_query",
30 |         "//tensorflow_privacy/privacy/dp_query:quantile_estimator_query",
31 |         "//tensorflow_privacy/privacy/dp_query:restart_query",
32 |         "//tensorflow_privacy/privacy/dp_query:tree_aggregation",
33 |         "//tensorflow_privacy/privacy/dp_query:tree_aggregation_query",
34 |         "//tensorflow_privacy/privacy/dp_query:tree_range_query",
35 |         "//tensorflow_privacy/privacy/estimators:dnn",
36 |         "//tensorflow_privacy/privacy/keras_models:dp_keras_model",
37 |         "//tensorflow_privacy/privacy/logistic_regression:datasets",
38 |         "//tensorflow_privacy/privacy/logistic_regression:multinomial_logistic",
39 |         "//tensorflow_privacy/privacy/logistic_regression:single_layer_softmax",
40 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
41 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras_vectorized",
42 |         "//tensorflow_privacy/v1:tensorflow_privacy_v1",
43 |     ],
44 | )
45 | 
46 | build_test(
47 |     name = "tensorflow_privacy_build_test",
48 |     targets = [":tensorflow_privacy"],
49 | )
50 | 
51 | py_library(
52 |     name = "version",
53 |     srcs = ["version.py"],
54 | )
55 | 
56 | filegroup(
57 |     name = "ignore_srcs",
58 |     tags = ["ignore_srcs"],
59 | )
60 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 | 
3 | licenses(["notice"])
4 | 
5 | py_library(
6 |     name = "privacy",
7 |     srcs = ["__init__.py"],
8 | )
9 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "analysis",
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "compute_dp_sgd_privacy_lib",
12 |     srcs = ["compute_dp_sgd_privacy_lib.py"],
13 | )
14 | 
15 | py_binary(
16 |     name = "compute_dp_sgd_privacy",
17 |     srcs = ["compute_dp_sgd_privacy.py"],
18 |     deps = [":compute_dp_sgd_privacy_lib"],
19 | )
20 | 
21 | py_test(
22 |     name = "compute_dp_sgd_privacy_test",
23 |     size = "small",
24 |     timeout = "moderate",
25 |     srcs = ["compute_dp_sgd_privacy_test.py"],
26 |     deps = [":compute_dp_sgd_privacy_lib"],
27 | )
28 | 
29 | py_binary(
30 |     name = "compute_noise_from_budget",
31 |     srcs = ["compute_noise_from_budget.py"],
32 |     deps = [":compute_noise_from_budget_lib"],
33 | )
34 | 
35 | py_library(
36 |     name = "compute_noise_from_budget_lib",
37 |     srcs = ["compute_noise_from_budget_lib.py"],
38 | )
39 | 
40 | py_test(
41 |     name = "compute_noise_from_budget_test",
42 |     srcs = ["compute_noise_from_budget_test.py"],
43 |     deps = [":compute_noise_from_budget_lib"],
44 | )
45 | 
46 | py_library(
47 |     name = "gdp_accountant",
48 |     srcs = ["gdp_accountant.py"],
49 | )
50 | 
51 | py_library(
52 |     name = "tensor_buffer",
53 |     srcs = ["tensor_buffer.py"],
54 | )
55 | 
56 | py_test(
57 |     name = "tensor_buffer_eager_test",
58 |     size = "small",
59 |     srcs = ["tensor_buffer_eager_test.py"],
60 |     deps = [":tensor_buffer"],
61 | )
62 | 
63 | py_test(
64 |     name = "tensor_buffer_graph_test",
65 |     size = "small",
66 |     srcs = ["tensor_buffer_graph_test.py"],
67 |     deps = [":tensor_buffer"],
68 | )
69 | 
70 | py_library(
71 |     name = "tree_aggregation_accountant",
72 |     srcs = ["tree_aggregation_accountant.py"],
73 | )
74 | 
75 | py_test(
76 |     name = "tree_aggregation_accountant_test",
77 |     srcs = ["tree_aggregation_accountant_test.py"],
78 |     deps = [":tree_aggregation_accountant"],
79 | )
80 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | r"""Command-line script for computing privacy of a model trained with DP-SGD.
16 | 
17 | The script applies the RDP accountant to estimate privacy budget of an iterated
18 | Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags.
19 | 
20 | Example:
21 |   compute_noise_from_budget
22 |     --N=60000 \
23 |     --batch_size=256 \
24 |     --epsilon=2.92 \
25 |     --epochs=60 \
26 |     --delta=1e-5 \
27 |     --min_noise=1e-6
28 | 
29 | The output states that DP-SGD with these parameters should
30 | use a noise multiplier of 1.12.
31 | """
32 | 
33 | from absl import app
34 | from absl import flags
35 | 
36 | from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise
37 | 
38 | FLAGS = flags.FLAGS
39 | 
40 | flags.DEFINE_integer('N', None, 'Total number of examples')
41 | flags.DEFINE_integer('batch_size', None, 'Batch size')
42 | flags.DEFINE_float('epsilon', None, 'Target epsilon for DP-SGD')
43 | flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)')
44 | flags.DEFINE_float('delta', 1e-6, 'Target delta')
45 | flags.DEFINE_float('min_noise', 1e-5, 'Minimum noise level for search.')
46 | 
47 | 
48 | def main(argv):
49 |   del argv  # argv is not used.
50 | 
51 |   assert FLAGS.N is not None, 'Flag N is missing.'
52 |   assert FLAGS.batch_size is not None, 'Flag batch_size is missing.'
53 |   assert FLAGS.epsilon is not None, 'Flag epsilon is missing.'
54 |   assert FLAGS.epochs is not None, 'Flag epochs is missing.'
55 |   compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs,
56 |                 FLAGS.delta, FLAGS.min_noise)
57 | 
58 | 
59 | if __name__ == '__main__':
60 |   app.run(main)
61 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Library for computing privacy values for DP-SGD."""
16 | 
17 | import math
18 | 
19 | from absl import app
20 | import dp_accounting
21 | 
22 | 
23 | def compute_noise(n, batch_size, target_epsilon, epochs, delta, noise_lbd):
24 |   """Compute noise based on the given hyperparameters."""
25 |   q = batch_size / n  # q - the sampling ratio.
26 |   if q > 1:
27 |     raise app.UsageError('n must be larger than the batch size.')
28 |   orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
29 |             list(range(5, 64)) + [128, 256, 512])
30 |   steps = int(math.ceil(epochs * n / batch_size))
31 | 
32 |   def make_event_from_noise(sigma):
33 |     return dp_accounting.SelfComposedDpEvent(
34 |         dp_accounting.PoissonSampledDpEvent(
35 |             q, dp_accounting.GaussianDpEvent(sigma)), steps)
36 | 
37 |   def make_accountant():
38 |     return dp_accounting.rdp.RdpAccountant(orders)
39 | 
40 |   accountant = make_accountant()
41 |   accountant.compose(make_event_from_noise(noise_lbd))
42 |   init_epsilon = accountant.get_epsilon(delta)
43 | 
44 |   if init_epsilon < target_epsilon:  # noise_lbd was an overestimate
45 |     print('noise_lbd too large for target epsilon.')
46 |     return 0
47 | 
48 |   target_noise = dp_accounting.calibrate_dp_mechanism(
49 |       make_accountant, make_event_from_noise, target_epsilon, delta,
50 |       dp_accounting.LowerEndpointAndGuess(noise_lbd, noise_lbd * 2))
51 | 
52 |   print(
53 |       'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
54 |       ' over {} steps satisfies'.format(100 * q, target_noise, steps),
55 |       end=' ')
56 |   print('differential privacy with eps = {:.3g} and delta = {}.'.format(
57 |       target_epsilon, delta))
58 |   return target_noise
59 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | from absl.testing import absltest
17 | from absl.testing import parameterized
18 | 
19 | from tensorflow_privacy.privacy.analysis import compute_noise_from_budget_lib
20 | 
21 | 
22 | class ComputeNoiseFromBudgetTest(parameterized.TestCase):
23 | 
24 |   @parameterized.named_parameters(
25 |       ('Test0', 60000, 150, 0.941870567, 15, 1e-5, 1e-5, 1.3),
26 |       ('Test1', 100000, 100, 1.70928734, 30, 1e-7, 1e-6, 1.0),
27 |       ('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1),
28 |       ('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0),
29 |   )
30 |   def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta,
31 |                          min_noise, expected_noise):
32 |     self.skipTest('Disable test.')
33 |     target_noise = compute_noise_from_budget_lib.compute_noise(
34 |         n, batch_size, target_epsilon, epochs, delta, min_noise)
35 |     self.assertAlmostEqual(target_noise, expected_noise)
36 | 
37 | 
38 | if __name__ == '__main__':
39 |   absltest.main()
40 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/gdp_accountant.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing,  software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,  either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # =============================================================================
15 | r"""Implements privacy accounting for Gaussian Differential Privacy.
16 | 
17 | Applies the Dual and Central Limit Theorem (CLT) to estimate privacy budget of
18 | an iterated subsampled Gaussian Mechanism (by either uniform or Poisson
19 | subsampling).
20 | """
21 | 
22 | import numpy as np
23 | from scipy import optimize
24 | from scipy import stats
25 | 
26 | 
27 | def compute_mu_uniform(epoch, noise_multi, n, batch_size):
28 |   """Compute mu from uniform subsampling."""
29 | 
30 |   t = epoch * n / batch_size
31 |   c = batch_size * np.sqrt(t) / n
32 |   return np.sqrt(2) * c * np.sqrt(
33 |       np.exp(noise_multi**(-2)) * stats.norm.cdf(1.5 / noise_multi) +
34 |       3 * stats.norm.cdf(-0.5 / noise_multi) - 2)
35 | 
36 | 
37 | def compute_mu_poisson(epoch, noise_multi, n, batch_size):
38 |   """Compute mu from Poisson subsampling."""
39 | 
40 |   t = epoch * n / batch_size
41 |   return np.sqrt(np.exp(noise_multi**(-2)) - 1) * np.sqrt(t) * batch_size / n
42 | 
43 | 
44 | def delta_eps_mu(eps, mu):
45 |   """Compute dual between mu-GDP and (epsilon, delta)-DP."""
46 |   return stats.norm.cdf(-eps / mu + mu /
47 |                         2) - np.exp(eps) * stats.norm.cdf(-eps / mu - mu / 2)
48 | 
49 | 
50 | def eps_from_mu(mu, delta):
51 |   """Compute epsilon from mu given delta via inverse dual."""
52 | 
53 |   def f(x):
54 |     """Reversely solve dual by matching delta."""
55 |     return delta_eps_mu(x, mu) - delta
56 | 
57 |   return optimize.root_scalar(f, bracket=[0, 500], method='brentq').root
58 | 
59 | 
60 | def compute_eps_uniform(epoch, noise_multi, n, batch_size, delta):
61 |   """Compute epsilon given delta from inverse dual of uniform subsampling."""
62 | 
63 |   return eps_from_mu(
64 |       compute_mu_uniform(epoch, noise_multi, n, batch_size), delta)
65 | 
66 | 
67 | def compute_eps_poisson(epoch, noise_multi, n, batch_size, delta):
68 |   """Compute epsilon given delta from inverse dual of Poisson subsampling."""
69 | 
70 |   return eps_from_mu(
71 |       compute_mu_poisson(epoch, noise_multi, n, batch_size), delta)
72 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | 
17 | from tensorflow_privacy.privacy.analysis import tensor_buffer
18 | 
19 | tf.compat.v1.enable_eager_execution()
20 | 
21 | 
22 | class TensorBufferTest(tf.test.TestCase):
23 |   """Tests for TensorBuffer in eager mode."""
24 | 
25 |   def test_basic(self):
26 |     size, shape = 2, [2, 3]
27 | 
28 |     my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
29 | 
30 |     value1 = [[1, 2, 3], [4, 5, 6]]
31 |     my_buffer.append(value1)
32 |     self.assertAllEqual(my_buffer.values.numpy(), [value1])
33 | 
34 |     value2 = [[4, 5, 6], [7, 8, 9]]
35 |     my_buffer.append(value2)
36 |     self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
37 | 
38 |   def test_fail_on_scalar(self):
39 |     with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'):
40 |       tensor_buffer.TensorBuffer(1, ())
41 | 
42 |   def test_fail_on_inconsistent_shape(self):
43 |     size, shape = 1, [2, 3]
44 | 
45 |     my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
46 | 
47 |     with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
48 |                                 'Appending value of inconsistent shape.'):
49 |       my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
50 | 
51 |   def test_resize(self):
52 |     size, shape = 2, [2, 3]
53 | 
54 |     my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
55 | 
56 |     # Append three buffers. Third one should succeed after resizing.
57 |     value1 = [[1, 2, 3], [4, 5, 6]]
58 |     my_buffer.append(value1)
59 |     self.assertAllEqual(my_buffer.values.numpy(), [value1])
60 |     self.assertAllEqual(my_buffer.current_size.numpy(), 1)
61 |     self.assertAllEqual(my_buffer.capacity.numpy(), 2)
62 | 
63 |     value2 = [[4, 5, 6], [7, 8, 9]]
64 |     my_buffer.append(value2)
65 |     self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
66 |     self.assertAllEqual(my_buffer.current_size.numpy(), 2)
67 |     self.assertAllEqual(my_buffer.capacity.numpy(), 2)
68 | 
69 |     value3 = [[7, 8, 9], [10, 11, 12]]
70 |     my_buffer.append(value3)
71 |     self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3])
72 |     self.assertAllEqual(my_buffer.current_size.numpy(), 3)
73 |     # Capacity should have doubled.
74 |     self.assertAllEqual(my_buffer.capacity.numpy(), 4)
75 | 
76 | 
77 | if __name__ == '__main__':
78 |   tf.test.main()
79 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/analysis/tensor_buffer_graph_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.analysis import tensor_buffer
17 | 
18 | 
19 | class TensorBufferTest(tf.test.TestCase):
20 |   """Tests for TensorBuffer in graph mode."""
21 | 
22 |   def test_noresize(self):
23 |     """Test buffer does not resize if capacity is not exceeded."""
24 |     with self.cached_session() as sess:
25 |       size, shape = 2, [2, 3]
26 | 
27 |       my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
28 |       value1 = [[1, 2, 3], [4, 5, 6]]
29 |       with tf.control_dependencies([my_buffer.append(value1)]):
30 |         value2 = [[7, 8, 9], [10, 11, 12]]
31 |         with tf.control_dependencies([my_buffer.append(value2)]):
32 |           values = my_buffer.values
33 |           current_size = my_buffer.current_size
34 |           capacity = my_buffer.capacity
35 |       self.evaluate(tf.compat.v1.global_variables_initializer())
36 | 
37 |       v, cs, cap = sess.run([values, current_size, capacity])
38 |       self.assertAllEqual(v, [value1, value2])
39 |       self.assertEqual(cs, 2)
40 |       self.assertEqual(cap, 2)
41 | 
42 |   def test_resize(self):
43 |     """Test buffer resizes if capacity is exceeded."""
44 |     with self.cached_session() as sess:
45 |       size, shape = 2, [2, 3]
46 | 
47 |       my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
48 |       value1 = [[1, 2, 3], [4, 5, 6]]
49 |       with tf.control_dependencies([my_buffer.append(value1)]):
50 |         value2 = [[7, 8, 9], [10, 11, 12]]
51 |         with tf.control_dependencies([my_buffer.append(value2)]):
52 |           value3 = [[13, 14, 15], [16, 17, 18]]
53 |           with tf.control_dependencies([my_buffer.append(value3)]):
54 |             values = my_buffer.values
55 |             current_size = my_buffer.current_size
56 |             capacity = my_buffer.capacity
57 |       self.evaluate(tf.compat.v1.global_variables_initializer())
58 | 
59 |       v, cs, cap = sess.run([values, current_size, capacity])
60 |       self.assertAllEqual(v, [value1, value2, value3])
61 |       self.assertEqual(cs, 3)
62 |       self.assertEqual(cap, 4)
63 | 
64 | 
65 | if __name__ == '__main__':
66 |   tf.test.main()
67 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:private"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | filegroup(
 6 |     name = "ignore_srcs",
 7 |     srcs = [
 8 |         "__init__.py",
 9 |         "losses.py",
10 |         "losses_test.py",
11 |         "models.py",
12 |         "models_test.py",
13 |         "optimizers.py",
14 |         "optimizers_test.py",
15 |     ],
16 |     tags = ["ignore_srcs"],
17 | )
18 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/README.md:
--------------------------------------------------------------------------------
 1 | # BoltOn Subpackage
 2 | 
 3 | This package contains source code for the BoltOn method, a particular
 4 | differential-privacy (DP) technique that uses output perturbations and
 5 | leverages additional assumptions to provide a new way of approaching the
 6 | privacy guarantees.
 7 | 
 8 | ## BoltOn Description
 9 | 
10 | This method uses 4 key steps to achieve privacy guarantees:
11 |   1. Adds noise to weights after training (output perturbation).
12 |   2. Projects weights to R, the radius of the hypothesis space,
13 |       after each batch. This value is configurable by the user.
14 |   3. Limits learning rate
15 |   4. Uses a strongly convex loss function (see compile)
16 | 
17 | For more details on the strong convexity requirements, see:
18 | Bolt-on Differential Privacy for Scalable Stochastic Gradient
19 | Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf
20 | 
21 | ## Why BoltOn?
22 | 
23 | The major difference for the BoltOn method is that it injects noise post model
24 | convergence, rather than noising gradients or weights during training. This
25 | approach requires some additional constraints listed in the Description.
26 | Should the use-case and model satisfy these constraints, this is another
27 | approach that can be trained to maximize utility while maintaining the privacy.
28 | The paper describes in detail the advantages and disadvantages of this approach
29 | and its results compared to some other methods, namely noising at each iteration
30 | and no noising.
31 | 
32 | ## Tutorials
33 | 
34 | This package has a tutorial that can be found in the root tutorials directory,
35 | under `bolton_tutorial.py`.
36 | 
37 | ## Contribution
38 | 
39 | This package was initially contributed by Georgian Partners with the hope of
40 | growing the tensorflow/privacy library. There are several rich use cases for
41 | delta-epsilon privacy in machine learning, some of which can be explored here:
42 | https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e
43 | https://arxiv.org/pdf/1811.04911.pdf
44 | 
45 | ## Stability
46 | 
47 | As we are pegged on tensorflow2.0, this package may encounter stability
48 | issues in the ongoing development of tensorflow2.0.
49 | 
50 | This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0.b1 If you
51 | would like to use this subpackage, please do use one of these versions as we
52 | cannot guarantee it will work for all latest releases. If you do find issues,
53 | feel free to raise an issue to the contributors listed below.
54 | 
55 | ## Contacts
56 | 
57 | In addition to the maintainers of tensorflow/privacy listed in the root
58 | README.md, please feel free to contact members of Georgian Partners. In
59 | particular,
60 | 
61 | * Georgian Partners(@georgianpartners)
62 | * Ji Chao Zhang(@Jichaogp)
63 | * Christopher Choquette(@cchoquette)
64 | 
65 | ## Copyright
66 | 
67 | Copyright 2019 - Google LLC
68 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/bolt_on/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """BoltOn Method for privacy."""
15 | 
16 | import sys
17 | 
18 | from packaging import version
19 | import tensorflow.compat.v1 as tf
20 | 
21 | if version.Version(tf.__version__) < version.Version("2.0.0"):
22 |   raise ImportError("Please upgrade your version "
23 |                     "of tensorflow from: {0} to at least 2.0.0 to "
24 |                     "use privacy/bolt_on".format(
25 |                         version.Version(tf.__version__)))
26 | if hasattr(sys, "skip_tf_privacy_import"):  # Useful for standalone scripts.
27 |   pass
28 | else:
29 |   from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel  # pylint: disable=g-import-not-at-top
30 |   from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn  # pylint: disable=g-import-not-at-top
31 |   from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber  # pylint: disable=g-import-not-at-top
32 |   from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy  # pylint: disable=g-import-not-at-top
33 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/dp_query_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from absl.testing import parameterized
16 | import tensorflow as tf
17 | from tensorflow_privacy.privacy.dp_query import no_privacy_query
18 | 
19 | 
20 | class SumAggregationQueryTest(tf.test.TestCase, parameterized.TestCase):
21 | 
22 |   def test_initial_sample_state_works_on_tensorspecs(self):
23 |     query = no_privacy_query.NoPrivacySumQuery()
24 |     template = tf.TensorSpec.from_tensor(tf.constant([1.0, 2.0]))
25 |     sample_state = query.initial_sample_state(template)
26 |     expected = [0.0, 0.0]
27 |     self.assertAllClose(sample_state, expected)
28 | 
29 | 
30 | if __name__ == '__main__':
31 |   tf.test.main()
32 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from absl.testing import parameterized
16 | import tensorflow as tf
17 | from tensorflow_privacy.privacy.dp_query import no_privacy_query
18 | from tensorflow_privacy.privacy.dp_query import test_utils
19 | 
20 | 
21 | class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase):
22 | 
23 |   def test_sum(self):
24 |     record1 = tf.constant([2.0, 0.0])
25 |     record2 = tf.constant([-1.0, 1.0])
26 | 
27 |     query = no_privacy_query.NoPrivacySumQuery()
28 |     query_result, _ = test_utils.run_query(query, [record1, record2])
29 |     expected = [1.0, 1.0]
30 |     self.assertAllClose(query_result, expected)
31 | 
32 |   def test_no_privacy_average(self):
33 |     record1 = tf.constant([5.0, 0.0])
34 |     record2 = tf.constant([-1.0, 2.0])
35 | 
36 |     query = no_privacy_query.NoPrivacyAverageQuery()
37 |     query_result, _ = test_utils.run_query(query, [record1, record2])
38 |     expected = [2.0, 1.0]
39 |     self.assertAllClose(query_result, expected)
40 | 
41 |   def test_no_privacy_weighted_average(self):
42 |     record1 = tf.constant([4.0, 0.0])
43 |     record2 = tf.constant([-1.0, 1.0])
44 | 
45 |     weights = [1, 3]
46 | 
47 |     query = no_privacy_query.NoPrivacyAverageQuery()
48 |     query_result, _ = test_utils.run_query(
49 |         query, [record1, record2], weights=weights)
50 |     expected = [0.25, 0.75]
51 |     self.assertAllClose(query_result, expected)
52 | 
53 |   @parameterized.named_parameters(
54 |       ('type_mismatch', [1.0], (1.0,), TypeError),
55 |       ('too_few_on_left', [1.0], [1.0, 1.0], ValueError),
56 |       ('too_few_on_right', [1.0, 1.0], [1.0], ValueError))
57 |   def test_incompatible_records(self, record1, record2, error_type):
58 |     query = no_privacy_query.NoPrivacySumQuery()
59 |     with self.assertRaises(error_type):
60 |       test_utils.run_query(query, [record1, record2])
61 | 
62 | 
63 | if __name__ == '__main__':
64 |   tf.test.main()
65 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/normalized_query_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.dp_query import gaussian_query
17 | from tensorflow_privacy.privacy.dp_query import normalized_query
18 | from tensorflow_privacy.privacy.dp_query import test_utils
19 | 
20 | 
21 | class NormalizedQueryTest(tf.test.TestCase):
22 | 
23 |   def test_normalization(self):
24 |     record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
25 |     record2 = tf.constant([4.0, -3.0])  # Not clipped.
26 | 
27 |     sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0)
28 |     query = normalized_query.NormalizedQuery(
29 |         numerator_query=sum_query, denominator=2.0)
30 | 
31 |     query_result, _ = test_utils.run_query(query, [record1, record2])
32 |     expected = [0.5, 0.5]
33 |     self.assertAllClose(query_result, expected)
34 | 
35 | 
36 | if __name__ == '__main__':
37 |   tf.test.main()
38 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/dp_query/test_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utility methods for testing private queries.
15 | 
16 | Utility methods for testing private queries.
17 | """
18 | 
19 | 
20 | def run_query(query, records, global_state=None, weights=None):
21 |   """Executes query on the given set of records as a single sample.
22 | 
23 |   Args:
24 |     query: A PrivateQuery to run.
25 |     records: An iterable containing records to pass to the query.
26 |     global_state: The current global state. If None, an initial global state is
27 |       generated.
28 |     weights: An optional iterable containing the weights of the records.
29 | 
30 |   Returns:
31 |     A tuple (result, new_global_state) where "result" is the result of the
32 |       query and "new_global_state" is the updated global state.
33 |   """
34 |   if not global_state:
35 |     global_state = query.initial_global_state()
36 |   params = query.derive_sample_params(global_state)
37 |   sample_state = query.initial_sample_state(next(iter(records)))
38 |   if weights is None:
39 |     for record in records:
40 |       sample_state = query.accumulate_record(params, sample_state, record)
41 |   else:
42 |     for weight, record in zip(weights, records):
43 |       sample_state = query.accumulate_record(params, sample_state, record,
44 |                                              weight)
45 |   result, global_state, _ = query.get_noised_result(sample_state, global_state)
46 |   return result, global_state
47 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/BUILD:
--------------------------------------------------------------------------------
  1 | load("@rules_python//python:defs.bzl", "py_library")
  2 | 
  3 | package(default_visibility = ["//visibility:public"])
  4 | 
  5 | licenses(["notice"])
  6 | 
  7 | py_library(
  8 |     name = "estimators",
  9 |     srcs = ["__init__.py"],
 10 | )
 11 | 
 12 | py_library(
 13 |     name = "head_utils",
 14 |     srcs = [
 15 |         "head_utils.py",
 16 |     ],
 17 |     deps = [
 18 |         ":binary_class_head",
 19 |         ":multi_class_head",
 20 |     ],
 21 | )
 22 | 
 23 | py_library(
 24 |     name = "binary_class_head",
 25 |     srcs = [
 26 |         "binary_class_head.py",
 27 |     ],
 28 | )
 29 | 
 30 | py_library(
 31 |     name = "multi_class_head",
 32 |     srcs = [
 33 |         "multi_class_head.py",
 34 |     ],
 35 | )
 36 | 
 37 | py_library(
 38 |     name = "multi_label_head",
 39 |     srcs = [
 40 |         "multi_label_head.py",
 41 |     ],
 42 | )
 43 | 
 44 | py_library(
 45 |     name = "dnn",
 46 |     srcs = [
 47 |         "dnn.py",
 48 |     ],
 49 |     deps = [":head_utils"],
 50 | )
 51 | 
 52 | py_library(
 53 |     name = "test_utils",
 54 |     srcs = [
 55 |         "test_utils.py",
 56 |     ],
 57 | )
 58 | 
 59 | py_test(
 60 |     name = "binary_class_head_test",
 61 |     timeout = "long",
 62 |     srcs = ["binary_class_head_test.py"],
 63 |     deps = [
 64 |         ":binary_class_head",
 65 |         ":test_utils",
 66 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
 67 |     ],
 68 | )
 69 | 
 70 | py_test(
 71 |     name = "multi_class_head_test",
 72 |     timeout = "long",
 73 |     srcs = ["multi_class_head_test.py"],
 74 |     deps = [
 75 |         ":multi_class_head",
 76 |         ":test_utils",
 77 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
 78 |     ],
 79 | )
 80 | 
 81 | py_test(
 82 |     name = "multi_label_head_test",
 83 |     timeout = "long",
 84 |     srcs = ["multi_label_head_test.py"],
 85 |     deps = [
 86 |         ":multi_label_head",
 87 |         ":test_utils",
 88 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
 89 |     ],
 90 | )
 91 | 
 92 | py_test(
 93 |     name = "dnn_test",
 94 |     timeout = "long",
 95 |     srcs = ["dnn_test.py"],
 96 |     deps = [
 97 |         ":dnn",
 98 |         ":test_utils",
 99 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
100 |     ],
101 | )
102 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/binary_class_head_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import tensorflow as tf
17 | from tensorflow_privacy.privacy.estimators import binary_class_head
18 | from tensorflow_privacy.privacy.estimators import test_utils
19 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer
20 | from tensorflow_estimator.python.estimator import estimator
21 | 
22 | 
23 | class DPBinaryClassHeadTest(tf.test.TestCase):
24 |   """Tests for DP-enabled binary class heads."""
25 | 
26 |   def testLoss(self):
27 |     """Tests loss() returns per-example losses."""
28 | 
29 |     head = binary_class_head.DPBinaryClassHead()
30 |     features = {'feature_a': np.full((4), 1.0)}
31 |     labels = np.array([[1.0], [1.0], [1.0], [0.0]])
32 |     logits = np.full((4, 1), 0.5)
33 | 
34 |     actual_loss = head.loss(labels, logits, features)
35 |     expected_loss = tf.nn.sigmoid_cross_entropy_with_logits(
36 |         labels=labels, logits=logits)
37 | 
38 |     self.assertEqual(actual_loss.shape, [4, 1])
39 | 
40 |     if tf.executing_eagerly():
41 |       self.assertEqual(actual_loss.shape, [4, 1])
42 |       self.assertAllClose(actual_loss, expected_loss)
43 |       return
44 | 
45 |     self.assertAllClose(expected_loss, self.evaluate(actual_loss))
46 | 
47 |   def testCreateTPUEstimatorSpec(self):
48 |     """Tests that an Estimator built with this head works."""
49 | 
50 |     train_features, train_labels = test_utils.make_input_data(256, 2)
51 |     feature_columns = []
52 |     for key in train_features:
53 |       feature_columns.append(tf.feature_column.numeric_column(key=key))
54 | 
55 |     head = binary_class_head.DPBinaryClassHead()
56 |     optimizer = DPKerasSGDOptimizer(
57 |         learning_rate=0.5,
58 |         l2_norm_clip=1.0,
59 |         noise_multiplier=0.0,
60 |         num_microbatches=2)
61 |     model_fn = test_utils.make_model_fn(head, optimizer, feature_columns)
62 |     classifier = estimator.Estimator(model_fn=model_fn)
63 | 
64 |     classifier.train(
65 |         input_fn=test_utils.make_input_fn(train_features, train_labels, True),
66 |         steps=4)
67 | 
68 |     test_features, test_labels = test_utils.make_input_data(64, 2)
69 |     classifier.evaluate(
70 |         input_fn=test_utils.make_input_fn(test_features, test_labels, False),
71 |         steps=4)
72 | 
73 |     predict_features, predict_labels_ = test_utils.make_input_data(64, 2)
74 |     classifier.predict(
75 |         input_fn=test_utils.make_input_fn(predict_features, predict_labels_,
76 |                                           False))
77 | 
78 | 
79 | if __name__ == '__main__':
80 |   tf.test.main()
81 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/dnn.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Estimator heads that allow integration with TF Privacy."""
15 | 
16 | import tensorflow as tf
17 | 
18 | from tensorflow_privacy.privacy.estimators import head_utils
19 | from tensorflow_estimator.python.estimator import estimator
20 | from tensorflow_estimator.python.estimator.canned import dnn
21 | 
22 | 
23 | class DNNClassifier(estimator.Estimator):
24 |   """DP version of `tf.estimator.DNNClassifier`."""
25 | 
26 |   def __init__(
27 |       self,
28 |       hidden_units,
29 |       feature_columns,
30 |       model_dir=None,
31 |       n_classes=2,
32 |       weight_column=None,
33 |       label_vocabulary=None,
34 |       optimizer=None,
35 |       activation_fn=tf.nn.relu,
36 |       dropout=None,
37 |       config=None,
38 |       warm_start_from=None,
39 |       loss_reduction=tf.keras.losses.Reduction.NONE,
40 |       batch_norm=False,
41 |   ):
42 |     """See `tf.estimator.DNNClassifier`."""
43 |     head = head_utils.binary_or_multi_class_head(
44 |         n_classes,
45 |         weight_column=weight_column,
46 |         label_vocabulary=label_vocabulary,
47 |         loss_reduction=loss_reduction)
48 |     estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')
49 | 
50 |     def _model_fn(features, labels, mode, config):
51 |       return dnn.dnn_model_fn_v2(
52 |           features=features,
53 |           labels=labels,
54 |           mode=mode,
55 |           head=head,
56 |           hidden_units=hidden_units,
57 |           feature_columns=tuple(feature_columns or []),
58 |           optimizer=optimizer,
59 |           activation_fn=activation_fn,
60 |           dropout=dropout,
61 |           config=config,
62 |           batch_norm=batch_norm)
63 | 
64 |     super().__init__(
65 |         model_fn=_model_fn,
66 |         model_dir=model_dir,
67 |         config=config,
68 |         warm_start_from=warm_start_from)
69 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/dnn_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import functools
16 | 
17 | from absl.testing import parameterized
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.estimators import dnn
20 | from tensorflow_privacy.privacy.estimators import test_utils
21 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer
22 | 
23 | 
24 | class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
25 |   """Tests for DP-enabled DNNClassifier."""
26 | 
27 |   @parameterized.named_parameters(
28 |       ('BinaryClassDNN 1', 2),
29 |       ('MultiClassDNN 1', 3),
30 |   )
31 |   def testDNN(self, classes):
32 |     train_features, train_labels = test_utils.make_input_data(256, classes)
33 |     feature_columns = []
34 |     for key in train_features:
35 |       feature_columns.append(tf.feature_column.numeric_column(key=key))
36 | 
37 |     optimizer = functools.partial(
38 |         DPKerasSGDOptimizer,
39 |         learning_rate=0.5,
40 |         l2_norm_clip=1.0,
41 |         noise_multiplier=0.0,
42 |         num_microbatches=1)
43 | 
44 |     classifier = dnn.DNNClassifier(
45 |         hidden_units=[10],
46 |         activation_fn='relu',
47 |         feature_columns=feature_columns,
48 |         n_classes=classes,
49 |         optimizer=optimizer,
50 |         loss_reduction=tf.losses.Reduction.NONE)
51 | 
52 |     classifier.train(
53 |         input_fn=test_utils.make_input_fn(train_features, train_labels, True,
54 |                                           16))
55 | 
56 |     test_features, test_labels = test_utils.make_input_data(64, classes)
57 |     classifier.evaluate(
58 |         input_fn=test_utils.make_input_fn(test_features, test_labels, False,
59 |                                           16))
60 | 
61 |     predict_features, predict_labels = test_utils.make_input_data(64, classes)
62 |     classifier.predict(
63 |         input_fn=test_utils.make_input_fn(predict_features, predict_labels,
64 |                                           False))
65 | 
66 | 
67 | if __name__ == '__main__':
68 |   tf.test.main()
69 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/head_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Estimator heads that allow integration with TF Privacy."""
15 | 
16 | from tensorflow_privacy.privacy.estimators.binary_class_head import DPBinaryClassHead
17 | from tensorflow_privacy.privacy.estimators.multi_class_head import DPMultiClassHead
18 | 
19 | 
20 | def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
21 |                                loss_reduction):
22 |   """Creates either binary or multi-class head.
23 | 
24 |   Args:
25 |     n_classes: Number of label classes.
26 |     weight_column: A string or a `NumericColumn` created by
27 |       `tf.feature_column.numeric_column` defining feature column representing
28 |       weights. It is used to down weight or boost examples during training. It
29 |       will be multiplied by the loss of the example. If it is a string, it is
30 |       used as a key to fetch weight tensor from the `features`. If it is a
31 |       `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
32 |       weight_column.normalizer_fn is applied on it to get weight tensor.
33 |     label_vocabulary: A list of strings represents possible label values. If
34 |       given, labels must be string type and have any value in
35 |       `label_vocabulary`. If it is not given, that means labels are already
36 |       encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
37 |       integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there
38 |       will be errors if vocabulary is not provided and labels are string.
39 |     loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how to
40 |       reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
41 | 
42 |   Returns:
43 |     A `Head` instance.
44 |   """
45 |   if n_classes == 2:
46 |     head = DPBinaryClassHead(
47 |         weight_column=weight_column,
48 |         label_vocabulary=label_vocabulary,
49 |         loss_reduction=loss_reduction)
50 |   else:
51 |     head = DPMultiClassHead(
52 |         n_classes,
53 |         weight_column=weight_column,
54 |         label_vocabulary=label_vocabulary,
55 |         loss_reduction=loss_reduction)
56 |   return head
57 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "v1",
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "head",
12 |     srcs = [
13 |         "head.py",
14 |     ],
15 | )
16 | 
17 | py_library(
18 |     name = "dnn",
19 |     srcs = [
20 |         "dnn.py",
21 |     ],
22 |     deps = [":head"],
23 | )
24 | 
25 | py_library(
26 |     name = "linear",
27 |     srcs = [
28 |         "linear.py",
29 |     ],
30 |     deps = [":head"],
31 | )
32 | 
33 | py_test(
34 |     name = "head_test",
35 |     timeout = "long",
36 |     srcs = ["head_test.py"],
37 |     deps = [
38 |         ":head",
39 |         "//tensorflow_privacy/privacy/estimators:test_utils",
40 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
41 |     ],
42 | )
43 | 
44 | py_test(
45 |     name = "dnn_test",
46 |     timeout = "long",
47 |     srcs = ["dnn_test.py"],
48 |     deps = [
49 |         ":dnn",
50 |         "//tensorflow_privacy/privacy/estimators:test_utils",
51 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
52 |     ],
53 | )
54 | 
55 | py_test(
56 |     name = "linear_test",
57 |     timeout = "long",
58 |     srcs = ["linear_test.py"],
59 |     deps = [
60 |         ":linear",
61 |         "//tensorflow_privacy/privacy/estimators:test_utils",
62 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
63 |     ],
64 | )
65 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/dnn.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """DP version of DNNClassifiers v1."""
16 | 
17 | import tensorflow as tf
18 | 
19 | from tensorflow_privacy.privacy.estimators.v1 import head as head_lib
20 | from tensorflow_estimator.python.estimator import estimator
21 | from tensorflow_estimator.python.estimator.canned import dnn
22 | 
23 | 
24 | class DNNClassifier(estimator.Estimator):
25 |   """DP version of `tf.compat.v1.estimator.DNNClassifier`."""
26 | 
27 |   def __init__(
28 |       self,
29 |       hidden_units,
30 |       feature_columns,
31 |       model_dir=None,
32 |       n_classes=2,
33 |       weight_column=None,
34 |       label_vocabulary=None,
35 |       optimizer='Adagrad',
36 |       activation_fn=tf.nn.relu,
37 |       dropout=None,
38 |       input_layer_partitioner=None,
39 |       config=None,
40 |       warm_start_from=None,
41 |       loss_reduction=tf.compat.v1.losses.Reduction.SUM,  # For scalar summary.
42 |       batch_norm=False,
43 |   ):
44 |     """See `tf.compat.v1.estimator.DNNClassifier`."""
45 |     head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
46 |         n_classes, weight_column, label_vocabulary, loss_reduction)
47 |     estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN')
48 | 
49 |     def _model_fn(features, labels, mode, config):
50 |       """Call the defined shared dnn_model_fn."""
51 |       return dnn._dnn_model_fn(  # pylint: disable=protected-access
52 |           features=features,
53 |           labels=labels,
54 |           mode=mode,
55 |           head=head,
56 |           hidden_units=hidden_units,
57 |           feature_columns=tuple(feature_columns or []),
58 |           optimizer=optimizer,
59 |           activation_fn=activation_fn,
60 |           dropout=dropout,
61 |           input_layer_partitioner=input_layer_partitioner,
62 |           config=config,
63 |           batch_norm=batch_norm)
64 | 
65 |     super().__init__(
66 |         model_fn=_model_fn,
67 |         model_dir=model_dir,
68 |         config=config,
69 |         warm_start_from=warm_start_from)
70 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/dnn_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import functools
16 | 
17 | from absl.testing import parameterized
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.estimators import test_utils
20 | from tensorflow_privacy.privacy.estimators.v1 import dnn
21 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
22 | 
23 | # pylint: disable=g-deprecated-tf-checker
24 | 
25 | 
26 | class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
27 |   """Tests for DP-enabled DNNClassifier."""
28 | 
29 |   @parameterized.named_parameters(
30 |       ('BinaryClassDNN', 2, 1),
31 |       ('BinaryClassDNN 4', 2, 4),
32 |       ('MultiClassDNN 3', 3, 1),
33 |       ('MultiClassDNN 4', 4, 1),
34 |       ('MultiClassDNN 4 4', 4, 4),
35 |   )
36 |   def testDNN(self, n_classes, num_microbatches):
37 |     train_features, train_labels = test_utils.make_input_data(256, n_classes)
38 |     feature_columns = []
39 |     for key in train_features:
40 |       feature_columns.append(tf.feature_column.numeric_column(key=key))
41 | 
42 |     optimizer = functools.partial(
43 |         DPGradientDescentGaussianOptimizer,
44 |         learning_rate=0.5,
45 |         l2_norm_clip=1.0,
46 |         noise_multiplier=0.0,
47 |         num_microbatches=num_microbatches,
48 |     )
49 | 
50 |     classifier = dnn.DNNClassifier(
51 |         hidden_units=[10],
52 |         activation_fn='relu',
53 |         feature_columns=feature_columns,
54 |         n_classes=n_classes,
55 |         optimizer=optimizer,
56 |         loss_reduction=tf.losses.Reduction.NONE)
57 | 
58 |     classifier.train(
59 |         input_fn=test_utils.make_input_fn(train_features, train_labels, True,
60 |                                           16))
61 | 
62 |     test_features, test_labels = test_utils.make_input_data(64, n_classes)
63 |     classifier.evaluate(
64 |         input_fn=test_utils.make_input_fn(test_features, test_labels, False,
65 |                                           16))
66 | 
67 |     predict_features, predict_labels = test_utils.make_input_data(64, n_classes)
68 |     classifier.predict(
69 |         input_fn=test_utils.make_input_fn(predict_features, predict_labels,
70 |                                           False))
71 | 
72 | 
73 | if __name__ == '__main__':
74 |   tf.test.main()
75 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/linear.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """DP version of LinearClassifier v1."""
16 | 
17 | import tensorflow as tf
18 | from tensorflow_privacy.privacy.estimators.v1 import head as head_lib
19 | from tensorflow_estimator.python.estimator import estimator  # pylint: disable=g-deprecated-tf-checker
20 | from tensorflow_estimator.python.estimator.canned import linear  # pylint: disable=g-deprecated-tf-checker
21 | 
22 | 
23 | class LinearClassifier(estimator.Estimator):
24 |   """DP version of `tf.compat.v1.estimator.LinearClassifier`."""
25 | 
26 |   def __init__(
27 |       self,
28 |       feature_columns,
29 |       model_dir=None,
30 |       n_classes=2,
31 |       weight_column=None,
32 |       label_vocabulary=None,
33 |       optimizer='Ftrl',
34 |       config=None,
35 |       partitioner=None,
36 |       warm_start_from=None,
37 |       loss_reduction=tf.compat.v1.losses.Reduction.SUM,  # For scalar summary.
38 |       sparse_combiner='sum',
39 |   ):
40 |     """See `tf.compat.v1.estimator.LinearClassifier`."""
41 |     linear._validate_linear_sdca_optimizer_for_linear_classifier(  # pylint: disable=protected-access
42 |         feature_columns=feature_columns,
43 |         n_classes=n_classes,
44 |         optimizer=optimizer,
45 |         sparse_combiner=sparse_combiner,
46 |     )
47 |     estimator._canned_estimator_api_gauge.get_cell('Classifier').set('Linear')  # pylint: disable=protected-access
48 | 
49 |     head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
50 |         n_classes, weight_column, label_vocabulary, loss_reduction
51 |     )
52 | 
53 |     def _model_fn(features, labels, mode, config):
54 |       """Call the defined shared _linear_model_fn."""
55 |       return linear._linear_model_fn(  # pylint: disable=protected-access
56 |           features=features,
57 |           labels=labels,
58 |           mode=mode,
59 |           head=head,
60 |           feature_columns=tuple(feature_columns or []),
61 |           optimizer=optimizer,
62 |           partitioner=partitioner,
63 |           config=config,
64 |           sparse_combiner=sparse_combiner,
65 |       )
66 | 
67 |     super(LinearClassifier, self).__init__(
68 |         model_fn=_model_fn,
69 |         model_dir=model_dir,
70 |         config=config,
71 |         warm_start_from=warm_start_from,
72 |     )
73 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/estimators/v1/linear_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Tests for LinearClassifier."""
15 | 
16 | import functools
17 | 
18 | from absl.testing import parameterized
19 | import tensorflow as tf
20 | from tensorflow_privacy.privacy.estimators import test_utils
21 | from tensorflow_privacy.privacy.estimators.v1 import linear
22 | from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
23 | 
24 | # pylint: disable=g-deprecated-tf-checker
25 | 
26 | 
27 | class DPLinearClassifierClassifierTest(
28 |     tf.test.TestCase, parameterized.TestCase
29 | ):
30 |   """Tests for DP-enabled LinearClassifier."""
31 | 
32 |   @parameterized.named_parameters(
33 |       ('BinaryClassLinear 1', 2, 1),
34 |       ('BinaryClassLinear 4', 2, 4),
35 |       ('MultiClassLinear 3', 3, 1),
36 |       ('MultiClassLinear 4', 4, 1),
37 |       ('MultiClassLinear 4 1', 4, 2),
38 |   )
39 |   def testRunsWithoutErrors(self, n_classes, num_microbatches):
40 |     train_features, train_labels = test_utils.make_input_data(256, n_classes)
41 |     feature_columns = []
42 |     for key in train_features:
43 |       feature_columns.append(tf.feature_column.numeric_column(key=key))  # pylint: disable=g-deprecated-tf-checker
44 | 
45 |     optimizer = functools.partial(
46 |         DPGradientDescentGaussianOptimizer,
47 |         learning_rate=0.5,
48 |         l2_norm_clip=1.0,
49 |         noise_multiplier=0.0,
50 |         num_microbatches=num_microbatches,
51 |     )
52 | 
53 |     classifier = linear.LinearClassifier(
54 |         feature_columns=feature_columns,
55 |         n_classes=n_classes,
56 |         optimizer=optimizer,
57 |         loss_reduction=tf.compat.v1.losses.Reduction.SUM,
58 |     )
59 | 
60 |     classifier.train(
61 |         input_fn=test_utils.make_input_fn(
62 |             train_features, train_labels, True, 16
63 |         )
64 |     )
65 | 
66 |     test_features, test_labels = test_utils.make_input_data(64, n_classes)
67 |     classifier.evaluate(
68 |         input_fn=test_utils.make_input_fn(test_features, test_labels, False, 16)
69 |     )
70 | 
71 |     predict_features, predict_labels = test_utils.make_input_data(64, n_classes)
72 |     classifier.predict(
73 |         input_fn=test_utils.make_input_fn(
74 |             predict_features, predict_labels, False
75 |         )
76 |     )
77 | 
78 | 
79 | if __name__ == '__main__':
80 |   tf.test.main()
81 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/BUILD:
--------------------------------------------------------------------------------
  1 | package(default_visibility = ["//visibility:public"])
  2 | 
  3 | py_library(
  4 |     name = "type_aliases",
  5 |     srcs = ["type_aliases.py"],
  6 | )
  7 | 
  8 | py_library(
  9 |     name = "bert_encoder_utils",
 10 |     srcs = ["bert_encoder_utils.py"],
 11 |     deps = [":gradient_clipping_utils"],
 12 | )
 13 | 
 14 | py_test(
 15 |     name = "bert_encoder_utils_test",
 16 |     srcs = ["bert_encoder_utils_test.py"],
 17 |     deps = [":bert_encoder_utils"],
 18 | )
 19 | 
 20 | py_library(
 21 |     name = "common_manip_utils",
 22 |     srcs = ["common_manip_utils.py"],
 23 |     deps = [":type_aliases"],
 24 | )
 25 | 
 26 | py_library(
 27 |     name = "common_test_utils",
 28 |     srcs = ["common_test_utils.py"],
 29 |     deps = [
 30 |         ":clip_grads",
 31 |         ":layer_registry",
 32 |         ":type_aliases",
 33 |     ],
 34 | )
 35 | 
 36 | py_library(
 37 |     name = "gradient_clipping_utils",
 38 |     srcs = ["gradient_clipping_utils.py"],
 39 |     deps = [
 40 |         ":common_manip_utils",
 41 |         ":layer_registry",
 42 |         ":type_aliases",
 43 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry",
 44 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise:type_aliases",
 45 |     ],
 46 | )
 47 | 
 48 | py_test(
 49 |     name = "gradient_clipping_utils_test",
 50 |     srcs = ["gradient_clipping_utils_test.py"],
 51 |     shard_count = 8,
 52 |     deps = [
 53 |         ":gradient_clipping_utils",
 54 |         ":layer_registry",
 55 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry",
 56 |     ],
 57 | )
 58 | 
 59 | py_library(
 60 |     name = "layer_registry",
 61 |     srcs = ["layer_registry.py"],
 62 |     deps = [
 63 |         ":type_aliases",
 64 |         "//tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions:dense",
 65 |         "//tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions:embedding",
 66 |     ],
 67 | )
 68 | 
 69 | py_library(
 70 |     name = "clip_grads",
 71 |     srcs = ["clip_grads.py"],
 72 |     deps = [
 73 |         ":common_manip_utils",
 74 |         ":gradient_clipping_utils",
 75 |         ":layer_registry",
 76 |         ":type_aliases",
 77 |     ],
 78 | )
 79 | 
 80 | py_library(
 81 |     name = "noise_utils",
 82 |     srcs = ["noise_utils.py"],
 83 |     deps = ["//tensorflow_privacy/privacy/sparsity_preserving_noise:sparse_noise_utils"],
 84 | )
 85 | 
 86 | py_test(
 87 |     name = "clip_grads_test",
 88 |     srcs = ["clip_grads_test.py"],
 89 |     shard_count = 8,
 90 |     deps = [
 91 |         ":clip_grads",
 92 |         ":common_test_utils",
 93 |         ":gradient_clipping_utils",
 94 |         ":layer_registry",
 95 |         ":type_aliases",
 96 |     ],
 97 | )
 98 | 
 99 | py_test(
100 |     name = "noise_utils_test",
101 |     srcs = ["noise_utils_test.py"],
102 |     deps = [":noise_utils"],
103 | )
104 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/common_manip_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A collection of common utility functions for tensor/data manipulation."""
15 | 
16 | from typing import Optional
17 | 
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases
20 | 
21 | 
22 | def maybe_add_microbatch_axis(
23 |     x: type_aliases.PackedTensors,
24 |     num_microbatches: Optional[type_aliases.BatchSize],
25 | ) -> type_aliases.PackedTensors:
26 |   """Adds the microbatch axis to a collection of tensors.
27 | 
28 |   Args:
29 |     x: Model output or input tensors.
30 |     num_microbatches: If None, x is returned unchanged. Otherwise, must divide
31 |       the batch size.
32 | 
33 |   Returns:
34 |     The input tensor x, reshaped from [batch_size, ...] to
35 |     [num_microbatches, batch_size / num_microbatches, ...].
36 |   """
37 |   if num_microbatches is None:
38 |     return x
39 | 
40 |   def _expand(t):
41 |     with tf.control_dependencies(
42 |         [tf.assert_equal(tf.math.floormod(tf.shape(t)[0], num_microbatches), 0)]
43 |     ):
44 |       return tf.reshape(
45 |           t, tf.concat([[num_microbatches, -1], tf.shape(t)[1:]], axis=0)
46 |       )
47 | 
48 |   return tf.nest.map_structure(_expand, x)
49 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/dense_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils as ctu
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import dense_test
18 | 
19 | 
20 | class GradNormTpuTest(dense_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     super(dense_test.GradNormTest, self).setUp()
24 |     self.strategy = ctu.create_tpu_strategy()
25 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
26 |     self.using_tpu = True
27 | 
28 | 
29 | if __name__ == '__main__':
30 |   tf.test.main()
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/einsum_dense.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Fast clipping function for `tfm.nlp.layers.EinsumDense`."""
15 | 
16 | from collections.abc import Mapping, Sequence
17 | from typing import Any, Optional
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases
20 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import einsum_utils
21 | 
22 | 
23 | def einsum_layer_computation(
24 |     layer_instance: tf.keras.layers.EinsumDense,
25 |     input_args: Sequence[Any],
26 |     input_kwargs: Mapping[str, Any],
27 |     tape: tf.GradientTape,
28 |     num_microbatches: Optional[tf.Tensor] = None,
29 | ) -> type_aliases.RegistryFunctionOutput:
30 |   """Registry function for `tf.keras.layers.EinsumDense`.
31 | 
32 |   For the technical details, see the documentation of
33 |   `einsum_utils.compute_fast_einsum_gradient_norm()`.
34 | 
35 |   Args:
36 |     layer_instance: A `tf.keras.layers.EinsumDense` instance.
37 |     input_args: See `dense_layer_computation()` in `dense.py`.
38 |     input_kwargs: See `dense_layer_computation()` in `dense.py`.
39 |     tape: See `dense_layer_computation()` in `dense.py`.
40 |     num_microbatches: See `dense_layer_computation()` in `dense.py`.
41 | 
42 |   Returns:
43 |     See `dense_layer_computation()` in `dense.py`.
44 |   """
45 |   if input_kwargs:
46 |     raise ValueError("EinsumDense layer calls should not receive kwargs.")
47 |   del input_kwargs
48 |   if len(input_args) != 1:
49 |     raise ValueError("Only layer inputs of length 1 are permitted.")
50 |   orig_activation = layer_instance.activation
51 |   # Some activation functions may not apply a transform to the elements of the
52 |   # output individually (which is needed for the fast clipping trick to work).
53 |   # To avoid this case, we watch the variables that are only generated by the
54 |   # linear transformation of the `EinsumDense` layer instance.
55 |   layer_instance.activation = None
56 |   base_vars = layer_instance(*input_args)
57 |   tape.watch(base_vars)
58 |   layer_instance.activation = orig_activation
59 |   outputs = orig_activation(base_vars) if orig_activation else base_vars
60 | 
61 |   def sqr_norm_fn(grads):
62 |     return einsum_utils.compute_fast_einsum_squared_gradient_norm(
63 |         layer_instance.equation,
64 |         input_args[0],
65 |         grads,
66 |         layer_instance.bias_axes,
67 |         num_microbatches,
68 |     )
69 | 
70 |   return base_vars, outputs, sqr_norm_fn
71 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/einsum_dense_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import einsum_dense_test
18 | 
19 | 
20 | class GradNormTpuTest(einsum_dense_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     super(einsum_dense_test.GradNormTest, self).setUp()
24 |     self.strategy = common_test_utils.create_tpu_strategy()
25 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
26 |     self.using_tpu = True
27 | 
28 | 
29 | if __name__ == '__main__':
30 |   tf.test.main()
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/embedding_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import embedding_test
18 | 
19 | 
20 | class GradNormTpuTest(embedding_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     tf.config.experimental.disable_mlir_bridge()
24 |     super(embedding_test.GradNormTest, self).setUp()
25 |     self.strategy = common_test_utils.create_tpu_strategy()
26 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
27 |     self.using_tpu = True
28 | 
29 | 
30 | if __name__ == '__main__':
31 |   tf.test.main()
32 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/layer_normalization_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import layer_normalization_test
18 | 
19 | 
20 | class GradNormTpuTest(layer_normalization_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     super(layer_normalization_test.GradNormTest, self).setUp()
24 |     self.strategy = common_test_utils.create_tpu_strategy()
25 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
26 |     self.using_tpu = True
27 | 
28 | 
29 | if __name__ == '__main__':
30 |   tf.test.main()
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/multi_head_attention_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils as ctu
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import multi_head_attention_test
18 | 
19 | 
20 | class GradNormTpuTest(multi_head_attention_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     super(multi_head_attention_test.GradNormTest, self).setUp()
24 |     self.strategy = ctu.create_tpu_strategy()
25 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
26 |     self.using_tpu = True
27 | 
28 | 
29 | if __name__ == '__main__':
30 |   tf.test.main()
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_on_device_embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Fast clipping function for `tfm.nlp.layers.OnDeviceEmbedding`."""
15 | 
16 | from collections.abc import Mapping, Sequence
17 | from typing import Any, Optional
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases
20 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import registry_function_utils
21 | 
22 | 
23 | def nlp_on_device_embedding_layer_computation(
24 |     layer_instance: tf.keras.layers.Layer,
25 |     input_args: Sequence[Any],
26 |     input_kwargs: Mapping[str, Any],
27 |     tape: tf.GradientTape,
28 |     num_microbatches: Optional[tf.Tensor] = None,
29 | ) -> type_aliases.RegistryFunctionOutput:
30 |   """Registry function for `tfm.nlp.layers.OnDeviceEmbedding`.
31 | 
32 |   Args:
33 |     layer_instance: A `tfm.nlp.layers.OnDeviceEmbedding` instance.
34 |     input_args: See `dense_layer_computation()` in `dense.py`.
35 |     input_kwargs: See `dense_layer_computation()` in `dense.py`.
36 |     tape: See `dense_layer_computation()` in `dense.py`.
37 |     num_microbatches: See `dense_layer_computation()` in `dense.py`.
38 | 
39 |   Returns:
40 |     See `dense_layer_computation()` in `dense.py`.
41 |   """
42 |   if input_kwargs:
43 |     raise ValueError("Embedding layer calls should not receive kwargs.")
44 |   del input_kwargs
45 |   if len(input_args) != 1:
46 |     raise ValueError("Only layer inputs of length 1 are permitted.")
47 |   if hasattr(layer_instance, "_use_one_hot"):
48 |     if layer_instance._use_one_hot:  # pylint: disable=protected-access
49 |       raise NotImplementedError(
50 |           "The embedding feature '_use_one_hot' is not supported."
51 |       )
52 |   # NOTE: Since the implementation of `tfm.nlp.layers.OnDeviceEmbedding` uses
53 |   # `.set_shape()`, we can assume that inputs are not ragged.
54 |   input_ids = tf.cast(*input_args, tf.int32)
55 |   if len(layer_instance.trainable_variables) != 1:
56 |     raise ValueError(
57 |         "Only layer instances with only one set of trainable variables"
58 |         "are permitted."
59 |     )
60 |   base_vars = layer_instance.trainable_variables[0]
61 |   tape.watch(base_vars)
62 |   outputs = layer_instance(input_ids)
63 | 
64 |   def sqr_norm_fn(base_vars_grads: tf.IndexedSlices):
65 |     return registry_function_utils.embedding_sqr_norm_fn(
66 |         base_vars_grads.values,
67 |         input_ids,
68 |         num_microbatches,
69 |     )
70 | 
71 |   return base_vars, outputs, sqr_norm_fn
72 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_on_device_embedding_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import nlp_on_device_embedding_test
18 | 
19 | 
20 | class GradNormTpuTest(nlp_on_device_embedding_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     tf.config.experimental.disable_mlir_bridge()
24 |     super(nlp_on_device_embedding_test.GradNormTest, self).setUp()
25 |     self.strategy = common_test_utils.create_tpu_strategy()
26 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
27 |     self.using_tpu = True
28 | 
29 | 
30 | if __name__ == '__main__':
31 |   tf.test.main()
32 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_position_embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Fast clipping function for `tfm.nlp.layers.OnDeviceEmbedding`."""
15 | 
16 | from collections.abc import Mapping, Sequence
17 | from typing import Any, Optional
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_manip_utils
20 | from tensorflow_privacy.privacy.fast_gradient_clipping import type_aliases
21 | 
22 | 
23 | def nlp_position_embedding_layer_computation(
24 |     layer_instance: tf.keras.layers.Layer,
25 |     input_args: Sequence[Any],
26 |     input_kwargs: Mapping[str, Any],
27 |     tape: tf.GradientTape,
28 |     num_microbatches: Optional[tf.Tensor] = None,
29 | ) -> type_aliases.RegistryFunctionOutput:
30 |   """Registry function for `tfm.nlp.layers.PositionEmbedding`.
31 | 
32 |   Args:
33 |     layer_instance: A `tfm.nlp.layers.PositionEmbedding` instance.
34 |     input_args: See `dense_layer_computation()` in `dense.py`.
35 |     input_kwargs: See `dense_layer_computation()` in `dense.py`.
36 |     tape: See `dense_layer_computation()` in `dense.py`.
37 |     num_microbatches: See `dense_layer_computation()` in `dense.py`.
38 | 
39 |   Returns:
40 |     See `dense_layer_computation()` in `dense.py`.
41 |   """
42 |   if input_kwargs:
43 |     raise ValueError("Embedding layer calls should not receive kwargs.")
44 |   del input_kwargs
45 |   if len(input_args) != 1:
46 |     raise ValueError("Only layer inputs of length 1 are permitted.")
47 |   input_ids = tf.cast(*input_args, tf.int32)
48 |   base_vars = layer_instance(input_ids)
49 |   tape.watch(base_vars)
50 | 
51 |   def sqr_norm_fn(grads):
52 |     broadcast_axes = list(range(len(grads.shape)))
53 |     del broadcast_axes[layer_instance._seq_axis]  # pylint: disable=protected-access
54 |     del broadcast_axes[-1], broadcast_axes[0]
55 |     reduced_grads = tf.reduce_sum(grads, axis=broadcast_axes)
56 |     if num_microbatches is not None:
57 |       reduced_grads = common_manip_utils.maybe_add_microbatch_axis(
58 |           reduced_grads,
59 |           num_microbatches,
60 |       )
61 |       reduced_grads = tf.reduce_sum(reduced_grads, axis=1)
62 |     reduction_axes = tf.range(1, tf.rank(reduced_grads))
63 |     return tf.reduce_sum(tf.square(reduced_grads), axis=reduction_axes)
64 | 
65 |   return base_vars, base_vars, sqr_norm_fn
66 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/registry_functions/nlp_position_embedding_tpu_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import tensorflow as tf
16 | from tensorflow_privacy.privacy.fast_gradient_clipping import common_test_utils
17 | from tensorflow_privacy.privacy.fast_gradient_clipping.registry_functions import nlp_position_embedding_test
18 | 
19 | 
20 | class GradNormTpuTest(nlp_position_embedding_test.GradNormTest):
21 | 
22 |   def setUp(self):
23 |     super(nlp_position_embedding_test.GradNormTest, self).setUp()
24 |     self.strategy = common_test_utils.create_tpu_strategy()
25 |     self.assertIn('TPU', self.strategy.extended.worker_devices[0])
26 |     self.using_tpu = True
27 | 
28 | 
29 | if __name__ == '__main__':
30 |   tf.test.main()
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/fast_gradient_clipping/type_aliases.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A collection of type aliases used throughout the clipping library."""
15 | 
16 | from collections.abc import Callable, Iterable, Mapping, Sequence
17 | from typing import Any, Optional, Union
18 | import tensorflow as tf
19 | 
20 | 
21 | # Tensorflow aliases.
22 | Tensor = Union[tf.Tensor, tf.IndexedSlices, tf.SparseTensor, tf.RaggedTensor]
23 | 
24 | PackedTensors = Union[Tensor, Iterable[Tensor], Mapping[str, Tensor]]
25 | 
26 | InputTensors = PackedTensors
27 | 
28 | OutputTensors = Union[Tensor, Iterable[Tensor]]
29 | 
30 | BatchSize = Union[int, tf.Tensor]
31 | 
32 | LossFn = Callable[..., tf.Tensor]
33 | 
34 | # Layer Registry aliases.
35 | SquareNormFunction = Callable[[OutputTensors], tf.Tensor]
36 | 
37 | RegistryFunctionOutput = tuple[Any, OutputTensors, SquareNormFunction]
38 | 
39 | RegistryFunction = Callable[
40 |     [
41 |         Any,
42 |         tuple[Any, ...],
43 |         Mapping[str, Any],
44 |         tf.GradientTape,
45 |         Union[tf.Tensor, None],
46 |     ],
47 |     RegistryFunctionOutput,
48 | ]
49 | 
50 | # Clipping aliases.
51 | GeneratorFunction = Optional[Callable[[Any, tuple, Mapping], tuple[Any, Any]]]
52 | 
53 | # Testing aliases.
54 | LayerGenerator = Callable[[int, int], tf.keras.layers.Layer]
55 | 
56 | ModelGenerator = Callable[
57 |     [LayerGenerator, Sequence[int], Sequence[int]], tf.keras.Model
58 | ]
59 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/keras_models/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "keras_models",
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "dp_keras_model",
12 |     srcs = [
13 |         "dp_keras_model.py",
14 |     ],
15 |     deps = [
16 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:clip_grads",
17 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:common_manip_utils",
18 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils",
19 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:noise_utils",
20 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise:layer_registry",
21 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise:sparse_noise_utils",
22 |     ],
23 | )
24 | 
25 | py_test(
26 |     name = "dp_keras_model_test",
27 |     srcs = ["dp_keras_model_test.py"],
28 |     shard_count = 16,
29 |     deps = [
30 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:layer_registry",
31 |         "//tensorflow_privacy/privacy/keras_models:dp_keras_model",
32 |     ],
33 | )
34 | 
35 | py_test(
36 |     name = "dp_keras_model_distributed_test",
37 |     timeout = "long",
38 |     srcs = ["dp_keras_model_distributed_test.py"],
39 |     tags = [
40 |         "manual",
41 |     ],
42 |     deps = [
43 |         ":dp_keras_model",
44 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:layer_registry",
45 |     ],
46 | )
47 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/keras_models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/logistic_regression/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "logistic_regression",
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "multinomial_logistic",
12 |     srcs = ["multinomial_logistic.py"],
13 |     deps = [
14 |         ":datasets",
15 |         ":single_layer_softmax",
16 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras",
17 |     ],
18 | )
19 | 
20 | py_test(
21 |     name = "multinomial_logistic_test",
22 |     size = "large",
23 |     srcs = ["multinomial_logistic_test.py"],
24 |     local = True,
25 |     deps = [
26 |         ":datasets",
27 |         ":multinomial_logistic",
28 |         "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib",
29 |     ],
30 | )
31 | 
32 | py_library(
33 |     name = "datasets",
34 |     srcs = ["datasets.py"],
35 | )
36 | 
37 | py_test(
38 |     name = "datasets_test",
39 |     size = "small",
40 |     srcs = ["datasets_test.py"],
41 |     tags = ["requires-net:external"],
42 |     deps = [":datasets"],
43 | )
44 | 
45 | py_library(
46 |     name = "single_layer_softmax",
47 |     srcs = ["single_layer_softmax.py"],
48 |     deps = [":datasets"],
49 | )
50 | 
51 | py_test(
52 |     name = "single_layer_softmax_test",
53 |     size = "medium",
54 |     srcs = ["single_layer_softmax_test.py"],
55 |     deps = [
56 |         ":datasets",
57 |         ":single_layer_softmax",
58 |     ],
59 | )
60 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/logistic_regression/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Implementation of a single-layer softmax classifier."""
15 | 
16 | from typing import List, Optional, Union, Tuple, Any
17 | 
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.logistic_regression import datasets
20 | 
21 | 
22 | def single_layer_softmax_classifier(
23 |     train_dataset: datasets.RegressionDataset,
24 |     test_dataset: datasets.RegressionDataset,
25 |     epochs: int,
26 |     num_classes: int,
27 |     optimizer: tf.keras.optimizers.Optimizer,
28 |     loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy',
29 |     batch_size: int = 32,
30 |     kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
31 | ) -> Tuple[Any, List[float]]:
32 |   """Trains a single layer neural network classifier with softmax activation.
33 | 
34 |   Args:
35 |     train_dataset: consists of num_train many labeled examples, where the labels
36 |       are in {0,1,...,num_classes-1}.
37 |     test_dataset: consists of num_test many labeled examples, where the labels
38 |       are in {0,1,...,num_classes-1}.
39 |     epochs: the number of epochs.
40 |     num_classes: the number of classes.
41 |     optimizer: a tf.keras optimizer.
42 |     loss: a tf.keras loss function.
43 |     batch_size: a positive integer.
44 |     kernel_regularizer: a regularization function.
45 | 
46 |   Returns:
47 |     List of test accuracies (one for each epoch) on test_dataset of model
48 |     trained on train_dataset.
49 |   """
50 |   one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes)
51 |   one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes)
52 |   model = tf.keras.Sequential()
53 |   model.add(
54 |       tf.keras.layers.Dense(
55 |           units=num_classes,
56 |           activation='softmax',
57 |           kernel_regularizer=kernel_regularizer))
58 |   model.compile(optimizer, loss=loss, metrics=['accuracy'])
59 |   history = model.fit(
60 |       train_dataset.points,
61 |       one_hot_train_labels,
62 |       batch_size=batch_size,
63 |       epochs=epochs,
64 |       validation_data=(test_dataset.points, one_hot_test_labels),
65 |       verbose=0)
66 |   weights = model.layers[0].weights
67 |   return weights, history.history['val_accuracy']
68 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from absl.testing import parameterized
18 | from tensorflow_privacy.privacy.logistic_regression import datasets
19 | from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
20 | 
21 | 
22 | class SingleLayerSoftmaxTest(parameterized.TestCase):
23 | 
24 |   @parameterized.parameters(
25 |       (5000, 500, 3, 40, 2, 0.05),
26 |       (5000, 500, 4, 40, 2, 0.05),
27 |       (10000, 1000, 3, 40, 4, 0.1),
28 |       (10000, 1000, 4, 40, 4, 0.1),
29 |   )
30 |   def test_single_layer_softmax(self, num_train, num_test, dimension, epochs,
31 |                                 num_classes, tolerance):
32 |     (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
33 |         num_train, num_test, dimension, num_classes)
34 |     _, accuracy = single_layer_softmax.single_layer_softmax_classifier(
35 |         train_dataset, test_dataset, epochs, num_classes, 'sgd')
36 |     self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
37 | 
38 | if __name__ == '__main__':
39 |   unittest.main()
40 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "__init__",  # There is already a membership_inference_attack.
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "keras_evaluation",
12 |     srcs = ["keras_evaluation.py"],
13 |     deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:keras_evaluation"],
14 | )
15 | 
16 | py_library(
17 |     name = "plotting",
18 |     srcs = ["plotting.py"],
19 |     deps = [
20 |         "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:plotting",
21 |     ],
22 | )
23 | 
24 | py_library(
25 |     name = "membership_inference_attack",
26 |     srcs = [
27 |         "data_structures.py",
28 |         "dataset_slicing.py",
29 |         "membership_inference_attack.py",
30 |         "models.py",
31 |         "plotting.py",
32 |         "seq2seq_mia.py",
33 |     ],
34 |     deps = [
35 |         "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack",
36 |     ],
37 | )
38 | 
39 | py_library(
40 |     name = "privacy_report",
41 |     srcs = ["privacy_report.py"],
42 |     deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack"],
43 | )
44 | 
45 | py_library(
46 |     name = "tf_estimator_evaluation",
47 |     srcs = ["tf_estimator_evaluation.py"],
48 |     deps = ["//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:tf_estimator_evaluation"],
49 | )
50 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/README.md:
--------------------------------------------------------------------------------
1 | The sources from this folder were moved to
2 | privacy/privacy_tests/membership_inference_attack.
3 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """The old location of Membership Inference Attack sources."""
15 | 
16 | import warnings
17 | 
18 | warnings.warn(
19 |     "\nMembership inference attack sources were moved. Please replace"
20 |     "\nimport tensorflow_privacy.privacy.membership_inference_attack\n"
21 |     "\nwith"
22 |     "\nimport tensorflow_privacy.privacy.privacy_tests.membership_inference_attack"
23 | )
24 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.membership_inference_attack import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/models.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/plotting.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Moved to privacy_attack/membership_inference_attack."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import *  # pylint: disable=wildcard-import
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/optimizers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "privacy_tests",
 7 |     srcs = ["__init__.py"],
 8 |     deps = [
 9 |         ":version",
10 |     ],
11 | )
12 | 
13 | py_test(
14 |     name = "utils_test",
15 |     timeout = "long",
16 |     srcs = ["utils_test.py"],
17 |     deps = [":utils"],
18 | )
19 | 
20 | py_test(
21 |     name = "epsilon_lower_bound_test",
22 |     srcs = ["epsilon_lower_bound_test.py"],
23 |     deps = [":epsilon_lower_bound"],
24 | )
25 | 
26 | py_library(
27 |     name = "utils",
28 |     srcs = ["utils.py"],
29 | )
30 | 
31 | py_library(
32 |     name = "epsilon_lower_bound",
33 |     srcs = ["epsilon_lower_bound.py"],
34 | )
35 | 
36 | py_library(
37 |     name = "version",
38 |     srcs = ["version.py"],
39 | )
40 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/README.md:
--------------------------------------------------------------------------------
1 | # Privacy tests
2 | 
3 | A good privacy-preserving model learns from the training data, but
4 | doesn't memorize individual samples. Excessive memorization is not only harmful
5 | for the model predictive power, but also presents a privacy risk.
6 | 
7 | This library provides empirical tests for measuring potential memorization.
8 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Empirical Privacy."""
15 | 
16 | from tensorflow_privacy.privacy.privacy_tests.version import __version__  # pylint: disable=g-bad-import-order
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We are happy to accept contributions to this project under the research folder.
 4 | The research folder is intended for the attacks that are not yet generic enough
 5 | to be included into the main library.
 6 | 
 7 | We are happy to accept contributions to the primary codebase, see below for more
 8 | details.
 9 | 
10 | Please follow these guidelines when sending us a pull request.
11 | 
12 | ## Contributor License Agreement
13 | 
14 | Contributions to this project must be accompanied by a Contributor License
15 | Agreement. You (or your employer) retain the copyright to your contribution,
16 | this simply gives us permission to use and redistribute your contributions as
17 | part of the project. Head over to <https://cla.developers.google.com/> to see
18 | your current agreements on file or to sign a new one.
19 | 
20 | You generally only need to submit a CLA once, so if you've already submitted
21 | one (even if it was for a different project), you probably don't need to do it
22 | again.
23 | 
24 | ## Community Guidelines
25 | 
26 | This project follows
27 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
28 | 
29 | ## Does my new feature belong here?
30 | 
31 | ### Research folder
32 | 
33 | We use the following principles to guide what we add to our libraries. If your
34 | contribution doesn't align with these principles, we're likely to decline.
35 | 
36 | * **Novelty:** The code should provide new attacks to the library. We will not
37 | accept code that duplicates existing attacks.
38 | * **Appropriate context and explanation:** The code should contain a README.md
39 | file based on the provided template.This template should explain the code's functionality, and provide basic steps on how to use it.
40 | * **Experiment-driven:** The code should contain an runnable example or a colab (e.g. on a toy model such as MNIST or CIFAR-10).
41 | * **Quality requirements:** (1) The code should adhere to the
42 | [Google Python style guide](https://google.github.io/styleguide/pyguide).
43 | (2) The public API of the attack should have clear code documentation (expected inputs/outputs)
44 | (3) The code should have reasonable unit test coverage (>60%);
45 | 
46 | ### Primary codebase
47 | 
48 | The primary codebase should include attacks that are of general interest and
49 | have a wide range of applications. For example, the standard membership
50 | inference test is applicable to virtually any classification model.
51 | 
52 | The code contributed to the primary codebase should have a production style API
53 | which is consistent with the API of other attacks. Most likely, Google and the
54 | contributing team will need to meet and discuss the API before starting the
55 | contribution.
56 | 
57 | 
58 | If you're uncertain whether a planned contribution fits with these principles,
59 | [open an issue](https://github.com/tensorflow/privacy/issues/new)
60 | and describe what you want to add. We'll let you know whether it's something we
61 | want to include and will help you figure out the best way to implement it.
62 | 
63 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorflow/privacy/a640ca62ca80d2e4e534fbecf2678e3d0786a2fa/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_binary(
 6 |     name = "example",
 7 |     srcs = ["example.py"],
 8 |     deps = [
 9 |         "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack",
10 |         "//tensorflow_privacy/privacy/privacy_tests/membership_inference_attack:privacy_report",
11 |     ],
12 | )
13 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/README.md:
--------------------------------------------------------------------------------
 1 | # Membership inference attack examples
 2 | 
 3 | ## Introductory codelab
 4 | 
 5 | The easiest way to get started is to go through [the introductory codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb).
 6 | This trains a simple image classification model and tests it against a series
 7 | of membership inference attacks.
 8 | 
 9 | For a more detailed overview of the library, please check the sections below.
10 | 
11 | ## End to end example
12 | As an alternative to the introductory codelab, we also have a standalone
13 | [example.py](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py).
14 | 
15 | ## Sequence to sequence models
16 | 
17 | If you're interested in sequence to sequence model attacks, please see the
18 | [seq2seq colab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb).
19 | 
20 | ## Membership probability score
21 | 
22 | If you're interested in the membership probability score (also called privacy
23 | risk score) developed by Song and Mittal, please see their
24 | [membership probability codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb).
25 | 
26 | The accompanying paper is on [arXiv](https://arxiv.org/abs/2003.10595).
27 | 
28 | ## Word2Vec models
29 | 
30 | If you're interested in word2vec models, please see the
31 | [word2vec codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/word2vec_codelab.ipynb).
32 | 
33 | This notebook implements advanced membership inference, as well as a secret
34 | sharer attack. Based on [this paper](https://arxiv.org/abs/2004.00053) and
35 | [this code](https://github.com/google/embedding-tests).
36 | 
37 | ## Copyright
38 | 
39 | Copyright 2020 - Google LLC
40 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/BUILD:
--------------------------------------------------------------------------------
1 | package(default_visibility = ["//visibility:public"])
2 | 
3 | licenses(["notice"])
4 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Congzheng Song
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Plotting functionality for membership inference attack analysis.
15 | 
16 | Functions to plot ROC curves and histograms as well as functionality to store
17 | figures to colossus.
18 | """
19 | 
20 | from typing import Text, Iterable, Optional
21 | 
22 | import matplotlib.pyplot as plt
23 | import numpy as np
24 | from sklearn import metrics
25 | 
26 | 
27 | def save_plot(figure: plt.Figure, path: Text, outformat='png'):
28 |   """Store a figure to disk."""
29 |   if path is not None:
30 |     with open(path, 'wb') as f:
31 |       figure.savefig(f, bbox_inches='tight', format=outformat)
32 |     plt.close(figure)
33 | 
34 | 
35 | def plot_curve_with_area(x: Iterable[float],
36 |                          y: Iterable[float],
37 |                          xlabel: Text = 'x',
38 |                          ylabel: Text = 'y') -> plt.Figure:
39 |   """Plot the curve defined by inputs and the area under the curve.
40 | 
41 |   All entries of x and y are required to lie between 0 and 1.
42 |   For example, x could be recall and y precision, or x is fpr and y is tpr.
43 | 
44 |   Args:
45 |     x: Values on x-axis (1d)
46 |     y: Values on y-axis (must be same length as x)
47 |     xlabel: Label for x axis
48 |     ylabel: Label for y axis
49 | 
50 |   Returns:
51 |     The matplotlib figure handle
52 |   """
53 |   fig = plt.figure()
54 |   plt.plot([0, 1], [0, 1], 'k', lw=1.0)
55 |   plt.plot(x, y, lw=2, label=f'AUC: {metrics.auc(x, y):.3f}')
56 |   plt.xlabel(xlabel)
57 |   plt.ylabel(ylabel)
58 |   plt.gca().set_aspect('equal', adjustable='box')
59 |   plt.legend()
60 |   return fig
61 | 
62 | 
63 | def plot_histograms(train: Iterable[float],
64 |                     test: Iterable[float],
65 |                     xlabel: Text = 'x',
66 |                     thresh: Optional[float] = None) -> plt.Figure:
67 |   """Plot histograms of training versus test metrics."""
68 |   xmin = min(np.min(train), np.min(test))
69 |   xmax = max(np.max(train), np.max(test))
70 |   bins = np.linspace(xmin, xmax, 100)
71 |   fig = plt.figure()
72 |   plt.hist(test, bins=bins, density=True, alpha=0.5, label='test', log='y')
73 |   plt.hist(train, bins=bins, density=True, alpha=0.5, label='train', log='y')
74 |   if thresh is not None:
75 |     plt.axvline(thresh, c='r', label=f'threshold = {thresh:.3f}')
76 |   plt.xlabel(xlabel)
77 |   plt.ylabel('normalized counts (density)')
78 |   plt.legend()
79 |   return fig
80 | 
81 | 
82 | def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure:
83 |   """Plot the ROC curve and the area under the curve."""
84 |   return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
85 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utility functions for writing attack results to tensorboard."""
15 | 
16 | from typing import List, Union
17 | 
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
20 | from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
21 | 
22 | 
23 | def write_to_tensorboard_tf2(writers, tags, values, step):
24 |   """Write metrics to tensorboard.
25 | 
26 |   Args:
27 |     writers: a list of tensorboard writers or one writer to be used for metrics.
28 |       If it's a list, it should be of the same length as tags
29 |     tags: a list of tags of metrics
30 |     values: a list of values of metrics with the same length as tags
31 |     step: step for the tensorboard summary
32 |   """
33 |   if writers is None or not writers:
34 |     raise ValueError('write_to_tensorboard does not get any writer.')
35 | 
36 |   if not isinstance(writers, list):
37 |     writers = [writers] * len(tags)
38 | 
39 |   assert len(writers) == len(tags) == len(values)
40 | 
41 |   for writer, tag, val in zip(writers, tags, values):
42 |     with writer.as_default():
43 |       tf.summary.scalar(tag, val, step=step)
44 |       writer.flush()
45 | 
46 |   for writer in set(writers):
47 |     with writer.as_default():
48 |       writer.flush()
49 | 
50 | 
51 | def write_results_to_tensorboard_tf2(
52 |     attack_results: AttackResults,
53 |     writers: Union[tf.summary.SummaryWriter, List[tf.summary.SummaryWriter]],
54 |     step: int, merge_classifiers: bool):
55 |   """Write attack results to tensorboard.
56 | 
57 |   Args:
58 |     attack_results: results from attack
59 |     writers: a list of tensorboard writers or one writer to be used for metrics
60 |     step: step for the tensorboard summary
61 |     merge_classifiers: if true, plot different classifiers with the same
62 |       slicing_spec and metric in the same figure
63 |   """
64 |   if writers is None or not writers:
65 |     raise ValueError('write_results_to_tensorboard does not get any writer.')
66 | 
67 |   att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
68 |       attack_results)
69 |   if merge_classifiers:
70 |     att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
71 |     write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags,
72 |                              att_values, step)
73 |   else:
74 |     att_tags = [
75 |         'attack/' + f'{s}_{t}_{m}'
76 |         for t, s, m in zip(att_types, att_slices, att_metrics)
77 |     ]
78 |     write_to_tensorboard_tf2(writers, att_tags, att_values, step)
79 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/secret_sharer/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "secret_sharer",
 7 |     srcs = ["__init__.py"],
 8 | )
 9 | 
10 | py_library(
11 |     name = "exposures",
12 |     srcs = ["exposures.py"],
13 | )
14 | 
15 | py_test(
16 |     name = "exposures_test",
17 |     srcs = ["exposures_test.py"],
18 |     deps = [":exposures"],
19 | )
20 | 
21 | py_library(
22 |     name = "generate_secrets",
23 |     srcs = ["generate_secrets.py"],
24 | )
25 | 
26 | py_test(
27 |     name = "generate_secrets_test",
28 |     srcs = ["generate_secrets_test.py"],
29 |     deps = [":generate_secrets"],
30 | )
31 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/secret_sharer/README.md:
--------------------------------------------------------------------------------
 1 | # Secret Sharer Attack
 2 | 
 3 | A good privacy-preserving model learns from the training data, but doesn't
 4 | memorize it. This folder contains codes for conducting the Secret Sharer attack
 5 | from [this paper](https://arxiv.org/abs/1802.08232). It is a method to test if a
 6 | machine learning model memorizes its training data.
 7 | 
 8 | The high level idea is to insert some random sequences as “secrets” into the
 9 | training data, and then measure if the model has memorized those secrets. If
10 | there is significant memorization, it means that there can be potential privacy
11 | risk.
12 | 
13 | ## How to Use
14 | 
15 | ### Overview of the files
16 | 
17 | -   `generate_secrets.py` contains the code for generating secrets.
18 | -   `exposures.py` contains code for evaluating exposures.
19 | -   `secret_sharer_example.ipynb` is an example (character-level LSTM) for using
20 |     the above code to conduct secret sharer attack.
21 | 
22 | ### More Usage Examples
23 | 
24 | ## Word2Vec models
25 | 
26 | If you're interested in word2vec models, please see the
27 | [word2vec codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/word2vec_codelab.ipynb).
28 | 
29 | In addition to secret sharer, this notebook also implements membership inference
30 | attacks. Based on [this paper](https://arxiv.org/abs/2004.00053) and
31 | [this code](https://github.com/google/embedding-tests).
32 | 
33 | ### Contact / Feedback
34 | 
35 | Fill out this
36 | [Google form](https://docs.google.com/forms/d/1DPwr3_OfMcqAOA6sdelTVjIZhKxMZkXvs94z16UCDa4/edit)
37 | or reach out to us at tf-privacy@google.com and let us know how you’re using
38 | this module. We’re keen on hearing your stories, feedback, and suggestions!
39 | 
40 | ## Contributing
41 | 
42 | If you wish to add novel attacks to the attack library, please check our
43 | [guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md).
44 | 
45 | ## Copyright
46 | 
47 | Copyright 2021 - Google LLC
48 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/secret_sharer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from absl.testing import absltest
16 | import numpy as np
17 | from scipy import stats
18 | 
19 | from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
20 | 
21 | 
22 | class UtilsTest(absltest.TestCase):
23 | 
24 |   def __init__(self, methodname):
25 |     """Initialize the test class."""
26 |     super().__init__(methodname)
27 | 
28 |   def test_exposure_interpolation(self):
29 |     """Test exposure by interpolation."""
30 |     perplexities = {
31 |         '1': [0, 0.1],  # smallest perplexities
32 |         '2': [20.0],  # largest perplexities
33 |         '5': [3.5],  # rank = 4
34 |         '8': [3.5],  # rank = 4
35 |     }
36 |     perplexities_reference = [float(x) for x in range(1, 17)]
37 |     resulted_exposures = exposures.compute_exposure_interpolation(
38 |         perplexities, perplexities_reference)
39 |     num_perplexities_reference = len(perplexities_reference)
40 |     exposure_largest = np.log2(num_perplexities_reference)
41 |     exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
42 |         num_perplexities_reference + 1)
43 |     exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4)
44 |     expected_exposures = {
45 |         '1': np.array([exposure_largest] * 2),
46 |         '2': np.array([exposure_smallest]),
47 |         '5': np.array([exposure_rank4]),
48 |         '8': np.array([exposure_rank4])
49 |     }
50 | 
51 |     self.assertEqual(resulted_exposures.keys(), expected_exposures.keys())
52 |     for r in resulted_exposures.keys():
53 |       np.testing.assert_almost_equal(expected_exposures[r],
54 |                                      resulted_exposures[r])
55 | 
56 |   def test_exposure_extrapolation(self):
57 |     parameters = (4, 0, 1)
58 |     perplexities = {
59 |         '1': stats.skewnorm.rvs(*parameters, size=(2,)),
60 |         '10': stats.skewnorm.rvs(*parameters, size=(5,))
61 |     }
62 |     perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
63 |     resulted_exposures = exposures.compute_exposure_extrapolation(
64 |         perplexities, perplexities_reference)
65 |     fitted_parameters = stats.skewnorm.fit(perplexities_reference)
66 | 
67 |     self.assertEqual(resulted_exposures.keys(), perplexities.keys())
68 |     for r in resulted_exposures.keys():
69 |       np.testing.assert_almost_equal(
70 |           resulted_exposures[r],
71 |           -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))
72 | 
73 | 
74 | if __name__ == '__main__':
75 |   absltest.main()
76 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/privacy_tests/version.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy/Privacy Tests version."""
15 | 
16 | __version__ = '0.1.0'
17 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/sparsity_preserving_noise/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "sparse_noise_utils",
 7 |     srcs = ["sparse_noise_utils.py"],
 8 |     deps = [
 9 |         ":type_aliases",
10 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils",
11 |     ],
12 | )
13 | 
14 | py_test(
15 |     name = "sparse_noise_utils_test",
16 |     srcs = ["sparse_noise_utils_test.py"],
17 |     deps = [
18 |         ":sparse_noise_utils",
19 |         "//tensorflow_privacy/privacy/fast_gradient_clipping:gradient_clipping_utils",
20 |     ],
21 | )
22 | 
23 | py_library(
24 |     name = "type_aliases",
25 |     srcs = ["type_aliases.py"],
26 | )
27 | 
28 | py_library(
29 |     name = "layer_registry",
30 |     srcs = ["layer_registry.py"],
31 |     deps = [
32 |         ":type_aliases",
33 |         "//tensorflow_privacy/privacy/sparsity_preserving_noise/registry_functions:embedding",
34 |     ],
35 | )
36 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/sparsity_preserving_noise/layer_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Registry of layer classes to their contribution histogram functions."""
15 | 
16 | from typing import Type
17 | 
18 | import tensorflow as tf
19 | from tensorflow_privacy.privacy.sparsity_preserving_noise import type_aliases
20 | from tensorflow_privacy.privacy.sparsity_preserving_noise.registry_functions import embedding
21 | 
22 | 
23 | # ==============================================================================
24 | # Main class
25 | # ==============================================================================
26 | class LayerRegistry:
27 |   """Custom container for layer registry functions."""
28 | 
29 |   def __init__(self):
30 |     """Basic initialization of various internal dictionaries."""
31 |     self._layer_class_dict = {}
32 |     self._registry = {}
33 | 
34 |   def is_elem(self, layer_instance: tf.keras.layers.Layer) -> bool:
35 |     """Checks if a layer instance's class is in the registry."""
36 |     return hash(layer_instance.__class__) in self._registry
37 | 
38 |   def lookup(
39 |       self, layer_instance: tf.keras.layers.Layer
40 |   ) -> type_aliases.SparsityPreservingNoiseLayerRegistryFunction:
41 |     """Returns the layer registry function for a given layer instance."""
42 |     return self._registry[hash(layer_instance.__class__)]
43 | 
44 |   def insert(
45 |       self,
46 |       layer_class: Type[tf.keras.layers.Layer],
47 |       layer_registry_function: type_aliases.SparsityPreservingNoiseLayerRegistryFunction,
48 |   ):
49 |     """Inserts a layer registry function into the internal dictionaries."""
50 |     layer_key = hash(layer_class)
51 |     self._layer_class_dict[layer_key] = layer_class
52 |     self._registry[layer_key] = layer_registry_function
53 | 
54 | 
55 | # ==============================================================================
56 | # Main factory methods
57 | # ==============================================================================
58 | def make_default_layer_registry() -> LayerRegistry:
59 |   registry = LayerRegistry()
60 |   registry.insert(
61 |       tf.keras.layers.Embedding,
62 |       embedding.embedding_layer_contribution_histogram,
63 |   )
64 |   return registry
65 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/sparsity_preserving_noise/registry_functions/BUILD:
--------------------------------------------------------------------------------
 1 | package(default_visibility = ["//visibility:public"])
 2 | 
 3 | licenses(["notice"])
 4 | 
 5 | py_library(
 6 |     name = "embedding",
 7 |     srcs = ["embedding.py"],
 8 |     deps = ["//tensorflow_privacy/privacy/sparsity_preserving_noise:type_aliases"],
 9 | )
10 | 
11 | py_test(
12 |     name = "embedding_test",
13 |     srcs = ["embedding_test.py"],
14 |     deps = [":embedding"],
15 | )
16 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/privacy/sparsity_preserving_noise/type_aliases.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Type aliases for sparsity preserving noise."""
15 | 
16 | from collections.abc import Callable, Mapping, Sequence
17 | from typing import Any
18 | import tensorflow as tf
19 | 
20 | InputArgs = Sequence[Any]
21 | InputKwargs = Mapping[str, Any]
22 | SparseGradient = tf.IndexedSlices | tf.SparseTensor
23 | ContributionCountHistogram = tf.SparseTensor
24 | ContributionCountHistogramFn = Callable[
25 |     [SparseGradient], ContributionCountHistogram
26 | ]
27 | NumMicrobatches = int | tf.Tensor
28 | SparsityPreservingNoiseLayerRegistryFunction = Callable[
29 |     [tf.keras.layers.Layer, InputArgs, InputKwargs, NumMicrobatches | None],
30 |     dict[str, ContributionCountHistogramFn],
31 | ]
32 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/v1/BUILD:
--------------------------------------------------------------------------------
 1 | load("@bazel_skylib//rules:build_test.bzl", "build_test")
 2 | 
 3 | package(default_visibility = ["//visibility:public"])
 4 | 
 5 | licenses(["notice"])
 6 | 
 7 | py_library(
 8 |     name = "tensorflow_privacy_v1",
 9 |     srcs = ["__init__.py"],
10 |     deps = [
11 |         "//tensorflow_privacy/privacy/estimators/v1:dnn",
12 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
13 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer_vectorized",
14 |     ],
15 | )
16 | 
17 | build_test(
18 |     name = "tensorflow_privacy_build_test",
19 |     targets = [":tensorflow_privacy_v1"],
20 | )
21 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/v1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy library v1 imports.
15 | 
16 | This module includes classes designed to be compatible with TF1, based on
17 | `tf.compat.v1.train.Optimizer` and `tf.estimator.Estimator`.
18 | """
19 | 
20 | import sys
21 | 
22 | # pylint: disable=g-import-not-at-top
23 | 
24 | if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
25 |   pass
26 | else:
27 |   # Estimators
28 |   from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1
29 | 
30 |   # Optimizers
31 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer
32 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer
33 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer
34 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer
35 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
36 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
37 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class
38 | 
39 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer
40 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer
41 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer
42 | 
43 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad
44 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam
45 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD
46 |   from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class
47 | 


--------------------------------------------------------------------------------
/tensorflow_privacy/version.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Privacy Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """TensorFlow Privacy version."""
15 | 
16 | __version__ = '0.9.0'
17 | 


--------------------------------------------------------------------------------
/tutorials/BUILD:
--------------------------------------------------------------------------------
 1 | licenses(["notice"])
 2 | 
 3 | py_library(
 4 |     name = "mnist_dpsgd_tutorial_common",
 5 |     srcs = ["mnist_dpsgd_tutorial_common.py"],
 6 | )
 7 | 
 8 | py_binary(
 9 |     name = "mnist_dpsgd_tutorial",
10 |     srcs = ["mnist_dpsgd_tutorial.py"],
11 |     deps = [
12 |         ":mnist_dpsgd_tutorial_common",
13 |         "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib",
14 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
15 |     ],
16 | )
17 | 
18 | py_binary(
19 |     name = "mnist_dpsgd_tutorial_eager",
20 |     srcs = ["mnist_dpsgd_tutorial_eager.py"],
21 |     deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"],
22 | )
23 | 
24 | py_binary(
25 |     name = "mnist_dpsgd_tutorial_keras",
26 |     srcs = ["mnist_dpsgd_tutorial_keras.py"],
27 |     deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer_keras"],
28 | )
29 | 
30 | py_binary(
31 |     name = "mnist_dpsgd_tutorial_keras_model",
32 |     srcs = ["mnist_dpsgd_tutorial_keras_model.py"],
33 |     deps = ["//tensorflow_privacy/privacy/keras_models:dp_keras_model"],
34 | )
35 | 
36 | py_binary(
37 |     name = "mnist_dpsgd_tutorial_vectorized",
38 |     srcs = ["mnist_dpsgd_tutorial_vectorized.py"],
39 |     deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer_vectorized"],
40 | )
41 | 
42 | py_binary(
43 |     name = "mnist_dpsgd_tutorial_tpu",
44 |     srcs = ["mnist_dpsgd_tutorial_tpu.py"],
45 |     deps = [
46 |         ":mnist_dpsgd_tutorial_common",
47 |         "//tensorflow_privacy/privacy/analysis:compute_dp_sgd_privacy_lib",
48 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
49 |     ],
50 | )
51 | 
52 | py_binary(
53 |     name = "mnist_lr_tutorial",
54 |     srcs = ["mnist_lr_tutorial.py"],
55 |     deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"],
56 | )
57 | 
58 | py_binary(
59 |     name = "lm_dpsgd_tutorial",
60 |     srcs = ["lm_dpsgd_tutorial.py"],
61 |     deps = ["//tensorflow_privacy/privacy/optimizers:dp_optimizer"],
62 | )
63 | 
64 | py_binary(
65 |     name = "movielens_tutorial",
66 |     srcs = ["movielens_tutorial.py"],
67 |     deps = [
68 |         "//tensorflow_privacy/privacy/analysis:gdp_accountant",
69 |         "//tensorflow_privacy/privacy/optimizers:dp_optimizer",
70 |     ],
71 | )
72 | 
73 | filegroup(
74 |     name = "ignore_srcs",
75 |     srcs = ["bolton_tutorial.py"],
76 |     tags = ["ignore_srcs"],
77 | )
78 | 


--------------------------------------------------------------------------------
/tutorials/mnist_dpsgd_tutorial_common.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020, The TensorFlow Authors.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Common tools for DP-SGD MNIST tutorials."""
15 | 
16 | import tensorflow as tf
17 | import tensorflow_datasets as tfds
18 | 
19 | 
20 | def get_cnn_model(features):
21 |   """Given input features, returns the logits from a simple CNN model."""
22 |   input_layer = tf.reshape(features, [-1, 28, 28, 1])
23 |   y = tf.keras.layers.Conv2D(
24 |       16, 8, strides=2, padding='same', activation='relu')(
25 |           input_layer)
26 |   y = tf.keras.layers.MaxPool2D(2, 1)(y)
27 |   y = tf.keras.layers.Conv2D(
28 |       32, 4, strides=2, padding='valid', activation='relu')(
29 |           y)
30 |   y = tf.keras.layers.MaxPool2D(2, 1)(y)
31 |   y = tf.keras.layers.Flatten()(y)
32 |   y = tf.keras.layers.Dense(32, activation='relu')(y)
33 |   logits = tf.keras.layers.Dense(10)(y)
34 | 
35 |   return logits
36 | 
37 | 
38 | def make_input_fn(split, input_batch_size=256, repetitions=-1, tpu=False):
39 |   """Make input function on given MNIST split."""
40 | 
41 |   def input_fn(params=None):
42 |     """A simple input function."""
43 |     batch_size = params.get('batch_size', input_batch_size)
44 | 
45 |     def parser(example):
46 |       image, label = example['image'], example['label']
47 |       image = tf.cast(image, tf.float32)
48 |       image /= 255.0
49 |       label = tf.cast(label, tf.int32)
50 |       return image, label
51 | 
52 |     dataset = tfds.load(name='mnist', split=split)
53 |     dataset = dataset.map(parser).shuffle(60000).repeat(repetitions).batch(
54 |         batch_size)
55 |     # If this input function is not meant for TPUs, we can stop here.
56 |     # Otherwise, we need to explicitly set its shape. Note that for unknown
57 |     # reasons, returning the latter format causes performance regression
58 |     # on non-TPUs.
59 |     if not tpu:
60 |       return dataset
61 | 
62 |     # Give inputs statically known shapes; needed for TPUs.
63 |     images, labels = tf.data.make_one_shot_iterator(dataset).get_next()
64 |     # return images, labels
65 |     images.set_shape([batch_size, 28, 28, 1])
66 |     labels.set_shape([
67 |         batch_size,
68 |     ])
69 |     return images, labels
70 | 
71 |   return input_fn
72 | 


--------------------------------------------------------------------------------
/tutorials/walkthrough/BUILD:
--------------------------------------------------------------------------------
1 | licenses(["notice"])
2 | 
3 | py_binary(
4 |     name = "mnist_scratch",
5 |     srcs = ["mnist_scratch.py"],
6 | )
7 | 


--------------------------------------------------------------------------------