├── .reuse └── dep5 ├── LICENSE ├── LICENSES └── Apache-2.0.txt ├── README.md ├── Tutorial_dp-GAN.ipynb ├── Tutorial_dp-VAE.ipynb ├── dataset └── adult.csv ├── differential_privacy ├── dp_sgd │ └── dp_optimizer │ │ ├── dp_optimizer.py │ │ ├── sanitizer.py │ │ ├── sanitizers │ │ ├── base.py │ │ ├── basic.py │ │ ├── basicOverall.py │ │ └── grouped.py │ │ └── utils.py └── privacy_accountant │ └── tf │ └── accountant.py ├── discretedata_manager.py ├── figs ├── image003.png └── image006.jpg ├── models.py └── tflib ├── README.md ├── __init__.py ├── cifar10.py ├── inception_score.py ├── mnist.py ├── ops ├── __init__.py ├── batchnorm.py ├── cond_batchnorm.py ├── conv1d.py ├── conv2d.py ├── deconv2d.py ├── layernorm.py └── linear.py ├── plot.py ├── save_images.py └── small_imagenet.py /.reuse/dep5: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: security-research-differentially-private-generative-models 3 | Upstream-Contact: Anderson Santana de Oliveira 4 | Source: https://github.com/SAP-samples/security-research-differentially-private-generative-models 5 | 6 | # Sample paragraph, commented out: 7 | # 8 | # Files: src/* 9 | # Copyright: $YEAR $NAME <$CONTACT> 10 | # License: ... 11 | 12 | Files: * 13 | Copyright: 2020 SAP SE or an SAP affiliate company and sap-devtoberfest-2020 contributors 14 | License: Apache-2.0 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSES/Apache-2.0.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and distribution 10 | as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct 18 | or indirect, to cause the direction or management of such entity, whether 19 | by contract or otherwise, or (ii) ownership of fifty percent (50%) or more 20 | of the outstanding shares, or (iii) beneficial ownership of such entity. 
21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions 23 | granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation 30 | or translation of a Source form, including but not limited to compiled object 31 | code, generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, 34 | made available under the License, as indicated by a copyright notice that 35 | is included in or attached to the work (an example is provided in the Appendix 36 | below). 37 | 38 | "Derivative Works" shall mean any work, whether in Source or Object form, 39 | that is based on (or derived from) the Work and for which the editorial revisions, 40 | annotations, elaborations, or other modifications represent, as a whole, an 41 | original work of authorship. For the purposes of this License, Derivative 42 | Works shall not include works that remain separable from, or merely link (or 43 | bind by name) to the interfaces of, the Work and Derivative Works thereof. 44 | 45 | "Contribution" shall mean any work of authorship, including the original version 46 | of the Work and any modifications or additions to that Work or Derivative 47 | Works thereof, that is intentionally submitted to Licensor for inclusion in 48 | the Work by the copyright owner or by an individual or Legal Entity authorized 49 | to submit on behalf of the copyright owner. 
For the purposes of this definition, 50 | "submitted" means any form of electronic, verbal, or written communication 51 | sent to the Licensor or its representatives, including but not limited to 52 | communication on electronic mailing lists, source code control systems, and 53 | issue tracking systems that are managed by, or on behalf of, the Licensor 54 | for the purpose of discussing and improving the Work, but excluding communication 55 | that is conspicuously marked or otherwise designated in writing by the copyright 56 | owner as "Not a Contribution." 57 | 58 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 59 | of whom a Contribution has been received by Licensor and subsequently incorporated 60 | within the Work. 61 | 62 | 2. Grant of Copyright License. Subject to the terms and conditions of this 63 | License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, 64 | no-charge, royalty-free, irrevocable copyright license to reproduce, prepare 65 | Derivative Works of, publicly display, publicly perform, sublicense, and distribute 66 | the Work and such Derivative Works in Source or Object form. 67 | 68 | 3. Grant of Patent License. Subject to the terms and conditions of this License, 69 | each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, 70 | no-charge, royalty-free, irrevocable (except as stated in this section) patent 71 | license to make, have made, use, offer to sell, sell, import, and otherwise 72 | transfer the Work, where such license applies only to those patent claims 73 | licensable by such Contributor that are necessarily infringed by their Contribution(s) 74 | alone or by combination of their Contribution(s) with the Work to which such 75 | Contribution(s) was submitted. 
If You institute patent litigation against 76 | any entity (including a cross-claim or counterclaim in a lawsuit) alleging 77 | that the Work or a Contribution incorporated within the Work constitutes direct 78 | or contributory patent infringement, then any patent licenses granted to You 79 | under this License for that Work shall terminate as of the date such litigation 80 | is filed. 81 | 82 | 4. Redistribution. You may reproduce and distribute copies of the Work or 83 | Derivative Works thereof in any medium, with or without modifications, and 84 | in Source or Object form, provided that You meet the following conditions: 85 | 86 | (a) You must give any other recipients of the Work or Derivative Works a copy 87 | of this License; and 88 | 89 | (b) You must cause any modified files to carry prominent notices stating that 90 | You changed the files; and 91 | 92 | (c) You must retain, in the Source form of any Derivative Works that You distribute, 93 | all copyright, patent, trademark, and attribution notices from the Source 94 | form of the Work, excluding those notices that do not pertain to any part 95 | of the Derivative Works; and 96 | 97 | (d) If the Work includes a "NOTICE" text file as part of its distribution, 98 | then any Derivative Works that You distribute must include a readable copy 99 | of the attribution notices contained within such NOTICE file, excluding those 100 | notices that do not pertain to any part of the Derivative Works, in at least 101 | one of the following places: within a NOTICE text file distributed as part 102 | of the Derivative Works; within the Source form or documentation, if provided 103 | along with the Derivative Works; or, within a display generated by the Derivative 104 | Works, if and wherever such third-party notices normally appear. The contents 105 | of the NOTICE file are for informational purposes only and do not modify the 106 | License. 
You may add Your own attribution notices within Derivative Works 107 | that You distribute, alongside or as an addendum to the NOTICE text from the 108 | Work, provided that such additional attribution notices cannot be construed 109 | as modifying the License. 110 | 111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, 113 | or distribution of Your modifications, or for any such Derivative Works as 114 | a whole, provided Your use, reproduction, and distribution of the Work otherwise 115 | complies with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. Unless You explicitly state otherwise, any 118 | Contribution intentionally submitted for inclusion in the Work by You to the 119 | Licensor shall be under the terms and conditions of this License, without 120 | any additional terms or conditions. Notwithstanding the above, nothing herein 121 | shall supersede or modify the terms of any separate license agreement you 122 | may have executed with Licensor regarding such Contributions. 123 | 124 | 6. Trademarks. This License does not grant permission to use the trade names, 125 | trademarks, service marks, or product names of the Licensor, except as required 126 | for reasonable and customary use in describing the origin of the Work and 127 | reproducing the content of the NOTICE file. 128 | 129 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to 130 | in writing, Licensor provides the Work (and each Contributor provides its 131 | Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 132 | KIND, either express or implied, including, without limitation, any warranties 133 | or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR 134 | A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness 135 | of using or redistributing the Work and assume any risks associated with Your 136 | exercise of permissions under this License. 137 | 138 | 8. Limitation of Liability. In no event and under no legal theory, whether 139 | in tort (including negligence), contract, or otherwise, unless required by 140 | applicable law (such as deliberate and grossly negligent acts) or agreed to 141 | in writing, shall any Contributor be liable to You for damages, including 142 | any direct, indirect, special, incidental, or consequential damages of any 143 | character arising as a result of this License or out of the use or inability 144 | to use the Work (including but not limited to damages for loss of goodwill, 145 | work stoppage, computer failure or malfunction, or any and all other commercial 146 | damages or losses), even if such Contributor has been advised of the possibility 147 | of such damages. 148 | 149 | 9. Accepting Warranty or Additional Liability. While redistributing the Work 150 | or Derivative Works thereof, You may choose to offer, and charge a fee for, 151 | acceptance of support, warranty, indemnity, or other liability obligations 152 | and/or rights consistent with this License. However, in accepting such obligations, 153 | You may act only on Your own behalf and on Your sole responsibility, not on 154 | behalf of any other Contributor, and only if You agree to indemnify, defend, 155 | and hold each Contributor harmless for any liability incurred by, or claims 156 | asserted against, such Contributor by reason of your accepting any such warranty 157 | or additional liability. 158 | 159 | END OF TERMS AND CONDITIONS 160 | 161 | APPENDIX: How to apply the Apache License to your work. 162 | 163 | To apply the Apache License to your work, attach the following boilerplate 164 | notice, with the fields enclosed by brackets "[]" replaced with your own identifying 165 | information. 
(Don't include the brackets!) The text should be enclosed in 166 | the appropriate comment syntax for the file format. We also recommend that 167 | a file or class name and description of purpose be included on the same "printed 168 | page" as the copyright notice for easier identification within third-party 169 | archives. 170 | 171 | Copyright [yyyy] [name of copyright owner] 172 | 173 | Licensed under the Apache License, Version 2.0 (the "License"); 174 | you may not use this file except in compliance with the License. 175 | You may obtain a copy of the License at 176 | 177 | http://www.apache.org/licenses/LICENSE-2.0 178 | 179 | Unless required by applicable law or agreed to in writing, software 180 | distributed under the License is distributed on an "AS IS" BASIS, 181 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 182 | See the License for the specific language governing permissions and 183 | limitations under the License. 184 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | ![](https://img.shields.io/badge/STATUS-NOT%20CURRENTLY%20MAINTAINED-red.svg?longCache=true&style=flat) 8 | 9 | # Important Notice 10 | This public repository is read-only and no longer maintained. For the latest sample code repositories, visit the [SAP Samples](https://github.com/SAP-samples) organization. 
11 | 12 | # Differentially Private Generative Models 13 | 14 | [![REUSE status](https://api.reuse.software/badge/github.com/SAP-samples/security-research-differentially-private-generative-models)](https://api.reuse.software/info/github.com/SAP-samples/security-research-differentially-private-generative-models) 15 | 16 | ## Description: 17 | This repository explains how generative models can be used in combination with differential privacy to synthesize feature-rich, realistic categorical datasets in a privacy-preserving manner. It provides two Jupyter notebooks for dp-GANs (differentially-private Generative Adversarial Networks) and dp-VAE (Variational Autoencoder) to generate new data in a differentially private mode. The code allows one to quickly generate new datasets (incl. numerical features) in private or public mode. dp_SGD and dp_Adam optimizers from the tensorflow/privacy library (https://github.com/tensorflow/privacy) are used in these models. 18 | 19 | ## Requirements 20 | - [Python](https://www.python.org/) 21 | - [Jupyter](https://jupyter.org/) 22 | - [Tensorflow](https://github.com/tensorflow) 23 | - Pandas, keras, and more; see the notebooks' import sections 24 | - [H2O AutoML](http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html) 25 | - Check further dependencies in the Jupyter notebooks Tutorial_dp-GAN.ipynb and Tutorial_dp-VAE.ipynb 26 | 27 | ## Download the tensorflow privacy project 28 | 1. Clone Tensorflow privacy into this project repository: 29 | ``` 30 | git clone https://github.com/tensorflow/privacy 31 | 32 | cd privacy 33 | pip install -e . 34 | ``` 35 | 36 | 37 | 2. Open the notebooks in Jupyter and run them 38 | 39 | 40 | ## Authors / Contributors 41 | 42 | - Lyudmylla Dymytrova 43 | - Lorenzo Frigerio 44 | - Anderson Santana de Oliveira 45 | 46 | ## Known Issues 47 | No issues known 48 | 49 | 50 | ## How to obtain support 51 | This project is provided "as-is" and any bug reports are not guaranteed to be fixed. 
52 | 53 | 54 | ## Citations 55 | If you use this code in your research, 56 | please cite: 57 | 58 | ``` 59 | @article{DBLP:journals/corr/abs-1901-02477, 60 | author = {Lorenzo Frigerio and 61 | Anderson Santana de Oliveira and 62 | Laurent Gomez and 63 | Patrick Duverger}, 64 | title = {Differentially Private Generative Adversarial Networks for Time Series, 65 | Continuous, and Discrete Open Data}, 66 | journal = {CoRR}, 67 | volume = {abs/1901.02477}, 68 | year = {2019}, 69 | url = {http://arxiv.org/abs/1901.02477}, 70 | archivePrefix = {arXiv}, 71 | eprint = {1901.02477}, 72 | timestamp = {Fri, 01 Feb 2019 13:39:59 +0100}, 73 | biburl = {https://dblp.org/rec/bib/journals/corr/abs-1901-02477}, 74 | bibsource = {dblp computer science bibliography, https://dblp.org} 75 | } 76 | ``` 77 | 78 | ## References 79 | - [1] Lorenzo Frigerio, Anderson Santana de Oliveira, Laurent Gomez, Patrick Duverger: 80 | Differentially Private Generative Adversarial Networks for Time Series, Continuous, and Discrete Open Data. CoRR abs/1901.02477 (2019). https://arxiv.org/abs/1901.02477 81 | 82 | 83 | ## License 84 | Copyright (c) 2020 SAP SE or an SAP affiliate company. All rights reserved. 85 | This project is licensed under the Apache Software License, version 2.0 except as noted otherwise in the [LICENSE](LICENSES/Apache-2.0.txt) file. 86 | -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/dp_optimizer.py: -------------------------------------------------------------------------------- 1 | # (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy, 2 | # possibly with some small edits by @corcra) 3 | 4 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 5 | # SPDX-FileCopyrightText: 2020 SAP SE 6 | # 7 | # SPDX-License-Identifier: Apache-2.0 8 | 9 | """Differentially private optimizers. 
10 | """ 11 | from __future__ import division 12 | 13 | import tensorflow as tf 14 | 15 | from differential_privacy.dp_sgd.dp_optimizer import utils 16 | 17 | class DPGradientDescentOptimizer(tf.train.AdamOptimizer): 18 | """Differentially private gradient descent optimizer. 19 | """ 20 | iteration = 0 21 | def __init__(self, learning_rate, eps_delta, sanitizer, 22 | sigma=None, use_locking=False, name="Adam", 23 | batches_per_lot=1): 24 | """Construct a differentially private gradient descent optimizer. 25 | 26 | The optimizer uses fixed privacy budget for each batch of training. 27 | 28 | Args: 29 | learning_rate: for GradientDescentOptimizer. 30 | eps_delta: EpsDelta pair for each epoch. 31 | sanitizer: for sanitizing the graident. 32 | sigma: noise sigma. If None, use eps_delta pair to compute sigma; 33 | otherwise use supplied sigma directly. 34 | use_locking: use locking. 35 | name: name for the object. 36 | batches_per_lot: Number of batches in a lot. 37 | """ 38 | 39 | super(DPGradientDescentOptimizer, self).__init__(learning_rate, beta1=0.5, beta2=0.9, use_locking = use_locking, name = name) 40 | # Also, if needed, define the gradient accumulators 41 | self._batches_per_lot = batches_per_lot 42 | self._grad_accum_dict = {} 43 | if batches_per_lot > 1: 44 | self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False, 45 | name="batch_count") 46 | var_list = tf.trainable_variables() 47 | with tf.variable_scope("grad_acc_for"): 48 | for var in var_list: 49 | v_grad_accum = tf.Variable(tf.zeros_like(var), 50 | trainable=False, 51 | name=utils.GetTensorOpName(var)) 52 | self._grad_accum_dict[var.name] = v_grad_accum 53 | 54 | self._eps_delta = eps_delta 55 | self._sanitizer = sanitizer 56 | self._sigma = sigma 57 | 58 | def compute_sanitized_gradients(self, loss, iteration,var_list=None, 59 | add_noise=True): 60 | """Compute the sanitized gradients. 61 | 62 | Args: 63 | loss: the loss tensor. 64 | var_list: the optional variables. 
65 | add_noise: if true, then add noise. Always clip. 66 | Returns: 67 | a pair of (list of sanitized gradients) and privacy spending accumulation 68 | operations. 69 | Raises: 70 | TypeError: if var_list contains non-variable. 71 | """ 72 | 73 | self._assert_valid_dtypes([loss]) 74 | 75 | xs = [tf.convert_to_tensor(x) for x in var_list] 76 | # TODO check this change 77 | 78 | px_grads_byexample = tf.gradients(loss, xs) 79 | 80 | #loss_list = tf.unstack(loss, axis=0) 81 | #px_grads_byexample = [tf.gradients(l, xs) for l in loss_list] 82 | #px_grads = [[x[v] for x in px_grads_byexample] for v in range(len(xs))] 83 | px_grads = [px_grads_byexample[v] for v in range(len(xs))] 84 | 85 | #px_grads = tf.gradients(loss, xs) 86 | # add a dummy 0th dimension to reflect the fact that we have a batch size of 1... 87 | # px_grads = [tf.expand_dims(x, 0) for x in px_grads] 88 | # px_grads = per_example_gradients.PerExampleGradients(loss, xs) 89 | 90 | sanitized, clipped, num_ex = self.call_sanitize_basic(px_grads, var_list) 91 | bound = [] 92 | #sanitized, clipped = self._sanitizer.sanitize_overall(px_grads, var_list,self._eps_delta, sigma=self._sigma, 93 | # add_noise=add_noise,batches_per_lot=self._batches_per_lot) # remove l2norm_inv to come back to clipping on each layer 94 | #sanitized, clipped = self.call_sanitize_grouped(px_grads, var_list,iteration) 95 | #sanitized, clipped, bound = self.call_sanitize_group(px_grads, var_list,add_noise,[[0],[2],[4],[6],[1],[3],[5],[7]])#[[0,2,4,6],[1,3,5,7]])#[[0,2,4],[6],[1],[3],[5],[7]])# 96 | return sanitized, clipped, px_grads, bound, num_ex 97 | 98 | def call_sanitize_group(self, px_grads, var_list, add_noise, groups): #sanitize with gradients divided in groups of layers (each group is a set of layers)) 99 | sanitized_grads = [None] * len(var_list) 100 | clipped_grads = [None] * len(var_list) 101 | for group in groups: 102 | sigma_multiplier = 1 # 0.7#0.7 103 | bound_multiplier = 1 # 2#5 104 | if(group[0]%2!=0): 105 | 
sigma_multiplier = 1.3 106 | bound_multiplier = 0.5 107 | sanitized_grad, clipped_grad, bound = self._sanitizer.sanitize_overall([ px_grads[i] for i in group], [ var_list[i] for i in group],self._eps_delta, sigma=(self._sigma*sigma_multiplier), bound_multiplier = bound_multiplier, 108 | add_noise=add_noise,batches_per_lot=self._batches_per_lot) 109 | for i in range(len(sanitized_grad)): 110 | sanitized_grads[group[i]] = sanitized_grad[i] 111 | clipped_grads[group[i]] = clipped_grad[i] 112 | return sanitized_grads,clipped_grads, bound 113 | 114 | def call_sanitize_grouped(self,px_grads, var_list,iteration): 115 | sanitized_grads = [] 116 | clipped_grads = [] 117 | index = 0 118 | for px_grad, v in zip(px_grads, var_list): 119 | mul_l2norm_bound = 1 120 | tensor_name = utils.GetTensorOpName(v) 121 | #if (tensor_name[-1] == 'b'): 122 | # mul_l2norm_bound *= 1 123 | #mul_l2norm_bound *= int(tensor_name[14]) 124 | privacy_multiplier = tf.add(tf.multiply(tf.mod(tf.add(iteration,tf.constant(index,tf.float32)), tf.constant(12.0,tf.float32)),tf.constant(0.05,tf.float32)),tf.constant(1.0)) 125 | #tf.add(tf.subtract(iteration,iteration),tf.constant(1,tf.float32)) 126 | curr_sigma = self._sigma * privacy_multiplier #* 0.4 * ((curr_iteration + index) % 6) #self._iteration#tf.constant(1.0,tf.float32)#self._iteration#* (5-int(tensor_name[14])) 127 | mul_l2norm_bound /= tf.multiply(privacy_multiplier,tf.constant(2.0,tf.float32)) 128 | index += 1 129 | sanitized_grad, clipped_grad = self._sanitizer.sanitize_grouped( 130 | px_grad, self._eps_delta, sigma=curr_sigma, 131 | tensor_name=tensor_name, add_noise=True, 132 | num_examples=self._batches_per_lot * tf.slice( 133 | tf.shape(px_grad), [0], [1]), mul_l2norm_bound=mul_l2norm_bound) # remove l2norm_inv to come back to clipping on each layer 134 | sanitized_grads.append(sanitized_grad) 135 | clipped_grads.append(clipped_grad) 136 | return sanitized_grads, clipped_grads 137 | 138 | def call_sanitize_basic(self,px_grads, var_list): 
#basic sanitizer with different parameters for bias weights 139 | sanitized_grads=[] 140 | clipped_grads = [] 141 | for px_grad, v in zip(px_grads, var_list): 142 | tensor_name = utils.GetTensorOpName(v) 143 | if(tensor_name[-1]=='b'):# and tensor_name[14]=='4'): 144 | isBias = True 145 | else: 146 | isBias = False 147 | sanitized_grad, clipped_grad, num_ex = self._sanitizer.sanitize( 148 | px_grad, self._eps_delta, sigma=self._sigma, 149 | tensor_name=tensor_name, add_noise=True, 150 | num_examples=self._batches_per_lot * tf.slice( 151 | tf.shape(px_grad), [0], [1]),isBias=isBias) #remove l2norm_inv to come back to clipping on each layer 152 | sanitized_grads.append(sanitized_grad) 153 | clipped_grads.append(clipped_grad) 154 | return sanitized_grads, clipped_grads, num_ex 155 | 156 | def minimize(self, loss, iteration, global_step=None, var_list=None, 157 | name=None): 158 | """Minimize using sanitized gradients. 159 | 160 | This gets a var_list which is the list of trainable variables. 161 | For each var in var_list, we defined a grad_accumulator variable 162 | during init. When batches_per_lot > 1, we accumulate the gradient 163 | update in those. At the end of each lot, we apply the update back to 164 | the variable. This has the effect that for each lot we compute 165 | gradients at the point at the beginning of the lot, and then apply one 166 | update at the end of the lot. In other words, semantically, we are doing 167 | SGD with one lot being the equivalent of one usual batch of size 168 | batch_size * batches_per_lot. 169 | This allows us to simulate larger batches than our memory size would permit. 170 | 171 | The lr and the num_steps are in the lot world. 172 | 173 | Args: 174 | loss: the loss tensor. 175 | global_step: the optional global step. 176 | var_list: the optional variables. 177 | name: the optional name. 178 | Returns: 179 | the operation that runs one step of DP gradient descent. 
180 | """ 181 | 182 | # First validate the var_list 183 | 184 | if var_list is None: 185 | var_list = tf.trainable_variables() 186 | for var in var_list: 187 | if not isinstance(var, tf.Variable): 188 | raise TypeError("Argument is not a variable.Variable: %s" % var) 189 | 190 | # Modification: apply gradient once every batches_per_lot many steps. 191 | # This may lead to smaller error 192 | 193 | if self._batches_per_lot == 1: 194 | sanitized_grads, clipped_grads, gradient, bound, num_ex = self.compute_sanitized_gradients( 195 | loss,iteration, var_list=var_list) 196 | 197 | grads_and_vars = list(zip(sanitized_grads, var_list)) 198 | 199 | self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None]) 200 | 201 | apply_grads = self.apply_gradients(grads_and_vars, 202 | global_step=global_step, name=name) 203 | return apply_grads, sanitized_grads, clipped_grads, gradient, bound, num_ex 204 | 205 | # Condition for deciding whether to accumulate the gradient 206 | # or actually apply it. 207 | # we use a private self_batch_count to keep track of number of batches. 208 | # global step will count number of lots processed. 209 | 210 | update_cond = tf.equal(tf.constant(0), 211 | tf.mod(self._batch_count, 212 | tf.constant(self._batches_per_lot))) 213 | 214 | # Things to do for batches other than last of the lot. 215 | # Add non-noisy clipped grads to shadow variables. 216 | 217 | def non_last_in_lot_op(loss, var_list,iteration): 218 | """Ops to do for a typical batch. 219 | 220 | For a batch that is not the last one in the lot, we simply compute the 221 | sanitized gradients and apply them to the grad_acc variables. 
222 | 223 | Args: 224 | loss: loss function tensor 225 | var_list: list of variables 226 | Returns: 227 | A tensorflow op to do the updates to the gradient accumulators 228 | """ 229 | sanitized_grads = self.compute_sanitized_gradients( 230 | loss, iteration, var_list=var_list, add_noise=False) 231 | 232 | update_ops_list = [] 233 | for var, grad in zip(var_list, sanitized_grads): 234 | grad_acc_v = self._grad_accum_dict[var.name] 235 | update_ops_list.append(grad_acc_v.assign_add(grad)) 236 | update_ops_list.append(self._batch_count.assign_add(1)) 237 | return tf.group(*update_ops_list) 238 | 239 | # Things to do for last batch of a lot. 240 | # Add noisy clipped grads to accumulator. 241 | # Apply accumulated grads to vars. 242 | 243 | def last_in_lot_op(loss, var_list, global_step, iteration): 244 | """Ops to do for last batch in a lot. 245 | 246 | For the last batch in the lot, we first add the sanitized gradients to 247 | the gradient acc variables, and then apply these 248 | values over to the original variables (via an apply gradient) 249 | 250 | Args: 251 | loss: loss function tensor 252 | var_list: list of variables 253 | global_step: optional global step to be passed to apply_gradients 254 | Returns: 255 | A tensorflow op to push updates from shadow vars to real vars. 256 | """ 257 | 258 | # We add noise in the last lot. This is why we need this code snippet 259 | # that looks almost identical to the non_last_op case here. 260 | sanitized_grads = self.compute_sanitized_gradients( 261 | loss, iteration, var_list=var_list, add_noise=True) 262 | 263 | normalized_grads = [] 264 | for var, grad in zip(var_list, sanitized_grads): 265 | grad_acc_v = self._grad_accum_dict[var.name] 266 | # To handle the lr difference per lot vs per batch, we divide the 267 | # update by number of batches per lot. 
268 | normalized_grad = tf.div(grad_acc_v.assign_add(grad), 269 | tf.to_float(self._batches_per_lot)) 270 | 271 | normalized_grads.append(normalized_grad) 272 | 273 | with tf.control_dependencies(normalized_grads): 274 | grads_and_vars = list(zip(normalized_grads, var_list)) 275 | self._assert_valid_dtypes( 276 | [v for g, v in grads_and_vars if g is not None]) 277 | apply_san_grads = self.apply_gradients(grads_and_vars, 278 | global_step=global_step, 279 | name="apply_grads") 280 | 281 | # Now reset the accumulators to zero 282 | resets_list = [] 283 | with tf.control_dependencies([apply_san_grads]): 284 | for _, acc in self._grad_accum_dict.items(): 285 | reset = tf.assign(acc, tf.zeros_like(acc)) 286 | resets_list.append(reset) 287 | resets_list.append(self._batch_count.assign_add(1)) 288 | 289 | last_step_update = tf.group(*([apply_san_grads] + resets_list)) 290 | return last_step_update 291 | # pylint: disable=g-long-lambda 292 | update_op = tf.cond(update_cond, 293 | lambda: last_in_lot_op( 294 | loss, var_list, 295 | global_step), 296 | lambda: non_last_in_lot_op( 297 | loss, var_list, iteration)) 298 | return tf.group(update_op) 299 | -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/sanitizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # SPDX-FileCopyrightText: 2020 SAP SE 3 | # 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | """Defines Sanitizer class for sanitizing tensors. 7 | 8 | A sanitizer first limits the sensitivity of a tensor and then adds noise 9 | to the tensor. The parameters are determined by the privacy_spending and the 10 | other parameters. It also uses an accountant to keep track of the privacy 11 | spending. 
12 | """ 13 | from __future__ import division 14 | 15 | import collections 16 | 17 | import tensorflow as tf 18 | 19 | from differential_privacy.dp_sgd.dp_optimizer import utils 20 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.basic import BasicClipper 21 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.basicOverall import BasicClipperOverall 22 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.grouped import GroupedClipper 23 | import pdb 24 | 25 | ClipOption = collections.namedtuple("ClipOption", 26 | ["l2norm_bound", "clip"]) 27 | 28 | 29 | class AmortizedGaussianSanitizer(object): 30 | """Sanitizer with Gaussian noise and amoritzed privacy spending accounting. 31 | 32 | This sanitizes a tensor by first clipping the tensor, summing the tensor 33 | and then adding appropriate amount of noise. It also uses an amortized 34 | accountant to keep track of privacy spending. 35 | """ 36 | 37 | def __init__(self, accountant, default_option, disc_params): 38 | """Construct an AmortizedGaussianSanitizer. 39 | 40 | Args: 41 | accountant: the privacy accountant. Expect an amortized one. 42 | default_option: the default ClipOptoin. 43 | """ 44 | self.disc_parames = disc_params 45 | self._accountant = accountant 46 | self._default_option = default_option 47 | self._options = {} 48 | 49 | def set_option(self, tensor_name, option): 50 | """Set options for an individual tensor. 51 | 52 | Args: 53 | tensor_name: the name of the tensor. 54 | option: clip option. 
55 | """ 56 | 57 | self._options[tensor_name] = option 58 | 59 | # TODO ATTENTION, MY WORK 60 | def compute_overall_bound(self,t): 61 | saved_shape = tf.shape(t) 62 | batch_size = tf.slice(saved_shape, [0], [1]) 63 | return tf.reshape(t, [1,-1])#[batch_size, [-1]])) 64 | 65 | def sanitize_overall(self, px_grads, var_list,eps_delta, option=ClipOption(None, None), num_examples=None, sigma=None, bound_multiplier = 1, add_noise=True,batches_per_lot=None): 66 | num_tot_examples = tf.zeros([1],dtype=tf.int32) 67 | sanitized_gradient = [] 68 | clipped_gradients = [] 69 | t_list = [] 70 | weights_shapes = [] 71 | weights_sizes = [] 72 | linear_clipped_weights = [] 73 | 74 | t2 = [] 75 | 76 | for px_grad, v in zip(px_grads, var_list): 77 | t_list.append(tf.reshape(px_grad, tf.concat(axis=0, values=[tf.slice(tf.shape(px_grad), [0], [1]), [-1]])))#tf.reshape(px_grad, tf.concat(axis=0, values=[tf.slice(tf.shape(px_grad), [0], [1]), [-1]])))#self.compute_overall_bound(px_grad)) 78 | 79 | l2norm_inv = tf.rsqrt(tf.reduce_sum(t_list[0] * t_list[0], [1]) + 0.000001)#tf.div(tf.constant(1.0),(tf.norm(t_list[0])+0.000001)) 80 | t_overall = tf.concat(t_list, axis=0) 81 | #l2norm_inv = tf.div(tf.constant(1.0),tf.norm(t_overall))#tf.rsqrt(tf.reduce_sum(t_overall * t_overall, [1]) + 0.000001)#tf.div(tf.constant(1.0),tf.norm(px_grad)) 82 | #t_overall = tf.reshape(t_overall, tf.concat(axis=0, values=[tf.slice(tf.shape(t_overall), [0], [1]), [-1]])) 83 | #l2norm_inv = tf.div(tf.constant(1.0),tf.norm(t_overall))#tf.rsqrt(tf.reduce_sum(t_overall * t_overall, [1]) + 0.000001) 84 | 85 | 86 | # saved_shape = tf.shape(px_grad) 87 | # batch_size = tf.slice(saved_shape, [0], [1]) 88 | # t2 = tf.reshape(px_grad, tf.concat(axis=0, values=[batch_size, [-1]])) 89 | # Add a small number to avoid divide by 0 90 | #l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001) 91 | 92 | 93 | 94 | for px_grad, v in zip(px_grads, var_list): 95 | tensor_name = utils.GetTensorOpName(v) 96 | if sigma is None: 97 
| # pylint: disable=unpacking-non-sequence 98 | eps, delta = eps_delta 99 | with tf.control_dependencies( 100 | [tf.Assert(tf.greater(eps, 0), 101 | ["eps needs to be greater than 0"]), 102 | tf.Assert(tf.greater(delta, 0), 103 | ["delta needs to be greater than 0"])]): 104 | # The following formula is taken from 105 | # Dwork and Roth, The Algorithmic Foundations of Differential 106 | # Privacy, Appendix A. 107 | # http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf 108 | sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps 109 | 110 | l2norm_bound, clip = option 111 | if l2norm_bound is None: 112 | l2norm_bound, clip = self._default_option 113 | l2norm_bound *= bound_multiplier 114 | if ((v.name is not None) and 115 | (v.name in self._options)): 116 | l2norm_bound, clip = self._options[v.name] 117 | #clipper = GroupedClipper(self.disc_parames) 118 | clipper = BasicClipper(l2norm_bound)#BasicClipperOverall(l2norm_bound,l2norm_inv)#BasicClipper(l2norm_bound)#BasicClipperOverall(l2norm_bound,l2norm_inv)#BasicClipper(l2norm_bound)#BasicClipperOverall(l2norm_bound,l2norm_inv)#BasicClipper(l2norm_bound)# 119 | if clip: 120 | x, boundNew = clipper.clip_grads(px_grad) 121 | clipped_gradients.append(x) 122 | linear_clipped_weights.append(self.compute_overall_bound(x)) 123 | num_examples_cur = tf.slice(tf.shape(x), [0], [1]) 124 | if(x.shape.ndims>1): 125 | weights_sizes.append((x.shape[0]*x.shape[1]).value) 126 | else: 127 | weights_sizes.append(x.shape[0].value) 128 | weights_shapes.append(x.shape) 129 | num_tot_examples = tf.add(num_tot_examples,num_examples_cur) 130 | 131 | #saned_x = self.sanitize( 132 | # px_grad, eps_delta, sigma=sigma, 133 | # tensor_name=tensor_name, add_noise=add_noise, 134 | # num_examples=batches_per_lot * tf.slice( 135 | # tf.shape(px_grad), [0], [1]), 136 | # isBias=False) #remove l2norm_inv to come back to clipping on each layer 137 | #sanitized_grads.append(sanitized_grad) 138 | #num_examples = tf.slice(tf.shape(t_overall), [0], [1]) 139 | 
all_clipped_weights = tf.concat(linear_clipped_weights, axis=-1) 140 | privacy_accum_op = self._accountant.accumulate_privacy_spending( 141 | eps_delta, sigma, 200) 142 | with tf.control_dependencies([privacy_accum_op]): 143 | saned_x = clipper.add_noise(all_clipped_weights, sigma * l2norm_bound) 144 | 145 | splits = tf.split(saned_x,weights_sizes,1) 146 | for i,split in enumerate(splits): 147 | sanitized_gradient.append(tf.reshape(split,weights_shapes[i])) 148 | return sanitized_gradient, clipped_gradients, boundNew 149 | 150 | def sanitize_grouped(self, x, eps_delta, sigma=None, 151 | option=ClipOption(None, None), tensor_name=None, 152 | num_examples=None, add_noise=True, mul_l2norm_bound=None ): 153 | """Sanitize the given tensor. 154 | 155 | This santize a given tensor by first applying l2 norm clipping and then 156 | adding Gaussian noise. It calls the privacy accountant for updating the 157 | privacy spending. 158 | 159 | Args: 160 | x: the tensor to sanitize. 161 | eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to 162 | compute sigma if sigma is None. 163 | sigma: if sigma is not None, use sigma. 164 | option: a ClipOption which, if supplied, used for 165 | clipping and adding noise. 166 | tensor_name: the name of the tensor. 167 | num_examples: if None, use the number of "rows" of x. 168 | add_noise: if True, then add noise, else just clip. 169 | Returns: 170 | a pair of sanitized tensor and the operation to accumulate privacy 171 | spending. 172 | """ 173 | 174 | if sigma is None: 175 | # pylint: disable=unpacking-non-sequence 176 | eps, delta = eps_delta 177 | with tf.control_dependencies( 178 | [tf.Assert(tf.greater(eps, 0), 179 | ["eps needs to be greater than 0"]), 180 | tf.Assert(tf.greater(delta, 0), 181 | ["delta needs to be greater than 0"])]): 182 | # The following formula is taken from 183 | # Dwork and Roth, The Algorithmic Foundations of Differential 184 | # Privacy, Appendix A. 
185 | # http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf 186 | sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps 187 | 188 | l2norm_bound, clip = option 189 | if l2norm_bound is None: 190 | l2norm_bound, clip = self._default_option 191 | 192 | if ((tensor_name is not None) and 193 | (tensor_name in self._options)): 194 | l2norm_bound, clip = self._options[tensor_name] 195 | l2norm_bound = mul_l2norm_bound * l2norm_bound 196 | clipper = BasicClipper(l2norm_bound) 197 | if clip: 198 | x = clipper.clip_grads(x) 199 | if add_noise: 200 | if num_examples is None: 201 | num_examples = tf.slice(tf.shape(x), [0], [1]) 202 | privacy_accum_op = self._accountant.accumulate_privacy_spending( 203 | eps_delta, sigma, num_examples) 204 | with tf.control_dependencies([privacy_accum_op]): 205 | saned_x = clipper.add_noise(x, tf.multiply(sigma,l2norm_bound)) 206 | else: 207 | saned_x = tf.reduce_sum(x, 0) 208 | return saned_x, x 209 | 210 | 211 | def sanitize(self, x, eps_delta, sigma=None, 212 | option=ClipOption(None, None), tensor_name=None, 213 | num_examples=None, add_noise=True, isBias=False): 214 | """Sanitize the given tensor. 215 | 216 | This santize a given tensor by first applying l2 norm clipping and then 217 | adding Gaussian noise. It calls the privacy accountant for updating the 218 | privacy spending. 219 | 220 | Args: 221 | x: the tensor to sanitize. 222 | eps_delta: a pair of eps, delta for (eps,delta)-DP. Use it to 223 | compute sigma if sigma is None. 224 | sigma: if sigma is not None, use sigma. 225 | option: a ClipOption which, if supplied, used for 226 | clipping and adding noise. 227 | tensor_name: the name of the tensor. 228 | num_examples: if None, use the number of "rows" of x. 229 | add_noise: if True, then add noise, else just clip. 230 | Returns: 231 | a pair of sanitized tensor and the operation to accumulate privacy 232 | spending. 
233 | """ 234 | 235 | if sigma is None: 236 | # pylint: disable=unpacking-non-sequence 237 | eps, delta = eps_delta 238 | with tf.control_dependencies( 239 | [tf.Assert(tf.greater(eps, 0), 240 | ["eps needs to be greater than 0"]), 241 | tf.Assert(tf.greater(delta, 0), 242 | ["delta needs to be greater than 0"])]): 243 | # The following formula is taken from 244 | # Dwork and Roth, The Algorithmic Foundations of Differential 245 | # Privacy, Appendix A. 246 | # http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf 247 | sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps 248 | 249 | l2norm_bound, clip = option 250 | if l2norm_bound is None: 251 | l2norm_bound, clip = self._default_option 252 | 253 | if ((tensor_name is not None) and 254 | (tensor_name in self._options)): 255 | l2norm_bound, clip = self._options[tensor_name] 256 | #clipper = GroupedClipper(self.disc_parames) 257 | if (isBias): 258 | sigma *= 1.3#0.7 259 | l2norm_bound *= 0.5#2#5 260 | clipper = BasicClipper(l2norm_bound) 261 | if clip: 262 | x = clipper.clip_grads(x) 263 | if add_noise: 264 | if num_examples is None: 265 | num_examples = tf.slice(tf.shape(x), [0], [1]) 266 | privacy_accum_op, q = self._accountant.accumulate_privacy_spending( 267 | eps_delta, sigma, num_examples)#TODO CHECK WHAT IT IS CORRECT num_examples) 200 268 | with tf.control_dependencies([privacy_accum_op]): 269 | saned_x = clipper.add_noise(x,sigma * l2norm_bound) 270 | else: 271 | saned_x = tf.reduce_sum(x, 0) 272 | return saned_x, x, q 273 | -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/sanitizers/base.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 SAP SE 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | from abc import abstractmethod 6 | 7 | class Sanitizer(object): 8 | 9 | def __init__(self, *args, **kwargs): 10 | pass 11 | 12 | @abstractmethod 13 | def 
clip_grads(self, t, name=None): 14 | pass 15 | 16 | @abstractmethod 17 | def add_noise(self, t, sigma, name=None): 18 | pass -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/sanitizers/basic.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 SAP SE 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.base import Sanitizer 10 | 11 | class BasicClipper(Sanitizer): 12 | 13 | def __init__(self, upper_bound, specials=None): 14 | self.upper_bound = upper_bound 15 | 16 | def clip_grads(self, t, name=None): 17 | """Clip an array of tensors by L2 norm. 18 | 19 | Shrink each dimension-0 slice of tensor (for matrix it is each row) such 20 | that the l2 norm is at most upper_bound. Here we clip each row as it 21 | corresponds to each example in the batch. 22 | 23 | Args: 24 | t: the input tensor. 25 | upper_bound: the upperbound of the L2 norm. 26 | name: optional name. 27 | Returns: 28 | the clipped tensor. 
29 | """ 30 | #assert self.upper_bound > 0 31 | with tf.name_scope(values=[t, self.upper_bound], name=name, 32 | default_name="batch_clip_by_l2norm") as name: 33 | saved_shape = tf.shape(t) 34 | batch_size = tf.slice(saved_shape, [0], [1]) 35 | 36 | # Add a small number to avoid divide by 0 37 | t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]])) 38 | upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),tf.div(tf.constant(1.0),self.upper_bound)) 39 | # Add a small number to avoid divide by 0 40 | l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001) 41 | scale = tf.minimum(l2norm_inv, upper_bound_inv) * self.upper_bound 42 | clipped_t = tf.matmul(tf.diag(scale), t2) 43 | clipped_t = tf.reshape(clipped_t, saved_shape, name=name) 44 | return clipped_t 45 | 46 | def add_noise(self, t, sigma, name=None): 47 | """Add i.i.d. Gaussian noise (0, sigma^2) to every entry of t. 48 | 49 | Args: 50 | t: the input tensor. 51 | sigma: the stddev of the Gaussian noise. 52 | name: optional name. 53 | Returns: 54 | the noisy tensor. 
55 | """ 56 | 57 | with tf.name_scope(values=[t, sigma], name=name, 58 | default_name="add_gaussian_noise") as name: 59 | noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma) 60 | return noisy_t -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/sanitizers/basicOverall.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 SAP SE 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.base import Sanitizer 10 | 11 | class BasicClipperOverall(Sanitizer): 12 | 13 | def __init__(self, upper_bound, l2norm_inv, specials=None): 14 | self.upper_bound = upper_bound 15 | self.l2norm_inv = l2norm_inv 16 | 17 | def clip_grads(self, t, name=None): 18 | """Clip an array of tensors by L2 norm. 19 | 20 | Shrink each dimension-0 slice of tensor (for matrix it is each row) such 21 | that the l2 norm is at most upper_bound. Here we clip each row as it 22 | corresponds to each example in the batch. 23 | 24 | Args: 25 | t: the input tensor. 26 | upper_bound: the upperbound of the L2 norm. 27 | name: optional name. 28 | Returns: 29 | the clipped tensor. 
30 | """ 31 | assert self.upper_bound > 0 32 | with tf.name_scope(values=[t, self.upper_bound], name=name, 33 | default_name="batch_clip_by_l2norm") as name: 34 | saved_shape = tf.shape(t) 35 | batch_size = tf.slice(saved_shape, [0], [1]) 36 | 37 | t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]])) 38 | upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]), 39 | tf.constant(1.0 / self.upper_bound)) 40 | scale = tf.minimum(self.l2norm_inv, upper_bound_inv) * self.upper_bound 41 | clipped_t = tf.matmul(tf.diag(scale), t2) 42 | clipped_t = tf.reshape(clipped_t, saved_shape, name=name) 43 | return clipped_t, upper_bound_inv 44 | 45 | def add_noise(self, t, sigma, name=None): 46 | """Add i.i.d. Gaussian noise (0, sigma^2) to every entry of t. 47 | 48 | Args: 49 | t: the input tensor. 50 | sigma: the stddev of the Gaussian noise. 51 | name: optional name. 52 | Returns: 53 | the noisy tensor. 54 | """ 55 | 56 | with tf.name_scope(values=[t, sigma], name=name, 57 | default_name="add_gaussian_noise") as name: 58 | noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma) 59 | return noisy_t -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/sanitizers/grouped.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 SAP SE 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | from differential_privacy.dp_sgd.dp_optimizer.sanitizers.base import Sanitizer 10 | 11 | class GroupedClipper(Sanitizer): 12 | 13 | def __init__(self, specials=None): 14 | i = 0 15 | 16 | 17 | def clip_grads(self,t,bound,name=None): 18 | 19 | assert bound > 0 20 | with tf.name_scope(values=[t, bound], name=name, 21 | default_name="batch_clip_by_l2norm") as name: 22 | saved_shape = tf.shape(t) 23 | batch_size = tf.slice(saved_shape, [0], [1]) 24 | 25 | # Add a small number to avoid divide by 0 
26 | t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]])) 27 | upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]), 28 | tf.constant(1.0 / bound)) 29 | # Add a small number to avoid divide by 0 30 | l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001) 31 | scale = tf.minimum(l2norm_inv, upper_bound_inv) * bound 32 | clipped_t = tf.matmul(tf.diag(scale), t2) 33 | clipped_t = tf.reshape(clipped_t, saved_shape, name=name) 34 | return clipped_t 35 | 36 | 37 | def add_noise(self, t, sigma, name=None): 38 | 39 | """Add i.i.d. Gaussian noise (0, sigma^2) to every entry of t. 40 | 41 | Args: 42 | t: the input tensor. 43 | sigma: the stddev of the Gaussian noise. 44 | name: optional name. 45 | Returns: 46 | the noisy tensor. 47 | """ 48 | 49 | with tf.name_scope(values=[t, sigma], name=name, 50 | default_name="add_gaussian_noise") as name: 51 | noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma) 52 | return noisy_t -------------------------------------------------------------------------------- /differential_privacy/dp_sgd/dp_optimizer/utils.py: -------------------------------------------------------------------------------- 1 | # (originally from https://github.com/tensorflow/models/tree/master/research/differential_privacy, 2 | # possibly with some small edits by @corcra) 3 | 4 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 5 | # SPDX-FileCopyrightText: 2020 SAP SE 6 | # 7 | # SPDX-License-Identifier: Apache-2.0 8 | 9 | """Utils for building and training NN models. 
10 | """ 11 | from __future__ import division 12 | 13 | import math 14 | 15 | import numpy 16 | import tensorflow as tf 17 | 18 | 19 | class LayerParameters(object): 20 | """class that defines a non-conv layer.""" 21 | def __init__(self): 22 | self.name = "" 23 | self.num_units = 0 24 | self._with_bias = False 25 | self.relu = False 26 | self.gradient_l2norm_bound = 0.0 27 | self.bias_gradient_l2norm_bound = 0.0 28 | self.trainable = True 29 | self.weight_decay = 0.0 30 | 31 | 32 | class ConvParameters(object): 33 | """class that defines a conv layer.""" 34 | def __init__(self): 35 | self.patch_size = 5 36 | self.stride = 1 37 | self.in_channels = 1 38 | self.out_channels = 0 39 | self.with_bias = True 40 | self.relu = True 41 | self.max_pool = True 42 | self.max_pool_size = 2 43 | self.max_pool_stride = 2 44 | self.trainable = False 45 | self.in_size = 28 46 | self.name = "" 47 | self.num_outputs = 0 48 | self.bias_stddev = 0.1 49 | 50 | 51 | # Parameters for a layered neural network. 52 | class NetworkParameters(object): 53 | """class that define the overall model structure.""" 54 | def __init__(self): 55 | self.input_size = 0 56 | self.projection_type = 'NONE' # NONE, RANDOM, PCA 57 | self.projection_dimensions = 0 58 | self.default_gradient_l2norm_bound = 0.0 59 | self.layer_parameters = [] # List of LayerParameters 60 | self.conv_parameters = [] # List of ConvParameters 61 | 62 | 63 | def GetTensorOpName(x): 64 | """Get the name of the op that created a tensor. 65 | 66 | Useful for naming related tensors, as ':' in name field of op is not permitted 67 | 68 | Args: 69 | x: the input tensor. 70 | Returns: 71 | the name of the op. 72 | """ 73 | 74 | t = x.name.rsplit(":", 1) 75 | if len(t) == 1: 76 | return x.name 77 | else: 78 | return t[0] 79 | 80 | 81 | def BuildNetwork(inputs, network_parameters): 82 | """Build a network using the given parameters. 83 | 84 | Args: 85 | inputs: a Tensor of floats containing the input data. 
86 | network_parameters: NetworkParameters object 87 | that describes the parameters for the network. 88 | Returns: 89 | output, training_parameters: where the outputs (a tensor) is the output 90 | of the network, and training_parameters (a dictionary that maps the 91 | name of each variable to a dictionary of parameters) is the parameters 92 | used during training. 93 | """ 94 | 95 | training_parameters = {} 96 | num_inputs = network_parameters.input_size 97 | outputs = inputs 98 | projection = None 99 | 100 | # First apply convolutions, if needed 101 | for conv_param in network_parameters.conv_parameters: 102 | outputs = tf.reshape( 103 | outputs, 104 | [-1, conv_param.in_size, conv_param.in_size, 105 | conv_param.in_channels]) 106 | conv_weights_name = "%s_conv_weight" % (conv_param.name) 107 | conv_bias_name = "%s_conv_bias" % (conv_param.name) 108 | conv_std_dev = 1.0 / (conv_param.patch_size 109 | * math.sqrt(conv_param.in_channels)) 110 | conv_weights = tf.Variable( 111 | tf.truncated_normal([conv_param.patch_size, 112 | conv_param.patch_size, 113 | conv_param.in_channels, 114 | conv_param.out_channels], 115 | stddev=conv_std_dev), 116 | trainable=conv_param.trainable, 117 | name=conv_weights_name) 118 | conv_bias = tf.Variable( 119 | tf.truncated_normal([conv_param.out_channels], 120 | stddev=conv_param.bias_stddev), 121 | trainable=conv_param.trainable, 122 | name=conv_bias_name) 123 | training_parameters[conv_weights_name] = {} 124 | training_parameters[conv_bias_name] = {} 125 | conv = tf.nn.conv2d(outputs, conv_weights, 126 | strides=[1, conv_param.stride, 127 | conv_param.stride, 1], 128 | padding="SAME") 129 | relud = tf.nn.relu(conv + conv_bias) 130 | mpd = tf.nn.max_pool(relud, ksize=[1, 131 | conv_param.max_pool_size, 132 | conv_param.max_pool_size, 1], 133 | strides=[1, conv_param.max_pool_stride, 134 | conv_param.max_pool_stride, 1], 135 | padding="SAME") 136 | outputs = mpd 137 | num_inputs = conv_param.num_outputs 138 | # this should equal 139 
| # in_size * in_size * out_channels / (stride * max_pool_stride) 140 | 141 | # once all the convs are done, reshape to make it flat 142 | outputs = tf.reshape(outputs, [-1, num_inputs]) 143 | 144 | # Now project, if needed 145 | if network_parameters.projection_type is not "NONE": 146 | projection = tf.Variable(tf.truncated_normal( 147 | [num_inputs, network_parameters.projection_dimensions], 148 | stddev=1.0 / math.sqrt(num_inputs)), trainable=False, name="projection") 149 | num_inputs = network_parameters.projection_dimensions 150 | outputs = tf.matmul(outputs, projection) 151 | 152 | # Now apply any other layers 153 | 154 | for layer_parameters in network_parameters.layer_parameters: 155 | num_units = layer_parameters.num_units 156 | hidden_weights_name = "%s_weight" % (layer_parameters.name) 157 | hidden_weights = tf.Variable( 158 | tf.truncated_normal([num_inputs, num_units], 159 | stddev=1.0 / math.sqrt(num_inputs)), 160 | name=hidden_weights_name, trainable=layer_parameters.trainable) 161 | training_parameters[hidden_weights_name] = {} 162 | if layer_parameters.gradient_l2norm_bound: 163 | training_parameters[hidden_weights_name]["gradient_l2norm_bound"] = ( 164 | layer_parameters.gradient_l2norm_bound) 165 | if layer_parameters.weight_decay: 166 | training_parameters[hidden_weights_name]["weight_decay"] = ( 167 | layer_parameters.weight_decay) 168 | 169 | outputs = tf.matmul(outputs, hidden_weights) 170 | if layer_parameters.with_bias: 171 | hidden_biases_name = "%s_bias" % (layer_parameters.name) 172 | hidden_biases = tf.Variable(tf.zeros([num_units]), 173 | name=hidden_biases_name) 174 | training_parameters[hidden_biases_name] = {} 175 | if layer_parameters.bias_gradient_l2norm_bound: 176 | training_parameters[hidden_biases_name][ 177 | "bias_gradient_l2norm_bound"] = ( 178 | layer_parameters.bias_gradient_l2norm_bound) 179 | 180 | outputs += hidden_biases 181 | if layer_parameters.relu: 182 | outputs = tf.nn.relu(outputs) 183 | # num_inputs for the next 
layer is num_units in the current layer. 184 | num_inputs = num_units 185 | 186 | return outputs, projection, training_parameters 187 | 188 | 189 | def VaryRate(start, end, saturate_epochs, epoch): 190 | """Compute a linearly varying number. 191 | 192 | Decrease linearly from start to end until epoch saturate_epochs. 193 | 194 | Args: 195 | start: the initial number. 196 | end: the end number. 197 | saturate_epochs: after this we do not reduce the number; if less than 198 | or equal to zero, just return start. 199 | epoch: the current learning epoch. 200 | Returns: 201 | the caculated number. 202 | """ 203 | if saturate_epochs <= 0: 204 | return start 205 | 206 | step = (start - end) / (saturate_epochs - 1) 207 | if epoch < saturate_epochs: 208 | return start - step * epoch 209 | else: 210 | return end 211 | 212 | 213 | 214 | def SoftThreshold(t, threshold_ratio, name=None): 215 | """Soft-threshold a tensor by the mean value. 216 | 217 | Softthreshold each dimension-0 vector (for matrix it is each column) by 218 | the mean of absolute value multiplied by the threshold_ratio factor. Here 219 | we soft threshold each column as it corresponds to each unit in a layer. 220 | 221 | Args: 222 | t: the input tensor. 223 | threshold_ratio: the threshold ratio. 224 | name: the optional name for the returned tensor. 225 | Returns: 226 | the thresholded tensor, where each entry is soft-thresholded by 227 | threshold_ratio times the mean of the aboslute value of each column. 
def GenerateBinomialTable(m):
  """Build a table of binomial coefficients.

  Args:
    m: the maximum order of the table.
  Returns:
    A [m+1, m+1] float64 tensor T with T[i][j] = (i choose j) for
    0 <= i, j <= m (entries with j > i remain zero).
  """
  table = numpy.zeros((m + 1, m + 1), dtype=numpy.float64)
  # (i choose 0) == 1 for every row.
  table[:, 0] = 1.0
  for row in range(1, m + 1):
    for col in range(1, m + 1):
      # Pascal's rule: (n choose k) = (n-1 choose k) + (n-1 choose k-1).
      value = table[row - 1, col] + table[row - 1, col - 1]
      # Guard against float64 overflow for large m.
      assert not math.isnan(value) and not math.isinf(value)
      table[row, col] = value
  return tf.convert_to_tensor(table)
class AmortizedAccountant(object):
  """Keep track of privacy spending in an amortized way.

  Assumes every example is processed uniformly at random, so the spending
  of each batch is amortized over the whole training pool.  Assumes
  Gaussian noise, so advanced composition accumulates eps^2 and delta.
  """

  def __init__(self, total_examples):
    """Initialization. Currently only supports amortized tracking.

    Args:
      total_examples: total number of examples in the training pool.
    """
    assert total_examples > 0
    self._total_examples = total_examples
    # Accumulators live in the TF graph so they can be updated as part of
    # the training ops and read back with a session.
    self._eps_squared_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                        name="eps_squared_sum")
    self._delta_sum = tf.Variable(tf.zeros([1]), trainable=False,
                                  name="delta_sum")

  def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending for one randomly sampled batch.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples in the batch.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """
    eps, delta = eps_delta
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      sampling_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Privacy amplification via sampling; see Lemma 2.2 in
      # http://arxiv.org/pdf/1405.7085v2.pdf
      batch_eps = tf.reshape(
          tf.log(1.0 + sampling_ratio * (tf.exp(eps) - 1.0)), [1])
      batch_delta = tf.reshape(sampling_ratio * delta, [1])
      # Advanced composition for Gaussian noise: sum eps^2 and delta.
      return tf.group(
          tf.assign_add(self._eps_squared_sum, tf.square(batch_eps)),
          tf.assign_add(self._delta_sum, batch_delta))

  def get_privacy_spent(self, sess, target_eps=None):
    """Report the spending so far.

    Args:
      sess: the session used to evaluate the accumulators.
      target_eps: the target epsilon. Unused.
    Returns:
      a list containing a single EpsDelta with plain Python float values,
      for consistency with MomentsAccountant.
    """
    unused_target_eps = target_eps  # pylint: disable=unused-argument
    eps_squared_sum, delta_sum = sess.run(
        [self._eps_squared_sum, self._delta_sum])
    return [EpsDelta(math.sqrt(eps_squared_sum), float(delta_sum))]
145 | 146 | We further assume that at each step, the mechanism operates on a random 147 | sample with sampling probability q = batch_size / total_examples. Then 148 | E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L] 149 | By distinguishing two cases of whether D < D' or D' < D, we have 150 | that 151 | E[exp(L X)] <= max (I1, I2) 152 | where 153 | I1 = (1-q) E ((1-q) + q P(X+1) / P(X))^L + q E ((1-q) + q P(X) / P(X-1))^L 154 | I2 = E (P(X) / ((1-q) + q P(X+1)))^L 155 | 156 | In order to compute I1 and I2, one can consider to 157 | 1. use an asymptotic bound, which recovers the advance composition theorem; 158 | 2. use the closed formula (like GaussianMomentsAccountant); 159 | 3. use numerical integration or random sample estimation. 160 | 161 | Dependent on the distribution, we can often obtain a tigher estimation on 162 | the moments and hence a more accurate estimation of the privacy loss than 163 | obtained using generic composition theorems. 164 | 165 | """ 166 | 167 | __metaclass__ = abc.ABCMeta 168 | 169 | def __init__(self, total_examples, moment_orders=32): 170 | """Initialize a MomentsAccountant. 171 | 172 | Args: 173 | total_examples: total number of examples. 174 | moment_orders: the order of moments to keep. 175 | """ 176 | 177 | assert total_examples > 0 178 | self._total_examples = total_examples 179 | self._moment_orders = (moment_orders 180 | if isinstance(moment_orders, (list, tuple)) 181 | else range(1, moment_orders + 1)) 182 | self._max_moment_order = max(self._moment_orders) 183 | assert self._max_moment_order < 100, "The moment order is too large." 184 | self._log_moments = [tf.Variable(numpy.float64(0.0), 185 | trainable=False, 186 | name=("log_moments-%d" % moment_order)) 187 | for moment_order in self._moment_orders] 188 | 189 | @abc.abstractmethod 190 | def _compute_log_moment(self, sigma, q, moment_order): 191 | """Compute high moment of privacy loss. 192 | 193 | Args: 194 | sigma: the noise sigma, in the multiples of the sensitivity. 
195 | q: the sampling ratio. 196 | moment_order: the order of moment. 197 | Returns: 198 | log E[exp(moment_order * X)] 199 | """ 200 | pass 201 | 202 | def accumulate_privacy_spending(self, unused_eps_delta, 203 | sigma, num_examples): 204 | """Accumulate privacy spending. 205 | 206 | In particular, accounts for privacy spending when we assume there 207 | are num_examples, and we are releasing the vector 208 | (sum_{i=1}^{num_examples} x_i) + Normal(0, stddev=l2norm_bound*sigma) 209 | where l2norm_bound is the maximum l2_norm of each example x_i, and 210 | the num_examples have been randomly selected out of a pool of 211 | self.total_examples. 212 | 213 | Args: 214 | unused_eps_delta: EpsDelta pair which can be tensors. Unused 215 | in this accountant. 216 | sigma: the noise sigma, in the multiples of the sensitivity (that is, 217 | if the l2norm sensitivity is k, then the caller must have added 218 | Gaussian noise with stddev=k*sigma to the result of the query). 219 | num_examples: the number of examples involved. 220 | Returns: 221 | a TensorFlow operation for updating the privacy spending. 222 | """ 223 | q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples 224 | 225 | moments_accum_ops = [] 226 | for i in range(len(self._log_moments)): 227 | moment = self._compute_log_moment(sigma, q, self._moment_orders[i]) 228 | moments_accum_ops.append(tf.assign_add(self._log_moments[i], moment)) 229 | return tf.group(*moments_accum_ops), q 230 | 231 | def _compute_delta(self, log_moments, eps): 232 | """Compute delta for given log_moments and eps. 233 | 234 | Args: 235 | log_moments: the log moments of privacy loss, in the form of pairs 236 | of (moment_order, log_moment) 237 | eps: the target epsilon. 
238 | Returns: 239 | delta 240 | """ 241 | min_delta = 1.0 242 | for moment_order, log_moment in log_moments: 243 | if math.isinf(log_moment) or math.isnan(log_moment): 244 | sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) 245 | continue 246 | if log_moment < moment_order * eps: 247 | min_delta = min(min_delta, 248 | math.exp(log_moment - moment_order * eps)) 249 | return min_delta 250 | 251 | def _compute_eps(self, log_moments, delta): 252 | min_eps = float("inf") 253 | for moment_order, log_moment in log_moments: 254 | if math.isinf(log_moment) or math.isnan(log_moment): 255 | sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) 256 | continue 257 | min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order) 258 | return min_eps 259 | 260 | def get_privacy_spent(self, sess, target_eps=None, target_deltas=None): 261 | """Compute privacy spending in (e, d)-DP form for a single or list of eps. 262 | 263 | Args: 264 | sess: the session to run the tensor. 265 | target_eps: a list of target epsilon's for which we would like to 266 | compute corresponding delta value. 267 | target_deltas: a list of target deltas for which we would like to 268 | compute the corresponding eps value. Caller must specify 269 | either target_eps or target_delta. 270 | Returns: 271 | A list of EpsDelta pairs. 
272 | """ 273 | assert (target_eps is None) ^ (target_deltas is None) 274 | eps_deltas = [] 275 | log_moments = sess.run(self._log_moments) 276 | log_moments_with_order = numpy.array(list(zip(self._moment_orders, log_moments))) 277 | if target_eps is not None: 278 | for eps in target_eps: 279 | delta = self._compute_delta(log_moments_with_order, eps) 280 | eps_deltas.append(EpsDelta(eps, delta)) 281 | else: 282 | assert target_deltas 283 | for delta in target_deltas: 284 | eps_deltas.append( 285 | EpsDelta(self._compute_eps(log_moments_with_order, delta), delta)) 286 | return eps_deltas 287 | 288 | 289 | class GaussianMomentsAccountant(MomentsAccountant): 290 | """MomentsAccountant which assumes Gaussian noise. 291 | 292 | GaussianMomentsAccountant assumes the noise added is centered Gaussian 293 | noise N(0, sigma^2 I). In this case, we can compute the differential moments 294 | accurately using a formula. 295 | 296 | For asymptotic bound, for Gaussian noise with variance sigma^2, we can show 297 | for L < sigma^2, q L < sigma, 298 | log E[exp(L X)] = O(q^2 L^2 / sigma^2). 299 | Using this we derive that for training T epoches, with batch ratio q, 300 | the Gaussian mechanism with variance sigma^2 (with q < 1/sigma) is (e, d) 301 | private for d = exp(T/q q^2 L^2 / sigma^2 - L e). Setting L = sigma^2, 302 | Tq = e/2, the mechanism is (e, exp(-e sigma^2/2))-DP. Equivalently, the 303 | mechanism is (e, d)-DP if sigma = sqrt{2 log(1/d)}/e, q < 1/sigma, 304 | and T < e/(2q). This bound is better than the bound obtained using general 305 | composition theorems, by an Omega(sqrt{log k}) factor on epsilon, if we run 306 | k steps. Since we use direct estimate, the obtained privacy bound has tight 307 | constant. 308 | 309 | For GaussianMomentAccountant, it suffices to compute I1, as I1 >= I2, 310 | which reduce to computing E(P(x+s)/P(x+s-1) - 1)^i for s = 0 and 1. 
In the 311 | companion gaussian_moments.py file, we supply procedure for computing both 312 | I1 and I2 (the computation of I2 is through multi-precision integration 313 | package). It can be verified that indeed I1 >= I2 for wide range of parameters 314 | we have tried, though at the moment we are unable to prove this claim. 315 | 316 | We recommend that when using this accountant, users independently verify 317 | using gaussian_moments.py that for their parameters, I1 is indeed larger 318 | than I2. This can be done by following the instructions in 319 | gaussian_moments.py. 320 | """ 321 | 322 | def __init__(self, total_examples, moment_orders=32): 323 | """Initialization. 324 | 325 | Args: 326 | total_examples: total number of examples. 327 | moment_orders: the order of moments to keep. 328 | """ 329 | super(self.__class__, self).__init__(total_examples, moment_orders) 330 | self._binomial_table = utils.GenerateBinomialTable(self._max_moment_order) 331 | 332 | def _differential_moments(self, sigma, s, t): 333 | """Compute 0 to t-th differential moments for Gaussian variable. 334 | 335 | E[(P(x+s)/P(x+s-1)-1)^t] 336 | = sum_{i=0}^t (t choose i) (-1)^{t-i} E[(P(x+s)/P(x+s-1))^i] 337 | = sum_{i=0}^t (t choose i) (-1)^{t-i} E[exp(-i*(2*x+2*s-1)/(2*sigma^2))] 338 | = sum_{i=0}^t (t choose i) (-1)^{t-i} exp(i(i+1-2*s)/(2 sigma^2)) 339 | Args: 340 | sigma: the noise sigma, in the multiples of the sensitivity. 341 | s: the shift. 342 | t: 0 to t-th moment. 343 | Returns: 344 | 0 to t-th moment as a tensor of shape [t+1]. 345 | """ 346 | assert t <= self._max_moment_order, ("The order of %d is out " 347 | "of the upper bound %d." 
348 | % (t, self._max_moment_order)) 349 | binomial = tf.slice(self._binomial_table, [0, 0], 350 | [t + 1, t + 1]) 351 | signs = numpy.zeros((t + 1, t + 1), dtype=numpy.float64) 352 | for i in range(t + 1): 353 | for j in range(t + 1): 354 | signs[i, j] = 1.0 - 2 * ((i - j) % 2) 355 | #exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma) 356 | # for j in range(t + 1)], dtype=tf.float64) 357 | exponents = [j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma) for j in range(t + 1)] 358 | 359 | # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j} 360 | x = tf.multiply(binomial, signs) 361 | # y[i, j] = x[i, j] * exp(exponents[j]) 362 | # = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2)) 363 | # Note: this computation is done by broadcasting pointwise multiplication 364 | # between [t+1, t+1] tensor and [t+1] tensor. 365 | exponents = tf.stack(exponents,axis=0) 366 | exponents = tf.cast(exponents, tf.float64) 367 | y = tf.multiply(x, tf.exp(exponents)) 368 | # z[i] = sum_j y[i, j] 369 | # = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2)) 370 | z = tf.reduce_sum(y, 1) 371 | return z 372 | 373 | def _compute_log_moment(self, sigma, q, moment_order): 374 | """Compute high moment of privacy loss. 375 | 376 | Args: 377 | sigma: the noise sigma, in the multiples of the sensitivity. 378 | q: the sampling ratio. 379 | moment_order: the order of moment. 380 | Returns: 381 | log E[exp(moment_order * X)] 382 | """ 383 | assert moment_order <= self._max_moment_order, ("The order of %d is out " 384 | "of the upper bound %d." 385 | % (moment_order, 386 | self._max_moment_order)) 387 | binomial_table = tf.slice(self._binomial_table, [moment_order, 0], 388 | [1, moment_order + 1]) 389 | # qs = [1 q q^2 ... q^L] = exp([0 1 2 ... 
L] * log(q)) 390 | qs = tf.exp(tf.constant([i * 1.0 for i in range(moment_order + 1)], 391 | dtype=tf.float64) * tf.cast( 392 | tf.log(q), dtype=tf.float64)) 393 | moments0 = self._differential_moments(sigma, 0.0, moment_order) 394 | term0 = tf.reduce_sum(binomial_table * qs * moments0) 395 | moments1 = self._differential_moments(sigma, 1.0, moment_order) 396 | term1 = tf.reduce_sum(binomial_table * qs * moments1) 397 | return tf.squeeze(tf.log(tf.cast(q * term0 + (1.0 - q) * term1, 398 | tf.float64))) 399 | 400 | 401 | class DummyAccountant(object): 402 | """An accountant that does no accounting.""" 403 | 404 | def accumulate_privacy_spending(self, *unused_args): 405 | return tf.no_op() 406 | 407 | def get_privacy_spent(self, unused_sess, **unused_kwargs): 408 | return [EpsDelta(numpy.inf, 1.0)] 409 | -------------------------------------------------------------------------------- /discretedata_manager.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2020 SAP SE 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | import pandas as pd 6 | import numpy as np 7 | 8 | class discretedata_manager(object): 9 | def __init__(self, data): 10 | self.createOHdataset(data) 11 | self.dictLables = self.extractDictLables() 12 | df_num = self.OHdataset.select_dtypes(include=[np.number]) 13 | df_norm = (df_num - df_num.min()) / ((df_num.max() - df_num.min())) 14 | self.OHdataset[df_norm.columns] = df_norm 15 | 16 | def extractDictLables(self): 17 | dictLables = {} 18 | column = '' 19 | for var in self.OHdataset.columns: 20 | if ('_' not in var): 21 | dictLables[var] = [[max(self.OHdataset[var]),min(self.OHdataset[var]),np.mean(self.OHdataset[var])]] 22 | else: 23 | if(column != var.split('_')[0]): 24 | dictLables[var.split('_')[0]] = [var.split('_')[1]] 25 | column = var.split('_')[0] 26 | else: 27 | dictLables[column].append(var.split('_')[1]) 28 | self.variables = list(dictLables.keys()) 29 | return dictLables 
30 | 31 | def mapOHlabels(self,ohlabels,labels): 32 | return [labels[i] for i in ohlabels] 33 | 34 | def getLabelsLength(self): 35 | labelsLength = [] 36 | for key in self.dictLables: 37 | labelsLength.append(len(self.dictLables[key])) 38 | return labelsLength 39 | 40 | def getLabelLengthTot(self): 41 | totLength = 0 42 | for key in self.dictLables: 43 | totLength += (len(self.dictLables[key])) 44 | return totLength 45 | 46 | def createOHdataset(self,data): 47 | self.OHdataset = pd.get_dummies(data) 48 | #TODO add in case of numerical variables 49 | # cols = self.OHdataset.columns.tolist() 50 | # temp = cols[0] 51 | # cols = cols[1:] 52 | # cols.append(temp) 53 | # self.OHdataset = self.OHdataset[cols] 54 | 55 | def convertDiscreteMatrix(self,data): 56 | newData = np.zeros(data.shape) 57 | newTextDataset = [] 58 | lengths = self.getLabelsLength() 59 | totIndex = 0 60 | for index, length in enumerate(lengths): 61 | samplesDisc = [] 62 | textData = [] 63 | for i in range(data.shape[0]): 64 | if(length>1): 65 | p = np.array(data[i, totIndex:totIndex + length]) 66 | p /= p.sum() 67 | sampled_value = np.random.choice(length, 1, p=p) 68 | samplesDisc.append(sampled_value) 69 | newData[i, samplesDisc[i] + totIndex] = 1 70 | textData.append(self.dictLables[self.variables[index]][sampled_value[0]]) 71 | else: 72 | sampled_value = data[i, totIndex] 73 | norm_values = self.dictLables[self.variables[index]][0] 74 | denormalized_value = sampled_value * ((norm_values[0] - norm_values[1]) ) + norm_values[1] 75 | samplesDisc.append(denormalized_value) 76 | newData[i, totIndex] = samplesDisc[i] 77 | textData.append(int(denormalized_value)) 78 | newTextDataset.append(textData) 79 | totIndex += length 80 | return newData, newTextDataset 81 | 82 | -------------------------------------------------------------------------------- /figs/image003.png: -------------------------------------------------------------------------------- 
def softmax(logits):
    """Softmax over the last axis of `logits`.

    tf.nn.softmax already returns a tensor with the same shape as its
    input, so the tf.reshape(..., tf.shape(logits)) wrapper the original
    code applied was a no-op and has been dropped.
    """
    return tf.nn.softmax(logits)
def Discriminator(inputs, diminput, DIM):
    """MLP critic: three leaky-ReLU layers then a linear map to one score.

    Args:
        inputs: [batch, diminput] tensor of real or generated samples.
        diminput: input feature dimension.
        DIM: hidden-layer width.
    Returns:
        [batch] tensor of unbounded critic scores.
    """
    hidden = LeakyReLULayer('Discriminator.1', diminput, DIM, inputs)
    hidden = LeakyReLULayer('Discriminator.2', DIM, DIM, hidden)
    hidden = LeakyReLULayer('Discriminator.3', DIM, DIM, hidden)
    score = tflib.ops.linear.Linear('Discriminator.4', DIM, 1, hidden)
    return tf.reshape(score, [-1])
def DiscriminatorRNN(inputs, diminput, DIM, reuse=False):
    """Critic for sequence data: flattens the sequence, then applies the
    same MLP stack as Discriminator.

    Args:
        inputs: sequence tensor, reshaped to [-1, diminput] before scoring.
        diminput: flattened feature dimension per scored row.
        DIM: hidden-layer width.
        reuse: if True, reuse the variables of an earlier call.
    Returns:
        1-D tensor of critic scores.
    """
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()
        flat = tf.reshape(inputs, [-1, diminput])
        hidden = LeakyReLULayer('Discriminator.1', diminput, DIM, flat)
        hidden = LeakyReLULayer('Discriminator.2', DIM, DIM, hidden)
        hidden = LeakyReLULayer('Discriminator.3', DIM, DIM, hidden)
        score = tflib.ops.linear.Linear('Discriminator.4', DIM, 1, hidden)
        return tf.reshape(score, [-1])
def params_with_name(name):
    """Return every shared parameter whose name contains `name`.

    Matching is a plain substring test against the registered name.
    """
    matching = []
    for param_name, param in _params.items():
        if name in param_name:
            matching.append(param)
    return matching
def unpickle(file):
    """Load one pickled CIFAR-10 batch file.

    Args:
        file: path to a batch file from the CIFAR-10 distribution
            (pickled with latin1-compatible encoding).
    Returns:
        (data, labels) tuple from the unpickled batch dict.
    """
    # Use a context manager so the handle is closed even if pickle.load
    # raises; also avoid shadowing the builtin `dict`.
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='latin1')
    return batch['data'], batch['labels']
def load(batch_size, data_dir):
    """Build (train, test) epoch-generator factories for CIFAR-10.

    Each element is a zero-argument callable; calling it yields
    (images, labels) minibatches for one shuffled epoch.
    """
    train_batches = [
        'data_batch_1',
        'data_batch_2',
        'data_batch_3',
        'data_batch_4',
        'data_batch_5',
    ]
    train_gen = cifar_generator(train_batches, batch_size, data_dir)
    test_gen = cifar_generator(['test_batch'], batch_size, data_dir)
    return train_gen, test_gen
def get_inception_score(images, splits=10):
    """Compute the Inception Score (mean, std over `splits` splits).

    images: list of HxWxC numpy arrays with values in [0, 255]
            (the asserts below enforce list-of-3D-arrays with max > 10).
    splits: number of disjoint chunks over which IS is computed; the
            returned std is across these chunks.

    NOTE(review): relies on the module-global `softmax` graph tensor being
    built by `_init_inception()` first (its call at module bottom is
    commented out) — verify callers initialize it.
    """
    assert(type(images) == list)
    assert(type(images[0]) == np.ndarray)
    assert(len(images[0].shape) == 3)
    assert(np.max(images[0]) > 10)
    assert(np.min(images[0]) >= 0.0)
    # Add a leading batch dim to every image so they can be concatenated.
    inps = []
    for img in images:
        img = img.astype(np.float32)
        inps.append(np.expand_dims(img, 0))
    bs = 100  # inference minibatch size
    with tf.Session() as sess:
        preds = []
        n_batches = int(math.ceil(float(len(inps)) / float(bs)))
        for i in range(n_batches):
            # sys.stdout.write(".")
            # sys.stdout.flush()
            inp = inps[(i * bs):min((i + 1) * bs, len(inps))]
            inp = np.concatenate(inp, 0)
            # Feed the raw image batch directly into the graph's input node.
            pred = sess.run(softmax, {'ExpandDims:0': inp})
            preds.append(pred)
        preds = np.concatenate(preds, 0)
        scores = []
        for i in range(splits):
            part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]
            # KL(p(y|x) || p(y)) averaged over the split, then exponentiated
            # — the standard Inception Score formula.
            # NOTE(review): np.log(part) yields -inf if any softmax output is
            # exactly 0 — presumably never happens in float32; confirm.
            kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
            kl = np.mean(np.sum(kl, 1))
            scores.append(np.exp(kl))
    return np.mean(scores), np.std(scores)
def _init_inception():
    """Download the 2015 Inception graph (if needed) and build the module-global
    `softmax` tensor used by get_inception_score().

    Side effects: creates MODEL_DIR, downloads/extracts the frozen graph,
    imports it into the default TF graph, and assigns the global `softmax`.
    """
    global softmax
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(MODEL_DIR, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # urlretrieve reporthook: print an in-place download percentage.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Extract every run; extraction is idempotent over an existing dir.
    tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
    with tf.gfile.FastGFile(os.path.join(
            MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
    # Works with an arbitrary minibatch size.
    with tf.Session() as sess:
        pool3 = sess.graph.get_tensor_by_name('pool_3:0')
        ops = pool3.graph.get_operations()
        # The frozen graph has batch dim hard-coded to 1; rewrite each op
        # output's static shape so batch becomes unknown (None).
        # NOTE(review): pokes the private Tensor attribute `_shape` — this is
        # a known hack tied to old TF versions; verify against the TF in use.
        for op_idx, op in enumerate(ops):
            for o in op.outputs:
                shape = o.get_shape()
                shape = [s.value for s in shape]
                new_shape = []
                for j, s in enumerate(shape):
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                o._shape = tf.TensorShape(new_shape)
        # Recompute the class logits from pool3 with the graph's own softmax
        # weights so the result works for arbitrary batch sizes.
        w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1]
        logits = tf.matmul(tf.squeeze(pool3), w)
        softmax = tf.nn.softmax(logits)

# NOTE(review): apparent leftover debug stub — `r` is unused and the actual
# initialization call below is commented out, so `softmax` stays None unless
# a caller invokes _init_inception() explicitly.
if softmax is None:
    r = 5
    # _init_inception()
def mnist_generator(data, batch_size, n_labelled, limit=None):
    """Build an epoch-generator factory over an MNIST split.

    Parameters
    ----------
    data : tuple
        (images, targets) arrays; images reshapeable to (-1, 784).
    batch_size : int
        Minibatch size; len(images) must be divisible by it.
    n_labelled : int or None
        If not None, mark the first n_labelled examples as labelled and
        yield a per-batch labelled mask as a third element.
    limit : int or None
        If given, truncate the dataset to its first `limit` examples.

    Returns
    -------
    callable
        Zero-argument generator factory; each call shuffles in-place and
        yields (images, targets[, labelled]) batches for one epoch.
    """
    images, targets = data

    # Shuffle images and targets in unison by replaying the same RNG state.
    rng_state = numpy.random.get_state()
    numpy.random.shuffle(images)
    numpy.random.set_state(rng_state)
    numpy.random.shuffle(targets)
    if limit is not None:
        # BUGFIX: Python 2 print statement was a SyntaxError under Python 3.
        print("WARNING ONLY FIRST {} MNIST DIGITS".format(limit))
        images = images.astype('float32')[:limit]
        targets = targets.astype('int32')[:limit]
    if n_labelled is not None:
        labelled = numpy.zeros(len(images), dtype='int32')
        labelled[:n_labelled] = 1

    def get_epoch():
        # Re-shuffle every epoch, keeping images/targets (and the labelled
        # mask) aligned via the shared RNG state.
        rng_state = numpy.random.get_state()
        numpy.random.shuffle(images)
        numpy.random.set_state(rng_state)
        numpy.random.shuffle(targets)

        if n_labelled is not None:
            numpy.random.set_state(rng_state)
            numpy.random.shuffle(labelled)

        image_batches = images.reshape(-1, batch_size, 784)
        target_batches = targets.reshape(-1, batch_size)

        if n_labelled is not None:
            labelled_batches = labelled.reshape(-1, batch_size)
            # BUGFIX: originally yielded the whole `labelled` array for every
            # batch while `labelled_batches` went unused; yield the per-batch
            # slice so the mask lines up with the images/targets batch.
            for i in range(len(image_batches)):
                yield (numpy.copy(image_batches[i]),
                       numpy.copy(target_batches[i]),
                       numpy.copy(labelled_batches[i]))
        else:
            # BUGFIX: xrange is Python 2 only; use range.
            for i in range(len(image_batches)):
                yield (numpy.copy(image_batches[i]), numpy.copy(target_batches[i]))

    return get_epoch
def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True):
    """Batch normalization with optional fused NCHW fast path.

    name: prefix for the lib.param shared variables (.offset/.scale and,
          on the fused path, .moving_mean/.moving_variance).
    axes: moments axes; [0,2,3] (BCHW) or [0,2] (BCW) take the fused path
          when `fused` is True, anything else falls back to plain BN.
    is_training: optional boolean tensor; None means "always training".
    stats_iter: iteration counter tensor used to average moving stats
                (required on the fused path when is_training is given and
                update_moving_stats is True).
    """
    if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True:
        if axes==[0,2]:
            # Promote BCW to BCHW so fused_batch_norm (NCHW-only) applies;
            # the extra dim is collapsed again before returning.
            inputs = tf.expand_dims(inputs, 3)
        # Old (working but pretty slow) implementation:
        ##########

        # inputs = tf.transpose(inputs, [0,2,3,1])

        # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False)
        # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32'))
        # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32'))
        # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4)

        # return tf.transpose(result, [0,3,1,2])

        # New (super fast but untested) implementation:
        offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32'))
        scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32'))

        # Moving statistics are tracked but excluded from training.
        moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False)
        moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), trainable=False)

        def _fused_batch_norm_training():
            # Returns (normalized, batch_mean, batch_var).
            return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW')
        def _fused_batch_norm_inference():
            # Version which blends in the current item's statistics
            batch_size = tf.cast(tf.shape(inputs)[0], 'float32')
            mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True)
            mean = ((1./batch_size)*mean) + (((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None]
            var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None]
            return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var

        # Standard version
        # return tf.nn.fused_batch_norm(
        #     inputs,
        #     scale,
        #     offset,
        #     epsilon=1e-2,
        #     mean=moving_mean,
        #     variance=moving_variance,
        #     is_training=False,
        #     data_format='NCHW'
        # )

        if is_training is None:
            outputs, batch_mean, batch_var = _fused_batch_norm_training()
        else:
            # Choose train/inference behavior at graph-execution time.
            outputs, batch_mean, batch_var = tf.cond(is_training,
                                                     _fused_batch_norm_training,
                                                     _fused_batch_norm_inference)
            if update_moving_stats:
                no_updates = lambda: outputs
                def _force_updates():
                    """Internal function forces updates moving_vars if is_training."""
                    # Running average weighted by the iteration count, so the
                    # moving stats converge to a uniform average over batches.
                    float_stats_iter = tf.cast(stats_iter, tf.float32)

                    update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean))
                    update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var))

                    with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                        return tf.identity(outputs)
                outputs = tf.cond(is_training, _force_updates, no_updates)

        if axes == [0,2]:
            return outputs[:,:,:,0] # collapse last dim
        else:
            return outputs
    else:
        # raise Exception('old BN')
        # TODO we can probably use nn.fused_batch_norm here too for speedup
        mean, var = tf.nn.moments(inputs, axes, keep_dims=True)
        shape = mean.get_shape().as_list()
        if 0 not in axes:
            # NOTE(review): the '{}' placeholder is never formatted — the
            # string is concatenated with `name` instead of .format(name),
            # so '{}' prints literally. Consider fixing the message.
            print ("WARNING ({}): didn't find 0 in axes, but not using separate BN params for each item in batch "+name)
            shape[0] = 1
        offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32'))
        scale = lib.param(name+'.scale', np.ones(shape, dtype='float32'))
        result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)


        return result
_default_weightnorm = False
def enable_default_weightnorm():
    """Make weight normalization the default for subsequently built Conv1D layers."""
    global _default_weightnorm
    _default_weightnorm = True

def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.):
    """
    1-D convolution with optional PixelCNN-style masking, weight
    normalization, and He/Glorot initialization.

    inputs: tensor of shape (batch size, num channels, width)
    mask_type: one of None, ('a', n_channels), ('b', n_channels)

    returns: tensor of shape (batch size, num channels, width)
    """
    with tf.name_scope(name) as scope:

        if mask_type is not None:
            mask_type, mask_n_channels = mask_type

            mask = np.ones(
                (filter_size, input_dim, output_dim),
                dtype='float32'
            )
            center = filter_size // 2

            # Mask out future locations
            # filter shape is (width, input channels, output channels)
            mask[center+1:, :, :] = 0.

            # Mask out future channels.
            # BUGFIX: xrange is Python 2 only and raises NameError under
            # Python 3 (which the rest of this repo targets); use range.
            for i in range(mask_n_channels):
                for j in range(mask_n_channels):
                    if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j):
                        mask[
                            center,
                            i::mask_n_channels,
                            j::mask_n_channels
                        ] = 0.

        def uniform(stdev, size):
            # Uniform init with the stdev of the matched normal distribution.
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        fan_in = input_dim * filter_size
        fan_out = output_dim * filter_size / stride

        if mask_type is not None: # only approximately correct
            fan_in /= 2.
            fan_out /= 2.

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else: # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))

        filter_values = uniform(
            filters_stdev,
            (filter_size, input_dim, output_dim)
        )
        # print "WARNING IGNORING GAIN"
        filter_values *= gain

        filters = lib.param(name+'.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Reparameterize filters as direction * learned per-output norm.
            norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1]))
                filters = filters * (target_norms / norms)

        if mask_type is not None:
            with tf.name_scope('filter_mask'):
                filters = filters * mask

        # NOTE(review): data_format 'NCHW' for conv1d is accepted by old TF
        # releases only; newer TF 1.x expects 'NCW' — confirm against the
        # pinned TF version.
        result = tf.nn.conv1d(
            value=inputs,
            filters=filters,
            stride=stride,
            padding='SAME',
            data_format='NCHW'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros([output_dim], dtype='float32')
            )

            # bias_add has no 1-D NCHW form: temporarily add a width dim.
            result = tf.expand_dims(result, 3)
            result = tf.nn.bias_add(result, _biases, data_format='NCHW')
            result = tf.squeeze(result)

        return result
def unset_weights_stdev():
    """Clear the global weight-init stdev override set by set_weights_stdev."""
    global _weights_stdev
    _weights_stdev = None

def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.):
    """
    2-D convolution over NCHW inputs with optional PixelCNN-style masking,
    weight normalization, and He/Glorot initialization.

    inputs: tensor of shape (batch size, num channels, height, width)
    mask_type: one of None, ('a', n_channels), ('b', n_channels)

    returns: tensor of shape (batch size, num channels, height, width)
    """
    with tf.name_scope(name) as scope:

        if mask_type is not None:
            mask_type, mask_n_channels = mask_type

            mask = np.ones(
                (filter_size, filter_size, input_dim, output_dim),
                dtype='float32'
            )
            center = filter_size // 2

            # Mask out future locations
            # filter shape is (height, width, input channels, output channels)
            mask[center+1:, :, :, :] = 0.
            mask[center, center+1:, :, :] = 0.

            # Mask out future channels.
            # BUGFIX: xrange is Python 2 only and raises NameError under
            # Python 3 (which the rest of this repo targets); use range.
            for i in range(mask_n_channels):
                for j in range(mask_n_channels):
                    if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j):
                        mask[
                            center,
                            center,
                            i::mask_n_channels,
                            j::mask_n_channels
                        ] = 0.

        def uniform(stdev, size):
            # Uniform init with the stdev of the matched normal distribution.
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        fan_in = input_dim * filter_size**2
        fan_out = output_dim * filter_size**2 / (stride**2)

        if mask_type is not None: # only approximately correct
            fan_in /= 2.
            fan_out /= 2.

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else: # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))

        if _weights_stdev is not None:
            # Global override installed via set_weights_stdev().
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, input_dim, output_dim)
            )
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, input_dim, output_dim)
            )

        # print "WARNING IGNORING GAIN"
        filter_values *= gain

        filters = lib.param(name+'.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Reparameterize filters as direction * learned per-output norm.
            norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,2)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,2]))
                filters = filters * (target_norms / norms)

        if mask_type is not None:
            with tf.name_scope('filter_mask'):
                filters = filters * mask

        result = tf.nn.conv2d(
            input=inputs,
            filter=filters,
            strides=[1, 1, stride, stride],
            padding='SAME',
            data_format='NCHW'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros(output_dim, dtype='float32')
            )

            result = tf.nn.bias_add(result, _biases, data_format='NCHW')


        return result
def set_weights_stdev(weights_stdev):
    """Globally override the init stdev used by subsequently built Deconv2D layers."""
    global _weights_stdev
    _weights_stdev = weights_stdev

def unset_weights_stdev():
    """Clear the global init-stdev override."""
    global _weights_stdev
    _weights_stdev = None

def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
    biases=True,
    gain=1.,
    mask_type=None,
    ):
    """
    inputs: tensor of shape (batch size, height, width, input_dim)
    returns: tensor of shape (batch size, 2*height, 2*width, output_dim)

    Transposed convolution with fixed stride 2 (doubles spatial dims),
    optional weight normalization, He/Glorot init. NHWC layout.
    """
    with tf.name_scope(name) as scope:

        # Masking is not implemented for the transposed convolution.
        if mask_type != None:
            raise Exception('Unsupported configuration')

        def uniform(stdev, size):
            # Uniform init with the stdev of the matched normal distribution.
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        # Fan-in shrinks by stride^2 because each input pixel feeds a
        # stride x stride output patch.
        stride = 2
        fan_in = input_dim * filter_size**2 / (stride**2)
        fan_out = output_dim * filter_size**2

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else: # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))


        if _weights_stdev is not None:
            # Global override installed via set_weights_stdev().
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )

        filter_values *= gain

        filters = lib.param(
            name+'.Filters',
            filter_values
        )

        if weightnorm==None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Reparameterize filters as direction * learned per-output norm.
            norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,3)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,3]))
                filters = filters * tf.expand_dims(target_norms / norms, 1)

        #inputs = tf.transpose(inputs, [0, 3, 1, 2], name='NCHW_to_NHWC')
        #inputs = tf.transpose(inputs, [0,2,3,1], name='NCHW_to_NHWC')

        # Output spatial dims are exactly double the input's.
        input_shape = tf.shape(inputs)
        try: # tf pre-1.0 (top) vs 1.0 (bottom)
            output_shape = tf.pack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim])
        except Exception as e:
            output_shape = tf.stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim])

        result = tf.nn.conv2d_transpose(
            value=inputs,
            filter=filters,
            output_shape=output_shape,
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros(output_dim, dtype='float32')
            )
            result = tf.nn.bias_add(result, _biases)

        #result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW')


        return result
_default_weightnorm = False
def enable_default_weightnorm():
    """Make weight normalization the default for subsequently built Linear layers."""
    global _default_weightnorm
    _default_weightnorm = True

def disable_default_weightnorm():
    """Restore plain (un-normalized) weights as the default."""
    global _default_weightnorm
    _default_weightnorm = False

_weights_stdev = None
def set_weights_stdev(weights_stdev):
    """Globally override the stdev used by uniform weight initialization."""
    global _weights_stdev
    _weights_stdev = weights_stdev

def unset_weights_stdev():
    """Clear the global weight-init stdev override."""
    global _weights_stdev
    _weights_stdev = None

def Linear(
        name,
        input_dim,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=None,
        gain=1.
        ):
    """
    Fully connected layer: inputs @ W (+ b), applied over the last axis.

    initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)`
    Inputs with more than 2 dims are flattened to (-1, input_dim),
    multiplied, and reshaped back with output_dim as the last axis.
    """
    with tf.name_scope(name) as scope:

        def uniform(stdev, size):
            # A global stdev override (set_weights_stdev) wins over the
            # initialization-specific stdev.
            if _weights_stdev is not None:
                stdev = _weights_stdev
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        if initialization == 'lecun':# and input_dim != output_dim):
            # disabling orth. init for now because it's too slow
            weight_values = uniform(
                np.sqrt(1./input_dim),
                (input_dim, output_dim)
            )

        elif initialization == 'glorot' or (initialization == None):

            weight_values = uniform(
                np.sqrt(2./(input_dim+output_dim)),
                (input_dim, output_dim)
            )

        elif initialization == 'he':

            weight_values = uniform(
                np.sqrt(2./input_dim),
                (input_dim, output_dim)
            )

        elif initialization == 'glorot_he':

            weight_values = uniform(
                np.sqrt(4./(input_dim+output_dim)),
                (input_dim, output_dim)
            )

        elif initialization == 'orthogonal' or \
            (initialization == None and input_dim == output_dim):
            # NOTE(review): the `initialization == None` part here is dead —
            # None is already caught by the 'glorot' branch above.

            # From lasagne
            def sample(shape):
                if len(shape) < 2:
                    raise RuntimeError("Only shapes of length 2 or more are "
                                       "supported.")
                flat_shape = (shape[0], np.prod(shape[1:]))
                # TODO: why normal and not uniform?
                a = np.random.normal(0.0, 1.0, flat_shape)
                u, _, v = np.linalg.svd(a, full_matrices=False)
                # pick the one with the correct shape
                q = u if u.shape == flat_shape else v
                q = q.reshape(shape)
                return q.astype('float32')
            weight_values = sample((input_dim, output_dim))

        elif initialization[0] == 'uniform':

            weight_values = np.random.uniform(
                low=-initialization[1],
                high=initialization[1],
                size=(input_dim, output_dim)
            ).astype('float32')

        else:

            raise Exception('Invalid initialization!')

        weight_values *= gain

        weight = lib.param(
            name + '.W',
            weight_values
        )

        if weightnorm==None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Reparameterize columns as direction * learned norm.
            norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))
            # norm_values = np.linalg.norm(weight_values, axis=0)

            target_norms = lib.param(
                name + '.g',
                norm_values
            )

            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
                weight = weight * (target_norms / norms)

        # if 'Discriminator' in name:
        #     print "WARNING weight constraint on {}".format(name)
        #     weight = tf.nn.softsign(10.*weight)*.1

        if inputs.get_shape().ndims == 2:
            result = tf.matmul(inputs, weight)
        else:
            # Flatten leading axes, multiply, then restore them with
            # output_dim as the new last axis.
            reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
            result = tf.matmul(reshaped_inputs, weight)
            # BUGFIX: tf.pack/tf.unpack were removed in TF 1.0; fall back to
            # tf.stack/tf.unstack, mirroring the guard already used in
            # deconv2d.py for the same pre/post-1.0 rename.
            try: # tf pre-1.0 (top) vs 1.0 (bottom)
                result = tf.reshape(result, tf.pack(tf.unpack(tf.shape(inputs))[:-1] + [output_dim]))
            except AttributeError:
                result = tf.reshape(result, tf.stack(tf.unstack(tf.shape(inputs))[:-1] + [output_dim]))

        if biases:
            result = tf.nn.bias_add(
                result,
                lib.param(
                    name + '.b',
                    np.zeros((output_dim,), dtype='float32')
                )
            )

        return result
def save_images(X, save_path):
    """Tile a batch of images into one grid image and save it to save_path.

    X: (n, 784)-style flat batch, (n, h, w) grayscale, or (n, c, h, w) BCHW
       color batch; float inputs are assumed in [0, 1] and rescaled.
    save_path: destination filename passed to scipy's imsave.
    """
    # [0, 1] -> [0,255]
    if isinstance(X.flatten()[0], np.floating):
        X = (255.99*X).astype('uint8')

    # Pick the most-square grid whose rows evenly divide the batch.
    n_samples = X.shape[0]
    rows = int(np.sqrt(n_samples))
    while n_samples % rows != 0:
        rows -= 1

    # BUGFIX: use integer division — under Python 3 `/` yields a float,
    # which breaks the np.zeros shape below and the slice indexing.
    nh, nw = rows, n_samples // rows

    if X.ndim == 2:
        # Flat vectors: assume square images and unflatten.
        X = np.reshape(X, (X.shape[0], int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1]))))

    if X.ndim == 4:
        # BCHW -> BHWC
        X = X.transpose(0,2,3,1)
        h, w = X[0].shape[:2]
        img = np.zeros((h*nh, w*nw, 3))
    elif X.ndim == 3:
        h, w = X[0].shape[:2]
        img = np.zeros((h*nh, w*nw))

    for n, x in enumerate(X):
        # BUGFIX: floor-divide so (j, i) are integer grid coordinates.
        j = n // nw
        i = n % nw
        img[j*h:j*h+h, i*w:i*w+w] = x

    # NOTE(review): scipy.misc.imsave was removed in scipy >= 1.2; if the
    # environment's scipy is newer, this import/call needs imageio instead.
    imsave(save_path, img)
def load(batch_size, data_dir='/home/ishaan/data/imagenet64'):
    """Return (train, valid) epoch-generator factories for 64x64 ImageNet.

    The file counts are hard-coded to the standard small-imagenet split
    layout of numbered .png files under train_64x64 / valid_64x64.
    """
    return (
        make_generator(data_dir+'/train_64x64', 1281149, batch_size),
        make_generator(data_dir+'/valid_64x64', 49999, batch_size)
    )

if __name__ == '__main__':
    # Smoke test: time the first 1000 training batches.
    train_gen, valid_gen = load(64)
    t0 = time.time()
    for i, batch in enumerate(train_gen(), start=1):
        # BUGFIX: Python 2 print statement was a SyntaxError under Python 3,
        # which the rest of this repo targets.
        print("{}\t{}".format(str(time.time() - t0), batch[0][0, 0, 0, 0]))
        if i == 1000:
            break
        t0 = time.time()