├── classifier
│   ├── __init__.py
│   └── cnn_classifier.py
├── privacygan
│   ├── __init__.py
│   ├── cifar
│   │   ├── __init__.py
│   │   └── cifar_gan.py
│   ├── lfw
│   │   ├── __init__.py
│   │   └── lfw_gan.py
│   ├── mnist
│   │   ├── __init__.py
│   │   └── mnist_gan.py
│   └── privacy_gan.py
├── requirements.txt
├── CODE_OF_CONDUCT.md
├── contributing.md
├── LICENSE
├── README.md
├── setup.py
├── SECURITY.md
├── PrivGAN_mnist_fash_tf2.ipynb
├── .gitignore
├── PrivGAN_mnist_tf2.ipynb
├── PrivGAN_lfw_tf2.ipynb
├── PrivGan_CIFAR_tf2.ipynb
└── MNIST_down_tf2.ipynb

/classifier/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/privacygan/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/privacygan/cifar/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/privacygan/lfw/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/privacygan/mnist/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.16.2
2 | pandas>=0.25.3
3 | tqdm>=4.38.0
4 | keras>=2.2.4
5 | scipy>=1.1.0
6 | tensorflow>=1.14.0
7 | scikit-learn==0.20.3
8 | matplotlib>=3.3.0
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 | 
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 | 
5 | Resources:
6 | 
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | This project welcomes contributions and suggestions. Most contributions require you to
4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to,
5 | and actually do, grant us the rights to use your contribution. For details, visit
6 | https://cla.microsoft.com.
7 | 
8 | When you submit a pull request, a CLA-bot will automatically determine whether you need
9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the
10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
11 | 
12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) Microsoft Corporation.
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # privGAN
2 | This repository contains the source code for privGAN, a novel approach for deterring membership inference attacks on GAN-generated synthetic medical data. Currently, the repository contains Jupyter notebooks for various datasets. We will be converting the code into a library in the future.
3 | Please see our paper 'PrivGAN: Protecting GANs from membership inference attacks at low cost' ([arXiv link](https://arxiv.org/abs/2001.00071)), accepted at PETS 2021.
4 | 
5 | ### Version information
6 | 1) Python 3.7.3
7 | 2) Numpy 1.16.2
8 | 3) Pandas 0.25.3
9 | 4) Tqdm 4.38.0
10 | 5) Keras 2.2.4
11 | 6) Scipy 1.1.0
12 | 7) Tensorflow 1.14.0
13 | 8) Scikit-learn 0.20.3
14 | 
15 | 
16 | ### Notebooks comparing white-box attack accuracy of privGAN and GAN on various datasets
17 | 1) PrivGAN_mnist_tf2.ipynb
18 | 2) PrivGAN_mnist_fash_tf2.ipynb
19 | 3) PrivGAN_lfw_tf2.ipynb
20 | 4) PrivGan_CIFAR_tf2.ipynb
21 | 
22 | 
23 | ### Notebooks comparing performance on downstream classification tasks
24 | 1) MNIST_down_tf2.ipynb
25 | 
26 | ### Installation
27 | Install the dependencies with `pip install -r requirements.txt`, then install the package from the repository root with `pip install .` (see `setup.py`).
28 | 
29 | ### Contribution
30 | Please review the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct). Before submitting a pull request, please remove all output from your notebooks by going to Cell -> All Output -> Clear.
31 | 
32 | 
33 | ### Contact
34 | 
35 | ### Copyright
36 | Copyright (c) Microsoft Corporation.
37 | 
38 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 | 
3 | # Copyright (c) Microsoft Corporation.
4 | 
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | """privGAN library setup file for pip."""
23 | from setuptools import find_packages
24 | from setuptools import setup
25 | 
26 | setup(
27 |     name='privGan',
28 |     version='1.0',
29 |     url='https://github.com/microsoft/privGAN',
30 |     license='MIT',
31 |     author='Sumit Mukherjee, Nabajyoti Patowary',
32 |     author_email='privgan@microsoft.com',
33 |     description='Privacy protected GAN for image data',
34 |     long_description='This repository contains the source code for privGAN - a novel approach \
35 | for deterring membership inference attacks on GAN-generated synthetic medical data. Currently, \
36 | the repository contains the Jupyter notebooks for various datasets. We will be converting \
37 | the code into a library in the future. Please visit our paper \
38 | "PrivGAN: Protecting GANs from membership inference attacks at low cost" [ArXiv Link](https://arxiv.org/abs/2001.00071), accepted at PETS 2021.',
39 |     long_description_content_type="text/markdown",
40 |     install_requires=[
41 |         'numpy>=1.16.2',
42 |         'pandas>=0.25.3',
43 |         'tqdm>=4.38.0',
44 |         'keras>=2.2.4',
45 |         'scipy>=1.1.0',
46 |         'scikit-learn>=0.20.3',
47 |         'matplotlib>=3.3.0'
48 |     ],
49 |     # Explicit dependence on TensorFlow is not supported.
50 |     # See https://github.com/tensorflow/tensorflow/issues/7166
51 |     extras_require={
52 |         'tf': ['tensorflow>=1.14.0']
53 |     },
54 |     packages=find_packages())
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | ## Security
4 | 
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 | 
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
8 | 
9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 | 
41 | 
--------------------------------------------------------------------------------
/classifier/cnn_classifier.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation. All rights reserved.
2 | # Licensed under the MIT License.
3 | 
4 | import tensorflow as tf
5 | from tensorflow.keras import Input
6 | from tensorflow.keras import Model, Sequential
7 | from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D
8 | 
9 | class CNNClassifier:
10 |     """CNN classifier used to classify images produced by different GAN models.
11 |     The classifier uses the Adadelta optimizer.
12 |     """
13 |     def __init__(self, num_classes, input_shape, dropout=0.5, learning_rate=1.0, rho=0.95, epsilon=1e-06):
14 |         """Initializes and compiles the CNN classifier.
15 | 
16 |         Args:
17 |             num_classes (int): number of classes to be used for classification
18 |             input_shape (tuple): shape of a single input image, e.g. (28, 28, 1)
19 |             dropout (float, optional): dropout rate used by the Dropout layers. Defaults to 0.5.
20 |             learning_rate (float, optional): learning rate of the optimizer. Defaults to 1.0.
21 |             rho (float, optional): decay rate. Defaults to 0.95.
22 |             epsilon (float, optional): small constant used to improve the conditioning of the gradient update. Defaults to 1e-06.
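
        Illustrative example (a sketch, not part of the original file; assumes
        28x28 grayscale images and one-hot encoded labels prepared by the caller):

            clf = CNNClassifier(num_classes=10, input_shape=(28, 28, 1))
            # x_* have shape (n, 28, 28, 1); y_* have shape (n, 10)
            loss, acc = clf.train(x_train, y_train, x_val, y_val)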
23 | """ 24 | self.dropout = dropout 25 | self.learning_rate = learning_rate 26 | self.rho = rho 27 | self.epsilon = epsilon 28 | self.num_classes = num_classes 29 | self.input_shape = input_shape 30 | self.model = self.__build_model() 31 | 32 | def train(self, x_train, y_train, x_validation, y_validation, batch_size=256, epochs=25): 33 | """Trains and evaluates the CNN classifier model. 34 | Uses accuracy as the metric 35 | 36 | Args: 37 | x_train (tensor): training data 38 | y_train (tensor): labels of the training data 39 | x_validation (tensor): validation batch input 40 | y_validation (tensor): tensor representing labels of the validation batch 41 | batch_size (int): size of the batch per epoch. Defaults to 256 42 | epochs (int): number of epochs. Defaults to 25 43 | 44 | Returns: 45 | Scalar test loss - loss and accuracy post evaluation 46 | """ 47 | self.model.fit(x_train, y_train, 48 | batch_size=batch_size, 49 | epochs=epochs, 50 | verbose=1, 51 | validation_data=(x_validation, y_validation)) 52 | return self.model.evaluate(x_validation, y_validation, verbose=0) 53 | 54 | 55 | def __build_model(self): 56 | """Private method used to build the cnn classifier 57 | 58 | Returns: 59 | Sequential: A cnn model 60 | """ 61 | model = Sequential() 62 | model.add(Conv2D(32, kernel_size=(3, 3), 63 | activation='relu', 64 | input_shape=self.input_shape)) 65 | model.add(Conv2D(64, (3, 3), activation='relu')) 66 | model.add(MaxPooling2D(pool_size=(2, 2))) 67 | model.add(Dropout(self.dropout)) 68 | model.add(Flatten()) 69 | model.add(Dense(128, activation='relu')) 70 | model.add(Dropout(self.dropout)) 71 | model.add(Dense(self.num_classes, activation='softmax')) 72 | model.compile(loss=tf.keras.losses.categorical_crossentropy, 73 | optimizer=tf.keras.optimizers.Adadelta(learning_rate=self.learning_rate, rho=self.rho, epsilon=self.epsilon), 74 | metrics=['accuracy']) 75 | return model 76 | -------------------------------------------------------------------------------- /privacygan/lfw/lfw_gan.py: -------------------------------------------------------------------------------- 1 | #Copyright (c) Microsoft Corporation. All rights reserved. 2 | #Licensed under the MIT License. 3 | 4 | 5 | from tensorflow.keras import Sequential 6 | from tensorflow.keras.layers import Dense, Dropout, Flatten, LeakyReLU 7 | from tensorflow.keras.optimizers import Adam 8 | from tensorflow.keras import initializers 9 | import numpy as np 10 | 11 | def LFW_Generator(randomDim = 100, optim = Adam(lr=0.0002, beta_1=0.5)): 12 | """Creates a generateof for LFW dataset 13 | 14 | Args: 15 | randomDim (int, optional): input shape. Defaults to 100. 16 | optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 
17 | """ 18 | generator = Sequential() 19 | generator.add(Dense(512, input_dim=randomDim, kernel_initializer=initializers.RandomNormal(stddev=0.02), 20 | name = 'layer'+str(np.random.randint(0,1e9)))) 21 | generator.add(LeakyReLU(0.2, 22 | name = 'layer'+str(np.random.randint(0,1e9)))) 23 | generator.add(Dense(512, 24 | name = 'layer'+str(np.random.randint(0,1e9)))) 25 | generator.add(LeakyReLU(0.2, 26 | name = 'layer'+str(np.random.randint(0,1e9)))) 27 | generator.add(Dense(1024, 28 | name = 'layer'+str(np.random.randint(0,1e9)))) 29 | generator.add(LeakyReLU(0.2, 30 | name = 'layer'+str(np.random.randint(0,1e9)))) 31 | generator.add(Dense(2914, activation='tanh', 32 | name = 'layer'+str(np.random.randint(0,1e9)))) 33 | generator.compile(loss='binary_crossentropy', optimizer=optim) 34 | 35 | return generator 36 | 37 | 38 | def LFW_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5)): 39 | """Discriminator for LFW dataset 40 | 41 | Args: 42 | optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 43 | """ 44 | discriminator = Sequential() 45 | discriminator.add(Dense(2048, input_dim=2914, kernel_initializer=initializers.RandomNormal(stddev=0.02), 46 | name = 'layer'+str(np.random.randint(0,1e9)))) 47 | discriminator.add(LeakyReLU(0.2, 48 | name = 'layer'+str(np.random.randint(0,1e9)))) 49 | discriminator.add(Dense(512, 50 | name = 'layer'+str(np.random.randint(0,1e9)))) 51 | discriminator.add(LeakyReLU(0.2, 52 | name = 'layer'+str(np.random.randint(0,1e9)))) 53 | discriminator.add(Dense(256, 54 | name = 'layer'+str(np.random.randint(0,1e9)))) 55 | discriminator.add(LeakyReLU(0.2, 56 | name = 'layer'+str(np.random.randint(0,1e9)))) 57 | discriminator.add(Dense(1, activation='sigmoid', 58 | name = 'layer'+str(np.random.randint(0,1e9)))) 59 | discriminator.compile(loss='binary_crossentropy', optimizer=optim) 60 | 61 | return discriminator 62 | 63 | def LFW_DiscriminatorPrivate(OutSize = 2, optim = Adam(lr=0.0002, beta_1=0.5)): 64 | """The discriminator designed to guess which Generator generated the data 65 | 66 | Args: 67 | OutSize (int, optional): [description]. Defaults to 2. 68 | optim ([type], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 69 | """ 70 | discriminator = Sequential() 71 | discriminator.add(Dense(2048, input_dim=2914, kernel_initializer=initializers.RandomNormal(stddev=0.02), 72 | name = 'layer'+str(np.random.randint(0,1e9)))) 73 | discriminator.add(LeakyReLU(0.2, 74 | name = 'layer'+str(np.random.randint(0,1e9)))) 75 | discriminator.add(Dense(512, 76 | name = 'layer'+str(np.random.randint(0,1e9)))) 77 | discriminator.add(LeakyReLU(0.2, 78 | name = 'layer'+str(np.random.randint(0,1e9)))) 79 | discriminator.add(Dense(256, 80 | name = 'layer'+str(np.random.randint(0,1e9)))) 81 | discriminator.add(LeakyReLU(0.2, 82 | name = 'layer'+str(np.random.randint(0,1e9)))) 83 | discriminator.add(Dense(OutSize, activation='softmax', 84 | name = 'layer'+str(np.random.randint(0,1e9)))) 85 | discriminator.compile(loss='sparse_categorical_crossentropy', optimizer=optim) 86 | 87 | return discriminator -------------------------------------------------------------------------------- /privacygan/mnist/mnist_gan.py: -------------------------------------------------------------------------------- 1 | #Copyright (c) Microsoft Corporation. All rights reserved. 2 | #Licensed under the MIT License. 
3 | 
4 | 
5 | import tensorflow as tf
6 | from tensorflow.keras import Input
7 | from tensorflow.keras import Model, Sequential
8 | from tensorflow.keras.layers import Dense, Dropout, Flatten, LeakyReLU
9 | from tensorflow.keras.optimizers import Adam
10 | from tensorflow.keras import initializers
11 | import numpy as np
12 | 
13 | 
14 | def MNIST_Generator(randomDim = 100, optim = Adam(lr=0.0002, beta_1=0.5)):
15 |     """Creates a generator for the MNIST dataset
16 | 
17 |     Args:
18 |         randomDim (int, optional): input (noise) dimension. Defaults to 100.
19 |         optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5).
20 | 
21 |     """
22 | 
23 |     generator = Sequential()
24 |     generator.add(Dense(512, input_dim=randomDim, kernel_initializer=initializers.RandomNormal(stddev=0.02),
25 |                         name = 'layer'+str(np.random.randint(0,1e9))))
26 |     generator.add(LeakyReLU(0.2,
27 |                             name = 'layer'+str(np.random.randint(0,1e9))))
28 |     generator.add(Dense(512,
29 |                         name = 'layer'+str(np.random.randint(0,1e9))))
30 |     generator.add(LeakyReLU(0.2,
31 |                             name = 'layer'+str(np.random.randint(0,1e9))))
32 |     generator.add(Dense(1024,
33 |                         name = 'layer'+str(np.random.randint(0,1e9))))
34 |     generator.add(LeakyReLU(0.2,
35 |                             name = 'layer'+str(np.random.randint(0,1e9))))
36 |     generator.add(Dense(784, activation='tanh',
37 |                         name = 'layer'+str(np.random.randint(0,1e9))))
38 |     generator.compile(loss='binary_crossentropy', optimizer=optim)
39 | 
40 |     return generator
41 | 
42 | 
43 | def MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5)):
44 |     """Discriminator for the MNIST dataset
45 | 
46 |     Args:
47 |         optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5).
48 |     """
49 | 
50 |     discriminator = Sequential()
51 |     discriminator.add(Dense(2048, input_dim=784, kernel_initializer=initializers.RandomNormal(stddev=0.02),
52 |                             name = 'layer'+str(np.random.randint(0,1e9))))
53 |     discriminator.add(LeakyReLU(0.2,
54 |                                 name = 'layer'+str(np.random.randint(0,1e9))))
55 |     discriminator.add(Dense(512,
56 |                             name = 'layer'+str(np.random.randint(0,1e9))))
57 |     discriminator.add(LeakyReLU(0.2,
58 |                                 name = 'layer'+str(np.random.randint(0,1e9))))
59 |     discriminator.add(Dense(256,
60 |                             name = 'layer'+str(np.random.randint(0,1e9))))
61 |     discriminator.add(LeakyReLU(0.2,
62 |                                 name = 'layer'+str(np.random.randint(0,1e9))))
63 |     discriminator.add(Dense(1, activation='sigmoid',
64 |                             name = 'layer'+str(np.random.randint(0,1e9))))
65 |     discriminator.compile(loss='binary_crossentropy', optimizer=optim)
66 | 
67 |     return discriminator
68 | 
69 | def MNIST_DiscriminatorPrivate(OutSize = 2, optim = Adam(lr=0.0002, beta_1=0.5)):
70 |     """The discriminator designed to guess which Generator generated the data
71 | 
72 |     Args:
73 |         OutSize (int, optional): number of generators to distinguish between. Defaults to 2.
74 |         optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5).
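
    Illustrative example (a sketch, not part of the original file):

        pDisc = MNIST_DiscriminatorPrivate(OutSize=2)
        # images: shape (n, 784); probs: shape (n, 2), one softmax
        # column per candidate generator
        probs = pDisc.predict(images)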
75 | """ 76 | 77 | discriminator = Sequential() 78 | discriminator.add(Dense(2048, input_dim=784, kernel_initializer=initializers.RandomNormal(stddev=0.02), 79 | name = 'layer'+str(np.random.randint(0,1e9)))) 80 | discriminator.add(LeakyReLU(0.2, 81 | name = 'layer'+str(np.random.randint(0,1e9)))) 82 | discriminator.add(Dense(512, 83 | name = 'layer'+str(np.random.randint(0,1e9)))) 84 | discriminator.add(LeakyReLU(0.2, 85 | name = 'layer'+str(np.random.randint(0,1e9)))) 86 | discriminator.add(Dense(256, 87 | name = 'layer'+str(np.random.randint(0,1e9)))) 88 | discriminator.add(LeakyReLU(0.2, 89 | name = 'layer'+str(np.random.randint(0,1e9)))) 90 | discriminator.add(Dense(OutSize, activation='softmax', 91 | name = 'layer'+str(np.random.randint(0,1e9)))) 92 | discriminator.compile(loss='sparse_categorical_crossentropy', optimizer=optim) 93 | 94 | return discriminator -------------------------------------------------------------------------------- /privacygan/cifar/cifar_gan.py: -------------------------------------------------------------------------------- 1 | #Copyright (c) Microsoft Corporation. All rights reserved. 2 | #Licensed under the MIT License. 3 | 4 | 5 | from tensorflow.keras import Sequential 6 | from tensorflow.keras.layers import Dense, Dropout, Flatten, LeakyReLU, Reshape, Conv2DTranspose, Conv2D 7 | from tensorflow.keras.optimizers import Adam 8 | from tensorflow.keras import initializers 9 | import numpy as np 10 | 11 | def CIFAR_Generator(randomDim = 100, optim = Adam(lr=0.0002, beta_1=0.5)): 12 | """Creates a generateof for LFW dataset 13 | 14 | Args: 15 | randomDim (int, optional): input shape. Defaults to 100. 16 | optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 17 | """ 18 | 19 | generator = Sequential() 20 | generator.add(Dense(2*2*512, input_shape=(randomDim,), kernel_initializer=initializers.RandomNormal(stddev=0.02), 21 | name = 'layer'+str(np.random.randint(0,1e9)))) 22 | generator.add(Reshape((2, 2, 512), 23 | name = 'layer'+str(np.random.randint(0,1e9)))) 24 | generator.add(LeakyReLU(0.2, 25 | name = 'layer'+str(np.random.randint(0,1e9)))) 26 | generator.add(Conv2DTranspose(256, kernel_size=5, strides=2, padding='same', 27 | name = 'layer'+str(np.random.randint(0,1e9)))) 28 | generator.add(LeakyReLU(0.2, 29 | name = 'layer'+str(np.random.randint(0,1e9)))) 30 | generator.add(Conv2DTranspose(128, kernel_size=5, strides=2, padding='same', 31 | name = 'layer'+str(np.random.randint(0,1e9)))) 32 | generator.add(LeakyReLU(0.2, 33 | name = 'layer'+str(np.random.randint(0,1e9)))) 34 | generator.add(Conv2DTranspose(64, kernel_size=5, strides=2, padding='same', 35 | name = 'layer'+str(np.random.randint(0,1e9)))) 36 | generator.add(LeakyReLU(0.2, 37 | name = 'layer'+str(np.random.randint(0,1e9)))) 38 | generator.add(Conv2DTranspose(3, kernel_size=5, strides=2, padding='same', 39 | activation='tanh', 40 | name = 'layer'+str(np.random.randint(0,1e9)))) 41 | generator.compile(loss='binary_crossentropy', optimizer=optim) 42 | 43 | return generator 44 | 45 | def CIFAR_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5)): 46 | """Discriminator for LFW dataset 47 | 48 | Args: 49 | optim ([Adam], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 
50 | """ 51 | 52 | discriminator = Sequential() 53 | discriminator.add(Conv2D(64, kernel_size=5, strides=2, padding='same', 54 | input_shape=((32, 32, 3)), kernel_initializer=initializers.RandomNormal(stddev=0.02), 55 | name = 'layer'+str(np.random.randint(0,1e9)))) 56 | discriminator.add(LeakyReLU(0.2, 57 | name = 'layer'+str(np.random.randint(0,1e9)))) 58 | discriminator.add(Conv2D(128, kernel_size=5, strides=2, padding='same', 59 | name = 'layer'+str(np.random.randint(0,1e9)))) 60 | discriminator.add(LeakyReLU(0.2, 61 | name = 'layer'+str(np.random.randint(0,1e9)))) 62 | discriminator.add(Conv2D(128, kernel_size=5, strides=2, padding='same', 63 | name = 'layer'+str(np.random.randint(0,1e9)))) 64 | discriminator.add(LeakyReLU(0.2, 65 | name = 'layer'+str(np.random.randint(0,1e9)))) 66 | discriminator.add(Conv2D(256, kernel_size=5, strides=2, padding='same', 67 | name = 'layer'+str(np.random.randint(0,1e9)))) 68 | discriminator.add(LeakyReLU(0.2, 69 | name = 'layer'+str(np.random.randint(0,1e9)))) 70 | discriminator.add(Flatten(name = 'layer'+str(np.random.randint(0,1e9)))) 71 | discriminator.add(Dense(1, activation='sigmoid', 72 | name = 'layer'+str(np.random.randint(0,1e9)))) 73 | discriminator.compile(loss='binary_crossentropy', optimizer=optim) 74 | 75 | return discriminator 76 | 77 | def CIFAR_DiscriminatorPrivate(OutSize = 2, optim = Adam(lr=0.0002, beta_1=0.5)): 78 | """The discriminator designed to guess which Generator generated the data 79 | 80 | Args: 81 | OutSize (int, optional): [description]. Defaults to 2. 82 | optim ([type], optional): optimizer. Defaults to Adam(lr=0.0002, beta_1=0.5). 83 | """ 84 | 85 | discriminator = Sequential() 86 | discriminator.add(Conv2D(64, kernel_size=5, strides=2, padding='same', 87 | input_shape=((32, 32, 3)), kernel_initializer=initializers.RandomNormal(stddev=0.02), 88 | name = 'layer'+str(np.random.randint(0,1e9)))) 89 | discriminator.add(LeakyReLU(0.2, 90 | name = 'layer'+str(np.random.randint(0,1e9)))) 91 | discriminator.add(Conv2D(128, kernel_size=5, strides=2, padding='same', 92 | name = 'layer'+str(np.random.randint(0,1e9)))) 93 | discriminator.add(LeakyReLU(0.2, 94 | name = 'layer'+str(np.random.randint(0,1e9)))) 95 | discriminator.add(Conv2D(128, kernel_size=5, strides=2, padding='same', 96 | name = 'layer'+str(np.random.randint(0,1e9)))) 97 | discriminator.add(LeakyReLU(0.2,name = 'layer'+str(np.random.randint(0,1e9)))) 98 | discriminator.add(Conv2D(256, kernel_size=5, strides=2, padding='same', 99 | name = 'layer'+str(np.random.randint(0,1e9)))) 100 | discriminator.add(LeakyReLU(0.2, 101 | name = 'layer'+str(np.random.randint(0,1e9)))) 102 | discriminator.add(Flatten(name = 'layer'+str(np.random.randint(0,1e9)))) 103 | discriminator.add(Dense(OutSize, activation='softmax', 104 | name = 'layer'+str(np.random.randint(0,1e9)))) 105 | discriminator.compile(loss='sparse_categorical_crossentropy', optimizer=optim) 106 | 107 | return discriminator 108 | 109 | 110 | -------------------------------------------------------------------------------- /PrivGAN_mnist_fash_tf2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation. All rights reserved. 
\n", 10 | "# Licensed under the MIT License\n", 11 | "\n", 12 | "import os\n", 13 | "import numpy as np\n", 14 | "import pandas as pd\n", 15 | "from tqdm import tqdm\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "from tensorflow.keras import Input\n", 18 | "from tensorflow.keras import Model, Sequential\n", 19 | "from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D, ZeroPadding2D, Conv2DTranspose, UpSampling2D, BatchNormalization\n", 20 | "from tensorflow.keras.optimizers import Adam\n", 21 | "from tensorflow.keras.datasets import fashion_mnist\n", 22 | "from tensorflow.keras import initializers\n", 23 | "from tensorflow.keras import backend as K\n", 24 | "from scipy import stats\n", 25 | "import tensorflow as tf\n", 26 | "import warnings\n", 27 | "from privacygan import privacy_gan as pg\n", 28 | "from privacygan.mnist import mnist_gan\n", 29 | "from datetime import datetime\n", 30 | "\n", 31 | "\n", 32 | "warnings.filterwarnings(\"ignore\")\n", 33 | "print(tf.__version__)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Load dataset" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# Load MNIST data\n", 50 | "(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()\n", 51 | "X_train = (X_train.astype(np.float32) - 127.5)/127.5\n", 52 | "X_test = (X_test.astype(np.float32) - 127.5)/127.5\n", 53 | "X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])\n", 54 | "X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])\n", 55 | "X_all = np.concatenate((X_train,X_test))\n", 56 | "\n", 57 | "\n", 58 | "#Generate training test split\n", 59 | "frac = 0.1 \n", 60 | "n = int(frac*len(X_all))\n", 61 | "l = np.array(range(len(X_all)))\n", 62 | "l = np.random.choice(l,len(l),replace = False)\n", 63 | "X = X_all[l[:n]]\n", 64 | "X_comp = X_all[l[n:]]\n", 65 | "\n", 66 | "print('training set size:',X.shape)\n", 67 | "print('test set size:',X_comp.shape)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Simple GAN" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "(generator, discriminator, dLosses, gLosses) = pg.SimpGAN(X, epochs = 1, batchSize = 256, verbose = 50)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "#perform white box attack\n", 93 | "Acc = pg.WBattack(X,X_comp, discriminator)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "#plot distribution of discriminator scores of training and test set\n", 103 | "plt.hist(discriminator.predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 104 | "plt.hist(discriminator.predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 105 | "plt.xlabel('Discriminator probability')\n", 106 | "plt.ylabel('Normed frequency')\n", 107 | "plt.title('GAN')\n", 108 | "plt.legend()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "#Generate fake images\n", 118 | "pg.DisplayImages(generator, figSize=(5,5), NoImages = 25)" 119 
| ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Private GAN " 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "K.clear_session()\n", 135 | "optim = Adam(lr=0.0002, beta_1=0.5)\n", 136 | "generators = [mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 137 | " mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 138 | "discriminators = [mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 139 | " ,mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 140 | "pDisc = mnist_gan.MNIST_DiscriminatorPrivate(OutSize = 2, \n", 141 | " optim = Adam(lr=0.0002, beta_1=0.5))\n", 142 | "\n", 143 | "(generators, discriminators, _, dLosses, dpLosses, gLosses)= pg.privGAN(X, epochs = 1, \n", 144 | " disc_epochs=1,\n", 145 | " batchSize=256,\n", 146 | " generators = generators, \n", 147 | " discriminators = discriminators,\n", 148 | " pDisc = pDisc,\n", 149 | " optim = optim,\n", 150 | " privacy_ratio = 1.0)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "#perform white box attack\n", 160 | "pg.WBattack_priv(X,X_comp, discriminators)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "#Generate fake images\n", 170 | "pg.DisplayImages(generators[1], figSize=(5,5), NoImages = 25)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "#plot distribution of discriminator scores of training and test set\n", 180 | "plt.hist(discriminators[0].predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 181 | "plt.hist(discriminators[0].predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 182 | "plt.xlabel('Discriminator probability')\n", 183 | "plt.ylabel('Normed frequency')\n", 184 | "plt.title('privGAN')\n", 185 | "plt.legend()" 186 | ] 187 | } 188 | ], 189 | "metadata": { 190 | "kernelspec": { 191 | "display_name": "Python 3", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.8.3" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 4 210 | } 211 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /PrivGAN_mnist_tf2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation. All rights reserved. 
\n", 10 | "# Licensed under the MIT License\n", 11 | "\n", 12 | "import os\n", 13 | "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", 14 | "import numpy as np\n", 15 | "import pandas as pd\n", 16 | "from tqdm import tqdm\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from tensorflow.keras import Input\n", 19 | "from tensorflow.keras import Model, Sequential\n", 20 | "from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D, ZeroPadding2D, Conv2DTranspose, UpSampling2D, BatchNormalization\n", 21 | "from tensorflow.keras.optimizers import Adam\n", 22 | "from tensorflow.keras.datasets import cifar10\n", 23 | "from tensorflow.keras import initializers\n", 24 | "from tensorflow.keras import backend as K\n", 25 | "from tensorflow.keras.datasets import mnist\n", 26 | "from keras import initializers\n", 27 | "from scipy import stats\n", 28 | "import tensorflow as tf\n", 29 | "import warnings\n", 30 | "from privacygan import privacy_gan as pg\n", 31 | "from privacygan.mnist import mnist_gan\n", 32 | "from datetime import datetime\n", 33 | "\n", 34 | "\n", 35 | "warnings.filterwarnings(\"ignore\")\n", 36 | "\n", 37 | "print(tf.__version__)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Load dataset " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Load MNIST data and concatenate the train+test set\n", 54 | "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", 55 | "X_train = (X_train.astype(np.float32) - 127.5)/127.5\n", 56 | "X_test = (X_test.astype(np.float32) - 127.5)/127.5\n", 57 | "X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])\n", 58 | "X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])\n", 59 | "X_all = np.concatenate((X_train,X_test))\n", 60 | "\n", 61 | "\n", 62 | "#Generate training test split\n", 63 | "frac = 0.1 \n", 64 | "n = int(frac*len(X_all))\n", 65 | "l = np.array(range(len(X_all)))\n", 66 | "l = np.random.choice(l,len(l),replace = False)\n", 67 | "X = X_all[l[:n]]\n", 68 | "X_comp = X_all[l[n:]]\n", 69 | "\n", 70 | "print('training set size:',X.shape)\n", 71 | "print('test set size:',X_comp.shape)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Test Simple GAN " 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "(generator, discriminator, dLosses, gLosses) = pg.SimpGAN(X, epochs = 1, batchSize = 256)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "#perform white box attack\n", 97 | "Acc = pg.WBattack(X,X_comp, discriminator)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "#plot distribution of discriminator scores of training and test set\n", 107 | "plt.hist(discriminator.predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 108 | "plt.hist(discriminator.predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 109 | "plt.xlabel('Discriminator probability')\n", 110 | "plt.ylabel('Normed frequency')\n", 111 | "plt.title('GAN')\n", 112 | "plt.legend()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | 
"execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "#Generate fake images\n", 122 | "pg.DisplayImages(generator, figSize=(5,5), NoImages = 25)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Test privGAN" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "### Test privGAN (with n_reps = 2)\n", 139 | "K.clear_session()\n", 140 | "optim = Adam(lr=0.0002, beta_1=0.5)\n", 141 | "generators = [mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 142 | " mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 143 | "discriminators = [mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 144 | " ,mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 145 | "pDisc = mnist_gan.MNIST_DiscriminatorPrivate(OutSize = 2, \n", 146 | " optim = Adam(lr=0.0002, beta_1=0.5))\n", 147 | "\n", 148 | "(generators, discriminators, _, dLosses, dpLosses, gLosses)= pg.privGAN(X, epochs = 1, \n", 149 | " disc_epochs=1,\n", 150 | " batchSize=256,\n", 151 | " generators = generators, \n", 152 | " discriminators = discriminators,\n", 153 | " pDisc = pDisc,\n", 154 | " optim = optim,\n", 155 | " privacy_ratio = 1.0)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "#perform white box attack\n", 165 | "pg.WBattack_priv(X,X_comp, discriminators)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "#Generate fake images\n", 175 | "pg.DisplayImages(generators[1], figSize=(5,5), NoImages = 25)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "#plot distribution of discriminator scores of training and test set\n", 185 | "plt.hist(discriminators[0].predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 186 | "plt.hist(discriminators[0].predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 187 | "plt.xlabel('Discriminator probability')\n", 188 | "plt.ylabel('Normed frequency')\n", 189 | "plt.title('privGAN')\n", 190 | "plt.legend()" 191 | ] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 3", 197 | "language": "python", 198 | "name": "python3" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 3 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython3", 210 | "version": "3.8.3" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 4 215 | } 216 | -------------------------------------------------------------------------------- /PrivGAN_lfw_tf2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation. All rights reserved. 
\n", 10 | "# Licensed under the MIT License\n", 11 | "\n", 12 | "import os\n", 13 | "import numpy as np\n", 14 | "import pandas as pd\n", 15 | "from tqdm import tqdm\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "import tensorflow as tf\n", 18 | "from tensorflow.keras import Input\n", 19 | "from tensorflow.keras import Model, Sequential\n", 20 | "from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D, UpSampling2D\n", 21 | "from tensorflow.keras.optimizers import Adam\n", 22 | "from tensorflow.keras import backend as K\n", 23 | "from scipy import stats\n", 24 | "import warnings\n", 25 | "from privacygan import privacy_gan as pg \n", 26 | "from privacygan.lfw import lfw_gan\n", 27 | "\n", 28 | "from datetime import datetime\n", 29 | "from sklearn.datasets import fetch_lfw_people\n", 30 | "\n", 31 | "\n", 32 | "warnings.filterwarnings(\"ignore\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Load dataset" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "lfw_people = fetch_lfw_people()\n", 49 | "X_all = lfw_people['data']\n", 50 | "X_all = (X_all.astype(np.float32) - 127.5)/127.5\n", 51 | "\n", 52 | "#Generate training test split\n", 53 | "frac = 0.1 \n", 54 | "n = int(frac*len(X_all))\n", 55 | "l = np.array(range(len(X_all)))\n", 56 | "l = np.random.choice(l,len(l),replace = False)\n", 57 | "X = X_all[l[:n]]\n", 58 | "X_comp = X_all[l[n:]]\n", 59 | "\n", 60 | "print('training set size:',X.shape)\n", 61 | "print('test set size:',X_comp.shape)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Simple GAN" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "generator = lfw_gan.LFW_Generator()\n", 78 | "discriminator = lfw_gan.LFW_Discriminator()\n", 79 | "(generator, discriminator, dLosses, gLosses) = pg.SimpGAN(X, epochs = 1, batchSize = 256,\n", 80 | " discriminator = discriminator,\n", 81 | " generator = generator)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "#perform white box attack\n", 91 | "Acc = pg.WBattack(X,X_comp, discriminator)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "#plot distribution of discriminator scores of training and test set\n", 101 | "plt.hist(discriminator.predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 102 | "plt.hist(discriminator.predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 103 | "plt.xlabel('Discriminator probability')\n", 104 | "plt.ylabel('Normed frequency')\n", 105 | "plt.title('GAN')\n", 106 | "plt.legend()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "#generate synthetic images\n", 116 | "noise = np.random.normal(0, 1, size=[X.shape[0], 100])\n", 117 | "generatedImages = generator.predict(noise)\n", 118 | "temp = generatedImages[:25].reshape(25, 62, 47)\n", 119 | "plt.figure(figsize=(5, 5))\n", 120 | "for i in range(temp.shape[0]):\n", 121 | " plt.subplot(5,5, i+1)\n", 122 | " plt.imshow(temp[i], interpolation='nearest', 
cmap='gray_r')\n", 123 | " plt.axis('off')\n", 124 | "plt.tight_layout()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "### Private GAN" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "K.clear_session()\n", 141 | "optim = Adam(lr=0.0002, beta_1=0.5)\n", 142 | "generators = [lfw_gan.LFW_Generator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 143 | " lfw_gan.LFW_Generator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 144 | "discriminators = [lfw_gan.LFW_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 145 | " ,lfw_gan.LFW_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 146 | "pDisc = lfw_gan.LFW_DiscriminatorPrivate(OutSize = 2, \n", 147 | " optim = Adam(lr=0.0002, beta_1=0.5))\n", 148 | "\n", 149 | "(generators, discriminators, _, dLosses, dpLosses, gLosses)= pg.privGAN(X, epochs = 1, \n", 150 | " disc_epochs=1,\n", 151 | " batchSize=256,\n", 152 | " generators = generators, \n", 153 | " discriminators = discriminators,\n", 154 | " pDisc = pDisc,\n", 155 | " optim = optim,\n", 156 | " privacy_ratio = 1.0)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "#perform white box attack\n", 166 | "pg.WBattack_priv(X,X_comp, discriminators)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "#generate synthetic images\n", 176 | "noise = np.random.normal(0, 1, size=[X.shape[0], 100])\n", 177 | "generatedImages = generators[0].predict(noise)\n", 178 | "temp = generatedImages[:25].reshape(25, 62, 47)\n", 179 | "plt.figure(figsize=(5, 5))\n", 180 | "for i in range(temp.shape[0]):\n", 181 | " plt.subplot(5,5, i+1)\n", 182 | " plt.imshow(temp[i], interpolation='nearest', cmap='gray_r')\n", 183 | " plt.axis('off')\n", 184 | "plt.tight_layout()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "#plot distribution of discriminator scores of training and test set\n", 194 | "plt.hist(discriminators[0].predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 195 | "plt.hist(discriminators[0].predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 196 | "plt.xlabel('Discriminator probability')\n", 197 | "plt.ylabel('Normed frequency')\n", 198 | "plt.title('privGAN')\n", 199 | "plt.legend()" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.8.3" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 4 224 | } 225 | -------------------------------------------------------------------------------- /PrivGan_CIFAR_tf2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) Microsoft Corporation. 
All rights reserved. \n", 10 | "# Licensed under the MIT License\n", 11 | "\n", 12 | "import os\n", 13 | "import numpy as np\n", 14 | "import pandas as pd\n", 16 | "from tqdm import tqdm\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import tensorflow as tf\n", 19 | "from tensorflow.keras import Input\n", 20 | "from tensorflow.keras import Model, Sequential\n", 21 | "from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D, ZeroPadding2D, Conv2DTranspose, UpSampling2D, BatchNormalization\n", 22 | "from tensorflow.keras.optimizers import Adam\n", 23 | "from tensorflow.keras.datasets import cifar10\n", 24 | "from tensorflow.keras import initializers\n", 25 | "from privacygan import privacy_gan as pg\n", 26 | "from privacygan.cifar import cifar_gan\n", 27 | "import warnings\n", 28 | "\n", 29 | "print(tf.__version__)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Load CIFAR-10 data\n", 39 | "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", 40 | "\n", 41 | "X_all = np.concatenate((X_train,X_test))\n", 42 | "\n", 43 | "(n, d1, d2, d3) = X_all.shape\n", 44 | "\n", 45 | "if d3 != 3:\n", 46 | "    X_all = np.moveaxis(X_all, 1, 3)\n", 47 | "    \n", 48 | "X_all = np.float32(X_all)\n", 49 | "X_all = (X_all / 255 - 0.5) * 2\n", 50 | "X_all = np.clip(X_all, -1, 1)\n", 51 | "\n", 52 | "#Generate training test split\n", 53 | "frac = 0.1 \n", 54 | "n = int(frac*len(X_all))\n", 55 | "l = np.array(range(len(X_all)))\n", 56 | "l = np.random.choice(l,len(l),replace = False)\n", 57 | "X = X_all[l[:n]]\n", 58 | "X_comp = X_all[l[n:]]\n", 59 | "\n", 60 | "print('training set size:',X.shape)\n", 61 | "print('test set size:',X_comp.shape)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "#Specify models \n", 71 | "generator = cifar_gan.CIFAR_Generator()\n", 72 | "discriminator = cifar_gan.CIFAR_Discriminator()\n", 73 | "generators = [cifar_gan.CIFAR_Generator(),cifar_gan.CIFAR_Generator()]\n", 74 | "discriminators = [cifar_gan.CIFAR_Discriminator(),cifar_gan.CIFAR_Discriminator()]\n", 75 | "pDisc = cifar_gan.CIFAR_DiscriminatorPrivate(OutSize = 2)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "(generator, discriminator, dLosses, gLosses) = pg.SimpGAN(X, epochs = 1, \n", 85 | "                                                          generator = generator, \n", 86 | "                                                          discriminator = discriminator,\n", 87 | "                                                          batchSize=256)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "#perform white box attack\n", 97 | "Acc = pg.WBattack(X,X_comp, discriminator)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "#plot distribution of discriminator scores of training and test set\n", 107 | "plt.hist(discriminator.predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 108 | "plt.hist(discriminator.predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 109 | "plt.xlabel('Discriminator probability')\n", 110 | "plt.ylabel('Normed frequency')\n", 111 | "plt.title('GAN')\n", 112 | "plt.legend()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {},
119 | "outputs": [], 120 | "source": [ 121 | "noise = np.random.normal(0, 1, size=[X.shape[0], 100])\n", 122 | "generatedImages = generator.predict(noise)\n", 123 | "temp = generatedImages[:25].reshape(25, 32, 32, 3)\n", 124 | "plt.figure(figsize=(5, 5))\n", 125 | "for i in range(temp.shape[0]):\n", 126 | "    plt.subplot(5,5, i+1)\n", 127 | "    plt.imshow((temp[i] + 1) / 2, interpolation='nearest') #rescale RGB from [-1, 1] to [0, 1] for display\n", 128 | "    plt.axis('off')\n", 129 | "plt.tight_layout()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "tf.keras.backend.clear_session()\n", 139 | "optim = Adam(lr=0.0002, beta_1=0.5)\n", 140 | "generator = cifar_gan.CIFAR_Generator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 141 | "discriminator = cifar_gan.CIFAR_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 142 | "generators = [cifar_gan.CIFAR_Generator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 143 | "              cifar_gan.CIFAR_Generator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 144 | "discriminators = [cifar_gan.CIFAR_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 145 | "                  cifar_gan.CIFAR_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 146 | "pDisc = cifar_gan.CIFAR_DiscriminatorPrivate(OutSize = 2,\n", 147 | "                                             optim = Adam(lr=0.0002, beta_1=0.5))\n", 148 | "(generators, discriminators, _, dLosses, dpLosses, gLosses)= pg.privGAN(X, epochs = 1,\n", 149 | "                                                                        disc_epochs=1,\n", 150 | "                                                                        generators = generators, \n", 151 | "                                                                        discriminators = discriminators,\n", 152 | "                                                                        pDisc = pDisc,\n", 153 | "                                                                        optim = optim,\n", 154 | "                                                                        privacy_ratio=1.0,\n", 155 | "                                                                        batchSize=256)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "#perform white box attack\n", 165 | "pg.WBattack_priv(X,X_comp, discriminators)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "noise = np.random.normal(0, 1, size=[X.shape[0], 100])\n", 175 | "generatedImages = generators[0].predict(noise)\n", 176 | "temp = generatedImages[:25].reshape(25, 32, 32, 3)\n", 177 | "plt.figure(figsize=(5, 5))\n", 178 | "for i in range(temp.shape[0]):\n", 179 | "    plt.subplot(5,5, i+1)\n", 180 | "    plt.imshow((temp[i] + 1) / 2, interpolation='nearest') #rescale RGB from [-1, 1] to [0, 1] for display\n", 181 | "    plt.axis('off')\n", 182 | "plt.tight_layout()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "plt.hist(discriminators[0].predict(X)[:,0],color = 'r', alpha = 0.5, label = 'train', density = True, bins = 50)\n", 192 | "plt.hist(discriminators[0].predict(X_comp)[:,0],color = 'b', alpha = 0.5, label = 'test', density = True, bins = 50)\n", 193 | "plt.xlabel('Discriminator probability')\n", 194 | "plt.ylabel('Normed frequency')\n", 195 | "plt.title('privGAN (1.0)')\n", 196 | "plt.legend()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "pg.WBattack_priv(X,X_comp, discriminators)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 3", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224
| "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.8.3" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 4 237 | } 238 | -------------------------------------------------------------------------------- /MNIST_down_tf2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Overview\n", 8 | "Notebook illustrating performance of a CNN classifier on MNIST dataset compared against generated data by simple GAN(simpGAN) and privGan" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# Copyright (c) Microsoft Corporation. All rights reserved. \n", 18 | "# Licensed under the MIT License.\n", 19 | "\n", 20 | "import os\n", 21 | "import numpy as np\n", 22 | "import pandas as pd\n", 23 | "from tqdm import tqdm\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import tensorflow as tf\n", 26 | "from tensorflow.keras import Input\n", 27 | "from tensorflow.keras import Model, Sequential\n", 28 | "from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPooling2D\n", 29 | "from tensorflow.keras.optimizers import Adam\n", 30 | "from privacygan import privacy_gan as pg\n", 31 | "from privacygan.mnist import mnist_gan\n", 32 | "from classifier.cnn_classifier import CNNClassifier\n", 33 | "import warnings\n", 34 | "\n", 35 | "print(tf.__version__)\n", 36 | "\n", 37 | "#https://github.com/keras-team/keras/wiki/Keras-2.0-release-notes\n", 38 | "#https://stackoverflow.com/questions/60289143/migrating-code-to-tensorflow-2-0-gives-invalid-argument-error-default-maxpoolin\n", 39 | "tf.keras.backend.set_image_data_format(\"channels_last\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Retrieve train and test data from the MNIST dataset" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Load MNIST data\n", 56 | "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()\n", 57 | "X_train = (X_train.astype(np.float32) - 127.5)/127.5\n", 58 | "X_test = (X_test.astype(np.float32) - 127.5)/127.5\n", 59 | "X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])\n", 60 | "X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])\n", 61 | "print(X_train.shape)\n", 62 | "print(X_test.shape)\n", 63 | "\n", 64 | "NUM_CLASSES = 10" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# CNN model to classify MNIST dataset\n", 74 | "NUM_EPOCHS = 1 # use 25 for actual results\n", 75 | "BATCH_SIZE_PER_EPOCH = 256\n", 76 | "y_tr = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)\n", 77 | "y_t = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)\n", 78 | "\n", 79 | "x_train = X_train.reshape(X_train.shape[0], 28, 28, 1)\n", 80 | "x_test = X_test.reshape(X_test.shape[0], 28, 28, 1)\n", 81 | "\n", 82 | "classifier = CNNClassifier(NUM_CLASSES,(28,28,1))\n", 83 | "score = classifier.train(x_train,y_tr,x_test,y_t,BATCH_SIZE_PER_EPOCH, NUM_EPOCHS)\n", 84 | "print('Test loss:', score[0])\n", 85 | 
"print('Test accuracy:', score[1])\n", 86 | "r_0 = [score[0],score[1]]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "## Using SimpGan generate synthetic images " 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "#generate simple synthetic images of same size as X_train with same balance\n", 103 | "X_c = []\n", 104 | "y_c = []\n", 105 | "\n", 106 | "for i in range(NUM_CLASSES):\n", 107 | " print(i)\n", 108 | " In = np.where(y_train==i)\n", 109 | " X = X_train[In]\n", 110 | " tf.keras.backend.clear_session()\n", 111 | " optim = Adam(lr=0.0002, beta_1=0.5)\n", 112 | " gen = pg.MNIST_Generator(optim=optim)\n", 113 | " dis = pg.MNIST_Discriminator(optim=optim)\n", 114 | " \n", 115 | " #learn generator per digit \n", 116 | " (generator, _, _, _) = pg.SimpGAN(X, generator = gen, discriminator = dis, \n", 117 | " optim = optim, \n", 118 | " epochs = 1, batchSize = 256)\n", 119 | " \n", 120 | " noise = np.random.normal(0, 1, size=[len(X), 100])\n", 121 | " X_c += [generator.predict(noise)]\n", 122 | " y_c += [i]*len(X)\n", 123 | " \n", 124 | "X_c = np.concatenate(X_c) \n", 125 | "y_c = np.array(y_c)\n", 126 | "\n", 127 | "\n", 128 | "## Shuffle labels around\n", 129 | "arr = np.arange(len(X_c))\n", 130 | "np.random.shuffle(arr)\n", 131 | "X_c = X_c[arr]\n", 132 | "y_c = y_c[arr]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# CNN model to classify SimpGan generated images\n", 142 | "y_tr = tf.keras.utils.to_categorical(y_c, NUM_CLASSES)\n", 143 | "y_t = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)\n", 144 | "\n", 145 | "\n", 146 | "x_train = X_c.reshape(X_c.shape[0], 28, 28, 1)\n", 147 | "x_test = X_test.reshape(X_test.shape[0], 28, 28, 1)\n", 148 | "\n", 149 | "classifier = CNNClassifier(NUM_CLASSES,(28,28,1))\n", 150 | "score = classifier.train(x_train,y_tr,x_test,y_t,BATCH_SIZE_PER_EPOCH, NUM_EPOCHS)\n", 151 | "print('Test loss:', score[0])\n", 152 | "print('Test accuracy:', score[1])\n", 153 | "\n", 154 | "r_1 = [score[0],score[1]]" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Using privGan generate synthetic images" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "#generate simple synthetic images of same size as X_train with same balance with privGan\n", 171 | "X_c2 = []\n", 172 | "y_c2 = []\n", 173 | "\n", 174 | "for i in range(NUM_CLASSES):\n", 175 | " print(i)\n", 176 | " In = np.where(y_train==i)\n", 177 | " X = X_train[In]\n", 178 | " tf.keras.backend.clear_session()\n", 179 | " optim = Adam(lr=0.0002, beta_1=0.5)\n", 180 | " generators = [mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5)),\n", 181 | " mnist_gan.MNIST_Generator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 182 | " discriminators = [mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))\n", 183 | " ,mnist_gan.MNIST_Discriminator(optim = Adam(lr=0.0002, beta_1=0.5))]\n", 184 | " pDisc = mnist_gan.MNIST_DiscriminatorPrivate(OutSize = 2, \n", 185 | " optim = Adam(lr=0.0002, beta_1=0.5))\n", 186 | " \n", 187 | " (generators, _, _, _, _, _)= pg.privGAN(X, epochs = 1, \n", 188 | " disc_epochs=1,\n", 189 | " batchSize=256,\n", 190 | " generators = generators, \n", 191 | " discriminators = 
discriminators,\n", 192 | " pDisc = pDisc,\n", 193 | " optim = optim,\n", 194 | " privacy_ratio = 1.0) \n", 195 | " \n", 196 | " noise1 = np.random.normal(0, 1, size=[len(X)//2, 100])\n", 197 | " noise2 = np.random.normal(0, 1, size=[len(X)//2, 100])\n", 198 | " X_c2 += [generators[0].predict(noise1)]\n", 199 | " X_c2 += [generators[1].predict(noise2)]\n", 200 | " y_c2 += [i]*(len(noise1) + len(noise2))\n", 201 | " \n", 202 | "X_c2 = np.concatenate(X_c2) \n", 203 | "y_c2 = np.array(y_c2)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "## Shuffle labels around\n", 213 | "arr = np.arange(len(X_c2))\n", 214 | "np.random.shuffle(arr)\n", 215 | "X_c2 = X_c2[arr]\n", 216 | "y_c2 = y_c2[arr]" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "#train CNN model for images created by privGan\n", 226 | "y_tr = tf.keras.utils.to_categorical(y_c2, NUM_CLASSES)\n", 227 | "y_t = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)\n", 228 | "\n", 229 | "x_train = X_c2.reshape(X_c2.shape[0], 28, 28, 1)\n", 230 | "x_test = X_test.reshape(X_test.shape[0], 28, 28, 1)\n", 231 | "\n", 232 | "\n", 233 | "\n", 234 | "classifier = CNNClassifier(NUM_CLASSES,(28,28,1))\n", 235 | "score = classifier.train(x_train,y_tr,x_test,y_t,BATCH_SIZE_PER_EPOCH, NUM_EPOCHS)\n", 236 | "print('Test loss:', score[0])\n", 237 | "print('Test accuracy:', score[1])\n", 238 | "r_2 = [score[0],score[1]]\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Plot the results of the CNN classifier on the three datasets" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "plt.bar([0,1,2],[r_0[1],r_1[1],r_2[1]])\n", 255 | "plt.xticks([0,1,2],['Real','GAN','privGAN (1.0)'], rotation=45)\n", 256 | "plt.ylabel('Accuracy')" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.8.3" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 4 281 | } 282 | -------------------------------------------------------------------------------- /privacygan/privacy_gan.py: -------------------------------------------------------------------------------- 1 | #Copyright (c) Microsoft Corporation. All rights reserved. 2 | #Licensed under the MIT License. 
3 | 4 | import os 5 | import numpy as np 6 | from tqdm import tqdm 7 | import matplotlib.pyplot as plt 8 | 9 | import tensorflow as tf 10 | from tensorflow.keras import Input 11 | from tensorflow.keras import Model, Sequential 12 | from tensorflow.keras.layers import Reshape, Dense, Dropout, Flatten, LeakyReLU, Conv2D, MaxPool2D, ZeroPadding2D, Conv2DTranspose, UpSampling2D, BatchNormalization 13 | from tensorflow.keras.optimizers import Adam 14 | 15 | from tensorflow.keras.datasets import mnist,cifar10 16 | from sklearn.decomposition import PCA #used by the MC epsilon attack functions below 17 | from tensorflow.keras import initializers 18 | from scipy import stats 19 | import warnings 20 | import pandas as pd 21 | from privacygan.mnist.mnist_gan import MNIST_Discriminator, MNIST_Generator, MNIST_DiscriminatorPrivate 22 | warnings.filterwarnings("ignore") 23 | 24 | 25 | ########################################## GANs ###################################################################### 26 | 27 | def SimpGAN(X_train, generator = MNIST_Generator(), discriminator = MNIST_Discriminator(), 28 | randomDim=100, epochs=200, batchSize=128, optim = Adam(lr=0.0002, beta_1=0.5), 29 | verbose = 1, lSmooth = 0.9, SplitTF = False): 30 | 31 | # Combined network 32 | discriminator.trainable = False 33 | ganInput = Input(shape=(randomDim,)) 34 | x = generator(ganInput) 35 | ganOutput = discriminator(x) 36 | gan = Model(inputs=ganInput, outputs=ganOutput) 37 | gan.compile(loss='binary_crossentropy', optimizer=optim) 38 | 39 | dLosses = [] 40 | gLosses = [] 41 | 42 | batchCount = X_train.shape[0] / batchSize 43 | print('Epochs:', epochs) 44 | print('Batch size:', batchSize) 45 | print('Batches per epoch:', batchCount) 46 | 47 | for e in range(1, epochs+1): 48 | g_t = [] 49 | d_t = [] 50 | for i in range(int(batchCount)): 51 | # Get a random set of input noise and images 52 | noise = np.random.normal(0, 1, size=[batchSize, randomDim]) 53 | imageBatch = X_train[np.random.randint(0, X_train.shape[0], size=batchSize)] 54 | 55 | # Generate fake MNIST images 56 | generatedImages = generator.predict(noise) 57 | # print np.shape(imageBatch), np.shape(generatedImages) 58 | X = np.concatenate([imageBatch, generatedImages]) 59 | 60 | # Labels for generated and real data 61 | yDis = np.zeros(2*batchSize) 62 | # One-sided label smoothing 63 | yDis[:batchSize] = lSmooth 64 | 65 | # Train discriminator 66 | discriminator.trainable = True 67 | #dloss = discriminator.train_on_batch(X, yDis) 68 | if SplitTF: 69 | d_r = discriminator.train_on_batch(imageBatch, lSmooth*np.ones(batchSize)) 70 | d_f = discriminator.train_on_batch(generatedImages,np.zeros(batchSize)) 71 | dloss = d_r + d_f 72 | else: 73 | dloss = discriminator.train_on_batch(X, yDis) 74 | 75 | discriminator.trainable = False 76 | 77 | 78 | # Train generator 79 | noise = np.random.normal(0, 1, size=[batchSize, randomDim]) 80 | yGen = np.ones(batchSize) 81 | gloss = gan.train_on_batch(noise, yGen) 82 | 83 | if verbose == 1: 84 | 85 | print( 86 | 'epoch = %d/%d, batch = %d/%d, d_loss=%.3f, g_loss=%.3f' % 87 | (e, epochs, i, batchCount, dloss, gloss), 88 | 100*' ', 89 | end='\r' 90 | ) 91 | 92 | d_t += [dloss] 93 | g_t += [gloss] 94 | 95 | # Store mean losses over this epoch 96 | dLosses.append(np.mean(d_t)) 97 | gLosses.append(np.mean(g_t)) 98 | 99 | if e%verbose == 0: 100 | print('epoch = %d/%d, d_loss=%.3f, g_loss=%.3f' % (e, epochs, np.mean(d_t),np.mean(g_t)), 100*' ') 101 | 102 | return (generator, discriminator, dLosses, gLosses) 103 | 104 | 105 | 106 | def TrainDiscriminator(X_train,
y_train, discriminator = MNIST_DiscriminatorPrivate(OutSize = 2), 107 | randomDim=100, epochs=200, batchSize=128, optim = Adam(lr=0.0002, beta_1=0.5), 108 | verbose = 1): 109 | 110 | 111 | discriminator.fit(X_train, y_train, 112 | batch_size=batchSize, 113 | epochs=epochs, 114 | verbose=verbose, 115 | validation_data=(X_train, y_train)) 116 | 117 | 118 | return (discriminator) 119 | 120 | 121 | 122 | def privGAN(X_train, generators = [MNIST_Generator(),MNIST_Generator()], 123 | discriminators = [MNIST_Discriminator(),MNIST_Discriminator()], 124 | pDisc = MNIST_DiscriminatorPrivate(OutSize = 2), 125 | randomDim=100, disc_epochs = 50, epochs=200, dp_delay = 100, 126 | batchSize=128, optim = Adam(lr=0.0002, beta_1=0.5), verbose = 1, 127 | lSmooth = 0.95, privacy_ratio = 1.0, SplitTF = False): 128 | 129 | 130 | #make sure the number of generators is the same as the number of discriminators 131 | if len(generators) != len(discriminators): 132 | print('Different number of generators and discriminators') 133 | return() 134 | else: 135 | n_reps = len(generators) 136 | 137 | #throw error if n_reps = 1 138 | if n_reps == 1: 139 | print('You cannot have only one generator-discriminator pair') 140 | return() 141 | 142 | 143 | X = [] 144 | t = len(X_train)//n_reps 145 | y_train = [] 146 | 147 | for i in range(n_reps): 148 | if i= dp_delay: 246 | pDisc2.trainable = True 247 | dp_t[i] = pDisc2.train_on_batch(generatedImages, yDis2) 248 | pDisc2.trainable = False 249 | 250 | 251 | yGen = [np.ones(batchSize)]*n_reps + yDis2f 252 | 253 | #Train combined model 254 | g_t[i] = gan.train_on_batch(noise, yGen)[0] 255 | 256 | if verbose == 1: 257 | print( 258 | 'epoch = %d/%d, batch = %d/%d' % (e, epochs, i, batchCount), 259 | 100*' ', 260 | end='\r' 261 | ) 262 | 263 | 264 | 265 | # Store loss of most recent batch from this epoch 266 | dLosses[:,e] = np.mean(d_t, axis = 1) 267 | dpLosses[e] = np.mean(dp_t) 268 | gLosses[e] = np.mean(g_t) 269 | 270 | if e%verbose == 0: 271 | print('epoch =',e) 272 | print('dLosses =', np.mean(d_t, axis = 1)) 273 | print('dpLosses =', np.mean(dp_t)) 274 | print('gLosses =', np.mean(g_t)) 275 | yp= np.argmax(pDisc2.predict(generatedImages), axis = 1) 276 | print('dp-Accuracy:',np.sum(yDis2 == yp)/len(yp)) 277 | 278 | return (generators, discriminators, pDisc2, dLosses, dpLosses, gLosses) 279 | 280 | 281 | ######################################### Ancillary functions ########################################################## 282 | 283 | 284 | 285 | def DisplayImages(generator, randomDim = 100, NoImages = 100, figSize = (10,10), TargetShape = (28,28)): 286 | 287 | #check to see if the figure size is valid 288 | if (len(figSize)!=2) or (figSize[0]*figSize[1]= 0.5*M: 444 | res += [1] 445 | else: 446 | res += [0] 447 | 448 | 449 | return(np.mean(res)) 450 | 451 | 452 | 453 | 454 | def MC_eps_attack_priv(X, X_comp, X_ho, generators, N = 100000, M = 100, n_pc = 40, reps = 10): 455 | 456 | #flatten images 457 | if len(X.shape)==3: 458 | sh = X.shape[1]*X.shape[2] 459 | elif len(X.shape)==2: 460 | sh = X.shape[1] 461 | else: 462 | sh = X.shape[1]*X.shape[2]*X.shape[3] 463 | 464 | X = np.reshape(X, (len(X),sh)) 465 | X_comp = np.reshape(X_comp, (len(X_comp),sh)) 466 | X_ho = np.reshape(X_ho, (len(X_ho),sh)) 467 | 468 | #fit PCA 469 | pca = PCA(n_components=n_pc) 470 | pca.fit(X_ho) 471 | 472 | res = [] 473 | 474 | for r in range(reps): 475 | 476 | #generate, flatten and dimensionality reduce a ton of synthetic samples 477 | n_g = len(generators) 478 | X_fake_dr = [] 479 | for j in 
range(n_g): 480 | noise = np.random.normal(0, 1, size=[int(N/n_g), 100]) 481 | X_fake = generators[j].predict(noise) 482 | X_fake = np.reshape(X_fake,(len(X_fake),sh)) 483 | X_fake_dr += [pca.transform(X_fake)] 484 | 485 | X_fake_dr = np.vstack(X_fake_dr) 486 | 487 | 488 | idx1 = np.random.randint(len(X), size=M) 489 | idx2 = np.random.randint(len(X_comp), size=M) 490 | 491 | M_x = pca.transform(np.reshape(X[idx1,:],(len(X[idx1,:]),sh))) 492 | M_xc = pca.transform(np.reshape(X_comp[idx2,:],(len(X_comp[idx2,:]),sh))) #idx2 indexes the non-member set 493 | 494 | min_x = [] 495 | min_xc = [] 496 | 497 | #calculate median epsilon 498 | for i in range(M): 499 | temp_x = np.tile(M_x[i,:],(len(X_fake_dr),1)) 500 | temp_xc = np.tile(M_xc[i,:],(len(X_fake_dr),1)) 501 | 502 | D_x = np.sqrt(np.sum((temp_x-X_fake_dr)**2,axis=1)) 503 | D_xc = np.sqrt(np.sum((temp_xc-X_fake_dr)**2,axis=1)) 504 | 505 | min_x += [np.min(D_x)] 506 | min_xc += [np.min(D_xc)] 507 | 508 | eps = np.median(min_x + min_xc) 509 | 510 | s_x = [] 511 | s_xc = [] 512 | 513 | #estimate the integral 514 | for i in range(M): 515 | temp_x = np.tile(M_x[i,:],(len(X_fake_dr),1)) 516 | temp_xc = np.tile(M_xc[i,:],(len(X_fake_dr),1)) 517 | 518 | D_x = np.sqrt(np.sum((temp_x-X_fake_dr)**2,axis=1)) 519 | D_xc = np.sqrt(np.sum((temp_xc-X_fake_dr)**2,axis=1)) 520 | 521 | s_x += [np.sum(D_x <= eps)/len(X_fake_dr)] 522 | s_xc += [np.sum(D_xc <= eps)/len(X_fake_dr)] 523 | 524 | s_x_xc = np.array(s_x + s_xc) 525 | In = np.argsort(-s_x_xc)[:M] 526 | 527 | 528 | if np.sum(In < M) >= 0.5*M: #indices < M are training points; attack succeeds if they dominate the top M 529 | res += [1] 530 | else: 531 | res += [0] 532 | 533 | 534 | return(np.mean(res)) 535 | --------------------------------------------------------------------------------
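The notebooks above show the intended end-to-end flow for these utilities. As a quick reference, here is a minimal smoke-test sketch on flattened MNIST; the three-way split, the single training epoch, and the reduced Monte Carlo sample counts are illustrative assumptions for a fast run, not the evaluation protocol from the paper.

# Minimal sketch (assumptions: default MNIST models, 1 epoch, reduced N/reps).
import numpy as np
import tensorflow as tf
from privacygan import privacy_gan as pg

# Load and flatten MNIST, scaled to [-1, 1] as in the notebooks.
(X_tr, _), (X_te, _) = tf.keras.datasets.mnist.load_data()
X_all = np.concatenate([X_tr, X_te]).astype(np.float32)
X_all = (X_all - 127.5) / 127.5
X_all = X_all.reshape(len(X_all), -1)

# Illustrative three-way split: members / non-members / PCA holdout.
idx = np.random.permutation(len(X_all))
n = int(0.1 * len(X_all))
X, X_ho, X_comp = X_all[idx[:n]], X_all[idx[n:2*n]], X_all[idx[2*n:]]

# Train privGAN with its default two MNIST generator/discriminator pairs.
(generators, discriminators, _, _, _, _) = pg.privGAN(X, epochs=1, disc_epochs=1, batchSize=256)

# White-box membership attack on the trained discriminators.
pg.WBattack_priv(X, X_comp, discriminators)

# Monte Carlo epsilon attack; accuracy near 0.5 means the attacker does
# no better than random guessing on membership.
acc = pg.MC_eps_attack_priv(X, X_comp, X_ho, generators, N=10000, M=100, n_pc=40, reps=2)
print('MC epsilon attack accuracy:', acc)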