├── cleverhans ├── devtools │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ └── test_format.py │ ├── version.py │ ├── mocks.py │ ├── checks.py │ ├── LICENSE.txt │ └── list_files.py ├── __init__.py ├── utils_mnist.py ├── model.py ├── attacks_th.py ├── utils_keras.py └── utils_th.py ├── cleverhans_tutorials ├── __init__.py ├── mnist_tutorial_th.py ├── mnist_tutorial_keras_tf.py ├── mnist_tutorial_tf.py ├── mnist_tutorial_jsma.py └── mnist_tutorial_cw.py ├── requirements.txt ├── assets ├── logo.png └── logo.psd ├── examples ├── robust_vision_benchmark │ ├── cleverhans_attack_example │ │ ├── requirements.txt │ │ ├── main.py │ │ ├── Dockerfile │ │ └── utils.py │ └── README.md ├── nips17_adversarial_competition │ ├── .gitignore │ ├── sample_attacks │ │ ├── fgsm │ │ │ ├── metadata.json │ │ │ ├── run_attack.sh │ │ │ └── attack_fgsm.py │ │ ├── noop │ │ │ ├── metadata.json │ │ │ ├── run_attack.sh │ │ │ └── attack_noop.py │ │ ├── random_noise │ │ │ ├── metadata.json │ │ │ ├── run_attack.sh │ │ │ └── attack_random_noise.py │ │ └── download_checkpoints.sh │ ├── sample_defenses │ │ ├── adv_inception_v3 │ │ │ ├── metadata.json │ │ │ ├── run_defense.sh │ │ │ └── defense.py │ │ ├── base_inception_model │ │ │ ├── metadata.json │ │ │ ├── run_defense.sh │ │ │ └── defense.py │ │ ├── ens_adv_inception_resnet_v2 │ │ │ ├── metadata.json │ │ │ ├── run_defense.sh │ │ │ └── defense.py │ │ └── download_checkpoints.sh │ ├── sample_targeted_attacks │ │ ├── iter_target_class │ │ │ ├── metadata.json │ │ │ ├── run_attack.sh │ │ │ └── attack_iter_target_class.py │ │ ├── step_target_class │ │ │ ├── metadata.json │ │ │ ├── run_attack.sh │ │ │ └── attack_step_target_class.py │ │ └── download_checkpoints.sh │ ├── download_data.sh │ ├── validation_tool │ │ ├── README.md │ │ └── validate_submission.py │ ├── run_attacks_and_defenses.sh │ ├── dataset │ │ ├── README.md │ │ └── download_images.py │ └── README.md ├── madry_lab_challenges │ ├── LICENSE │ ├── README │ ├── madry_mnist_model.py │ └── 
attack_model.py └── README.md ├── docs ├── _templates │ └── layout.html ├── source │ ├── model.md │ └── attacks.md ├── Makefile ├── index.md └── conf.py ├── setup.py ├── CODE_OF_CONDUCT.rst ├── tests_tf ├── test_mnist_tutorial_cw.py ├── test_attacks_tf.py ├── test_mnist_blackbox.py ├── test_mnist_tutorial_keras_tf.py ├── test_mnist_tutorial_jsma.py ├── test_utils_tf.py ├── test_mnist_tutorial_tf.py ├── test_model.py ├── test_utils.py └── test_utils_keras.py ├── LICENSE ├── CONTRIBUTING.md ├── .gitignore ├── tests_th └── test_attack_class.py ├── .travis.yml └── README.md /cleverhans/devtools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleverhans_tutorials/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cleverhans/devtools/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nose 2 | pycodestyle 3 | scipy 4 | tensorflow 5 | matplotlib 6 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AngusG/cleverhans-attacking-bnns/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AngusG/cleverhans-attacking-bnns/HEAD/assets/logo.psd -------------------------------------------------------------------------------- 
/examples/robust_vision_benchmark/cleverhans_attack_example/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | tensorflow 3 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends '!layout.html' %} 2 | 3 | {% block footer %} 4 | 5 | {% endblock %} -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore Tensorflow checkpoints 2 | *.ckpt 3 | # ignore dataset images 4 | dataset/images 5 | -------------------------------------------------------------------------------- /docs/source/model.md: -------------------------------------------------------------------------------- 1 | `model` module 2 | ------------------------ 3 | 4 | .. automodule:: cleverhans.model 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/attacks.md: -------------------------------------------------------------------------------- 1 | `attacks` module 2 | -------------------------- 3 | 4 | .. 
automodule:: cleverhans.attacks 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /cleverhans/__init__.py: -------------------------------------------------------------------------------- 1 | from cleverhans.devtools.version import dev_version 2 | 3 | # Attach a hex digest to the version string to keep track of changes 4 | # in the development branch 5 | __version__ = '2.0.0-' + dev_version() 6 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/fgsm/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "attack", 3 | "container": "goodfellow/competition:cpu", 4 | "container_gpu": "goodfellow/competition:gpu", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/noop/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "attack", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/random_noise/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "attack", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_defenses/adv_inception_v3/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "defense", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_defense.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_defenses/base_inception_model/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "defense", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_defense.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_defenses/ens_adv_inception_resnet_v2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "defense", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_defense.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_targeted_attacks/iter_target_class/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "targeted_attack", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_targeted_attacks/step_target_class/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "targeted_attack", 3 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 4 | 
"container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /examples/madry_lab_challenges/LICENSE: -------------------------------------------------------------------------------- 1 | Note: 2 | 3 | Some files in this directory are derivative works of the Madry Lab's 4 | MNIST challenge, which has the following copyright: 5 | 6 | Copyright (c) 2017 Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, 7 | Dimitris Tsipras, and Adrian Vladu. 8 | 9 | The MNIST challenge is released under the MIT License, as is CleverHans. 10 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/download_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Scripts which download checkpoints for provided models. 4 | # 5 | 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | 8 | # Download inception v3 checkpoint for fgsm attack. 9 | cd "${SCRIPT_DIR}/fgsm/" 10 | wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz 11 | tar -xvzf inception_v3_2016_08_28.tar.gz 12 | rm inception_v3_2016_08_28.tar.gz 13 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$( dirname "${BASH_SOURCE[0]}" )" 4 | 5 | # Download checkpoints for sample attacks and defenses. 6 | sample_attacks/download_checkpoints.sh 7 | sample_targeted_attacks/download_checkpoints.sh 8 | sample_defenses/download_checkpoints.sh 9 | 10 | # Download dataset. 
def attack(model, session, a):
    """
    Runs the fast gradient method on the original image held by `a`.

    :param model: model object handed to `FastGradientMethod`
    :param session: session forwarded to the attack as `sess`
    :param a: object exposing an `original_image` attribute
    :return: the result of `generate_np` for the one-image batch
    """
    attacker = FastGradientMethod(model, sess=session)
    # Indexing with np.newaxis prepends an axis, so the single image is
    # handed to the attack as a batch of one.
    return attacker.generate_np(a.original_image[np.newaxis])
#!/bin/bash
#
# run_defense.sh is a script which executes the defense
#
# The environment which runs attacks and defenses calls it in the following way:
#   run_defense.sh INPUT_DIR OUTPUT_FILE
# where:
#   INPUT_DIR - directory with input PNG images
#   OUTPUT_FILE - file to store classification labels
#

INPUT_DIR=$1
OUTPUT_FILE=$2

# The checkpoint path is relative, so defense.py and the checkpoint are
# looked up in the current working directory.
python defense.py \
  --input_dir="${INPUT_DIR}" \
  --output_file="${OUTPUT_FILE}" \
  --checkpoint_path=adv_inception_v3.ckpt
FROM python:3.6

# set workdir to the home directory
WORKDIR /root

# install required packages
RUN pip3 install --no-cache-dir foolbox
RUN pip3 install --no-cache-dir robust_vision_benchmark
# fetch CleverHans over HTTPS so the source is not cloned over an
# unencrypted, unauthenticated connection
RUN pip3 install --no-cache-dir -e git+https://github.com/tensorflow/cleverhans.git#egg=cleverhans

# install other python packages
COPY requirements.txt requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# add your model script
COPY main.py main.py
COPY utils.py utils.py

CMD ["python3", "./main.py"]
def dev_version():
    """
    Computes a single MD5 hexdigest over the contents of every `.py` file
    reported by `list_files`.

    :return: the hexdigest string
    """
    digest = hashlib.md5()
    # Sorting the file names fixes the order in which contents are hashed.
    for path in sorted(list_files(suffix=".py")):
        with open(path, 'rb') as handle:
            digest.update(handle.read())
    return digest.hexdigest()
#!/bin/bash
#
# run_attack.sh is a script which executes the attack
#
# The environment which runs attacks and defenses calls it in the following way:
#   run_attack.sh INPUT_DIR OUTPUT_DIR MAX_EPSILON
# where:
#   INPUT_DIR - directory with input PNG images
#   OUTPUT_DIR - directory where adversarial images should be written
#   MAX_EPSILON - maximum allowed L_{\infty} norm of adversarial perturbation
#

INPUT_DIR=$1
OUTPUT_DIR=$2
MAX_EPSILON=$3

python attack_random_noise.py \
  --input_dir="${INPUT_DIR}" \
  --output_dir="${OUTPUT_DIR}" \
  --max_epsilon="${MAX_EPSILON}"
-------------------------------------------------------------------------------- 1 | CleverHans is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all CleverHans spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI / Pennsylvania State University team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = CleverHans 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | full-api: 16 | sphinx-apidoc --suffix .md -o source .. 17 | 18 | 19 | .PHONY: help Makefile 20 | 21 | # Catch-all target: route all unknown targets to Sphinx using the new 22 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
#!/bin/bash
#
# Script which downloads checkpoints for the provided targeted attacks.
#
# The inception v3 archive is downloaded once, extracted into
# step_target_class/, then moved to iter_target_class/ and extracted there
# too, before the archive is deleted.
#

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Download inception v3 checkpoint for step_target_class attack.
# Stop if the directory is missing or the download fails; otherwise the
# commands below would run in the wrong directory or on a missing archive.
cd "${SCRIPT_DIR}/step_target_class/" || exit 1
wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz || exit 1
tar -xvzf inception_v3_2016_08_28.tar.gz

# Another copy of inception v3 checkpoint for iter_target_class attack
mv inception_v3_2016_08_28.tar.gz "${SCRIPT_DIR}/iter_target_class/"
cd "${SCRIPT_DIR}/iter_target_class/" || exit 1
tar -xvzf inception_v3_2016_08_28.tar.gz
rm inception_v3_2016_08_28.tar.gz
def random_feed_dict(rng, placeholders):
    """
    Builds a `feed_dict` holding one random float32 array per placeholder.

    :param rng: A numpy.random.RandomState instance
    :param placeholders: List of tensorflow placeholders
    :return: A dict mapping each placeholder to a value drawn with
             `rng.randn` using the placeholder's shape
    :raises NotImplementedError: if a placeholder's dtype is not float32
    """
    feed = {}
    for ph in placeholders:
        # Only float32 placeholders are supported.
        if ph.dtype != 'float32':
            raise NotImplementedError()
        sample = rng.randn(*ph.shape)
        feed[ph] = sample.astype('float32')
    return feed
#!/bin/bash
#
# run_attack.sh is a script which executes the attack
#
# The environment which runs attacks and defenses calls it in the following way:
#   run_attack.sh INPUT_DIR OUTPUT_DIR MAX_EPSILON
# where:
#   INPUT_DIR - directory with input PNG images
#   OUTPUT_DIR - directory where adversarial images should be written
#   MAX_EPSILON - maximum allowed L_{\infty} norm of adversarial perturbation
#

INPUT_DIR=$1
OUTPUT_DIR=$2
MAX_EPSILON=$3

# Number of iterations to run this attack for
NUM_ITERATIONS=20

python attack_iter_target_class.py \
  --input_dir="${INPUT_DIR}" \
  --output_dir="${OUTPUT_DIR}" \
  --max_epsilon="${MAX_EPSILON}" \
  --num_iter="${NUM_ITERATIONS}" \
  --checkpoint_path=inception_v3.ckpt
class CleverHansTest(unittest.TestCase):
    """Base test case that reseeds numpy and prints each test's duration."""

    def setUp(self):
        """Record the start time and reseed numpy's global RNG."""
        self.test_start = time.time()
        # A fixed seed makes the randomness identical on every run
        np.random.seed(1234)

    def tearDown(self):
        """Print how long the test that just ran took."""
        elapsed = time.time() - self.test_start
        print(self.id(), "took", elapsed, "seconds")

    def assertClose(self, x, y, *args, **kwargs):
        """
        Asserts that `np.allclose(x, y, *args, **kwargs)` holds.

        On failure the assertion message is the pair `(x, y)`.
        """
        # A plain assert is used because
        # self.assertTrue(np.allclose(x, y)) doesn't give a useful message
        # on failure
        is_close = np.allclose(x, y, *args, **kwargs)
        assert is_close, (x, y)
class TestMNISTTutorialCW(unittest.TestCase):
    """Runs the Carlini-Wagner MNIST tutorial and checks its report."""

    def test_mnist_tutorial_cw(self):
        """
        Runs the tutorial on a reduced dataset and checks the accuracy
        values in the returned report.
        """
        # Imported inside the test so that loading this module does not
        # itself import the tutorial.
        from cleverhans_tutorials import mnist_tutorial_cw

        # Run the MNIST tutorial on a dataset of reduced size
        # and disable visualization.
        cw_tutorial_args = {'train_start': 0,
                            'train_end': 10000,
                            'test_start': 0,
                            'test_end': 1666,
                            'viz_enabled': False}
        report = mnist_tutorial_cw.mnist_tutorial_cw(**cw_tutorial_args)

        # Check accuracy values contained in the AccuracyReport object.
        # The dedicated assert methods report the offending values on
        # failure instead of just "False is not true".
        self.assertGreater(report.clean_train_clean_eval, 0.85)
        self.assertEqual(report.clean_train_adv_eval, 0.00)

        # There is no adversarial training in the CW tutorial
        self.assertEqual(report.adv_train_clean_eval, 0.)
        self.assertEqual(report.adv_train_adv_eval, 0.)
def test_fgm_gradient_max():
    """Check that the output of `fgm` is differentiable w.r.t. its input.

    A linear softmax model is built on placeholders, one randomly chosen
    scalar of the adversarial example is sliced out and differentiated with
    respect to the clean input `x`. The test expects a gradient of 1 at the
    selected (example, feature) position and 0 everywhere else.
    """
    input_dim = 2
    num_classes = 3
    batch_size = 4
    rng = np.random.RandomState([2017, 8, 23])
    x = tf.placeholder(tf.float32, [batch_size, input_dim])
    weights = tf.placeholder(tf.float32, [input_dim, num_classes])
    logits = tf.matmul(x, weights)
    probs = tf.nn.softmax(logits)
    adv_x = fgm(x, probs)
    random_example = rng.randint(batch_size)
    random_feature = rng.randint(input_dim)
    output = tf.slice(adv_x, [random_example, random_feature], [1, 1])
    dx, = tf.gradients(output, x)
    # The following line catches GitHub issue #243
    assert dx is not None
    # Use the session as a context manager so it is closed (and its
    # resources released) even when the run or the assertion below fails.
    with tf.Session() as sess:
        dx = sess.run(dx, feed_dict=random_feed_dict(rng, [x, weights]))
    ground_truth = np.zeros((batch_size, input_dim))
    ground_truth[random_example, random_feature] = 1.
    assert np.allclose(dx, ground_truth), (dx, ground_truth)
4 | # 5 | 6 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | 8 | # Download inception v3 checkpoint into base_inception_model subdirectory 9 | cd "${SCRIPT_DIR}/base_inception_model/" 10 | wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz 11 | tar -xvzf inception_v3_2016_08_28.tar.gz 12 | rm inception_v3_2016_08_28.tar.gz 13 | 14 | # Download adversarially trained inception v3 checkpoint 15 | # into adv_inception_v3 subdirectory 16 | cd "${SCRIPT_DIR}/adv_inception_v3/" 17 | wget http://download.tensorflow.org/models/adv_inception_v3_2017_08_18.tar.gz 18 | tar -xvzf adv_inception_v3_2017_08_18.tar.gz 19 | rm adv_inception_v3_2017_08_18.tar.gz 20 | 21 | # Download ensemble adversarially trained inception resnet v2 checkpoint 22 | # into ens_adv_inception_resnet_v2 subdirectory 23 | cd "${SCRIPT_DIR}/ens_adv_inception_resnet_v2/" 24 | wget http://download.tensorflow.org/models/ens_adv_inception_resnet_v2_2017_08_18.tar.gz 25 | tar -xvzf ens_adv_inception_resnet_v2_2017_08_18.tar.gz 26 | rm ens_adv_inception_resnet_v2_2017_08_18.tar.gz 27 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to CleverHans 2 | 3 | First off, thank you for considering contributing to CleverHans. 4 | Following these guidelines helps to communicate that you respect 5 | the time of the researchers and developers managing and developing this open 6 | source project. In return, they should reciprocate that respect in 7 | addressing your issue, assessing changes, and helping you finalize 8 | your pull requests. 9 | 10 | Adding new features, improving documentation, bug triaging, or 11 | writing tutorials are all 12 | examples of helpful contributions. 
class TestMNISTBlackboxF(unittest.TestCase):
    """Smoke test for the MNIST black-box attack tutorial."""

    def test_mnist_blackbox(self):
        from cleverhans_tutorials import mnist_blackbox

        np.random.seed(42)
        import tensorflow as tf
        tf.set_random_seed(42)

        # Run the MNIST tutorial on a dataset of reduced size, reduced number
        # of data augmentations, increased substitute holdout for faster
        # runtime.
        mnist_blackbox_args = {'train_start': 0,
                               'train_end': 5000,
                               'test_start': 0,
                               'test_end': 2000,
                               'data_aug': 1,
                               'holdout': 1000,
                               'nb_epochs': 2,
                               'nb_epochs_s': 6}
        report = mnist_blackbox.mnist_blackbox(**mnist_blackbox_args)

        # Check the accuracy values stored in the returned report.
        # assertGreater / assertLess print the offending value themselves,
        # so no explicit failure message is needed.
        self.assertGreater(report['bbox'], 0.7)
        self.assertGreater(report['sub'], 0.7)
        self.assertLess(report['bbox_on_sub_adv_ex'], 0.2)
class TestMNISTTutorialJSMA(unittest.TestCase):
    """Smoke test for the JSMA MNIST tutorial."""

    def test_mnist_tutorial_jsma(self):

        np.random.seed(42)
        import tensorflow as tf
        tf.set_random_seed(42)

        from cleverhans_tutorials import mnist_tutorial_jsma

        # Run the MNIST tutorial on a dataset of reduced size
        # and disable visualization.
        jsma_tutorial_args = {'train_start': 0,
                              'train_end': 1000,
                              'test_start': 0,
                              'test_end': 1666,
                              'viz_enabled': False,
                              'source_samples': 1,
                              'nb_epochs': 2}
        report = mnist_tutorial_jsma.mnist_tutorial_jsma(**jsma_tutorial_args)

        # Check accuracy values contained in the AccuracyReport object
        # We already have JSMA tests in test_attacks.py, so just sanity
        # check the values here. The dedicated assert* helpers print both
        # operands on failure, unlike assertTrue(a > b).
        self.assertGreater(report.clean_train_clean_eval, 0.65)
        self.assertLess(report.clean_train_adv_eval, 0.25)

        # There is no adversarial training in the JSMA tutorial
        self.assertEqual(report.adv_train_clean_eval, 0.)
        self.assertEqual(report.adv_train_adv_eval, 0.)
module is a derivative work from the devtools module of pylearn2. 2 | We reproduce the corresponding license here. 3 | 4 | Copyright (c) 2011--2014, Université de Montréal 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its contributors 18 | may be used to endorse or promote products derived from this software without 19 | specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def numpy_kl_with_logits(p_logits, q_logits):
    """Reference NumPy implementation of the batch-mean KL(p || q).

    :param p_logits: 2-D array of unnormalised log-probabilities, one row
                     per example, defining the distribution p.
    :param q_logits: array of the same shape defining the distribution q.
    :return: scalar, the KL divergence of each row summed over axis 1 and
             averaged over the rows.

    Neither input array is modified.
    """
    def numpy_log_softmax(logits):
        # `logits - max` allocates a new array, so the caller's data is left
        # untouched, and it keeps np.exp from overflowing on large logits.
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        return shifted - log_norm

    log_p = numpy_log_softmax(p_logits)
    log_q = numpy_log_softmax(q_logits)
    p = np.exp(log_p)
    return (p * (log_p - log_q)).sum(axis=1).mean()
#!/bin/bash
#
# Copies the sample attacks, defenses and the dataset into a fresh temporary
# working directory and runs run_attacks_and_defenses.py on those copies.

# Abort as soon as any command fails.
set -e

# Absolute path of the directory containing this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Inputs, all relative to the script location.
ATTACKS_DIR="${SCRIPT_DIR}/sample_attacks"
TARGETED_ATTACKS_DIR="${SCRIPT_DIR}/sample_targeted_attacks"
DEFENSES_DIR="${SCRIPT_DIR}/sample_defenses"
DATASET_DIR="${SCRIPT_DIR}/dataset/images"
DATASET_METADATA_FILE="${SCRIPT_DIR}/dataset/dev_dataset.csv"
MAX_EPSILON=16

# Work on copies inside a temporary directory so that the originals are
# never overwritten. On macOS the path returned by mktemp is prefixed
# with /private.
WORKING_DIR=$(mktemp -d)
if [[ "${OSTYPE}" == "darwin"* ]]; then
  WORKING_DIR="/private${WORKING_DIR}"
fi
echo "Preparing working directory: ${WORKING_DIR}"

for subdir in attacks targeted_attacks defenses dataset \
              intermediate_results output_dir; do
  mkdir "${WORKING_DIR}/${subdir}"
done

cp -R "${ATTACKS_DIR}"/* "${WORKING_DIR}/attacks"
cp -R "${TARGETED_ATTACKS_DIR}"/* "${WORKING_DIR}/targeted_attacks"
cp -R "${DEFENSES_DIR}"/* "${WORKING_DIR}/defenses"
cp -R "${DATASET_DIR}"/* "${WORKING_DIR}/dataset"
cp "${DATASET_METADATA_FILE}" "${WORKING_DIR}/dataset.csv"

echo "Running attacks and defenses"
python "${SCRIPT_DIR}/run_attacks_and_defenses.py" \
  --attacks_dir="${WORKING_DIR}/attacks" \
  --targeted_attacks_dir="${WORKING_DIR}/targeted_attacks" \
  --defenses_dir="${WORKING_DIR}/defenses" \
  --dataset_dir="${WORKING_DIR}/dataset" \
  --intermediate_results_dir="${WORKING_DIR}/intermediate_results" \
  --dataset_metadata="${WORKING_DIR}/dataset.csv" \
  --output_dir="${WORKING_DIR}/output_dir" \
  --epsilon="${MAX_EPSILON}" \
  --save_all_classification

echo "Output is saved in directory '${WORKING_DIR}/output_dir'"
class TestModelClass(unittest.TestCase):
    """Each method called on a plain `Model` instance is expected to raise."""

    def test_get_layer(self):
        # Plain base-class instance plus a placeholder input
        bare_model, dummy_input = Model(), []

        # `get_layer` must raise while it is not implemented
        with self.assertRaises(Exception) as raised:
            bare_model.get_layer(dummy_input, layer='')
        self.assertTrue(raised.exception)

    def test_get_logits(self):
        # Plain base-class instance plus a placeholder input
        bare_model, dummy_input = Model(), []

        # `get_logits` must raise while it is not implemented
        with self.assertRaises(Exception) as raised:
            bare_model.get_logits(dummy_input)
        self.assertTrue(raised.exception)

    def test_get_probs(self):
        # Plain base-class instance plus a placeholder input
        bare_model, dummy_input = Model(), []

        # `get_probs` must raise while it is not implemented
        with self.assertRaises(Exception) as raised:
            bare_model.get_probs(dummy_input)
        self.assertTrue(raised.exception)

    def test_get_layer_names(self):
        # Plain base-class instance; this method takes no input
        bare_model = Model()

        # `get_layer_names` must raise while it is not implemented
        with self.assertRaises(Exception) as raised:
            bare_model.get_layer_names()
        self.assertTrue(raised.exception)

    def test_fprop(self):
        # Plain base-class instance plus a placeholder input
        bare_model, dummy_input = Model(), []

        # `fprop` must raise while it is not implemented
        with self.assertRaises(Exception) as raised:
            bare_model.fprop(dummy_input)
        self.assertTrue(raised.exception)
class TestAttackGenerateNp(unittest.TestCase):
    """`generate_np` called on the base `Attack` is expected to raise."""

    def test_inf_loop(self):
        import numpy as np

        # Identity callable standing in for a real model
        def model(x):
            return x

        attack = Attack(model, back='th', sess=None)
        zeros_batch = np.zeros((10, 5, 5, 1))

        # The call must raise instead of completing
        with self.assertRaises(Exception) as raised:
            attack.generate_np(zeros_batch)
        self.assertTrue(raised.exception)
def _list_files(path, suffix=""):
    """
    Recursively collects the files below `path` whose name ends in `suffix`.

    Parameters
    ----------
    path : str
        a file or directory path
    suffix : str
        required ending of the file path; the default "" matches every file

    Returns
    -------
    matches : list
        Paths of all matching files found under `path`. A `path` that is
        itself a file yields a one-element list when it matches and an
        empty list otherwise.
    """
    # Leaf case: a plain file either matches the suffix or it does not
    if not os.path.isdir(path):
        assert os.path.exists(path), "couldn't find file '%s'" % path
        return [path] if path.endswith(suffix) else []

    # Directory case: gather the matches of every entry, in listing order
    matches = []
    for entry in os.listdir(path):
        matches.extend(_list_files(os.path.join(path, entry), suffix))
    return matches
28 | * **TrueLabel** - true label of the image. 29 | * **TargetClass** - label for targeted adversarial attack. 30 | * **OriginalLandingURL** - original landing page where this image was found. 31 | * **License** - license under which the image was distributed by its author. 32 | * **Author** - author of the image. 33 | * **AuthorProfileURL** - URL of the author's profile. 34 | 35 | The dataset is labelled with 36 | [ImageNet](http://www.image-net.org/challenges/LSVRC/2012/) labels. 37 | Specific values of labels are compatible with pre-trained Inception models, 38 | which are available as a part of 39 | [TF-Slim](https://github.com/tensorflow/models/tree/master/slim). 40 | In particular, pre-trained Inception v3 and InceptionResnet v2 can be used 41 | to classify the dataset with high accuracy. 42 | 43 | ## Downloading images 44 | 45 | `dev_dataset.csv` contains only URLs of the images. 46 | Actual images have to be downloaded before being used for experiments. 47 | 48 | `download_images.py` is a Python program which downloads images for all 49 | records in `dev_dataset.csv`. Usage: 50 | 51 | ``` 52 | # Replace CSV_FILE with path to dev_dataset.csv 53 | CSV_FILE=dev_dataset.csv 54 | # Replace OUTPUT_DIR with path to directory where all images should be stored 55 | OUTPUT_DIR=images 56 | # Download images 57 | python download_images.py --input_file=${CSV_FILE} --output_dir=${OUTPUT_DIR} 58 | ``` 59 | To speed up image downloading, multiple threads are spawned (the default is CPU_COUNT + 1). 60 | You can change this setting to any other positive value using the `--threads` flag. 61 | 62 | All downloaded images will be cropped according to the bounding boxes in 63 | `dev_dataset.csv` and resized to 299x299 pixels. 64 | Each image will be saved in PNG format with filename `IMAGE_ID.png` 65 | where `IMAGE_ID` is the id of the image from `dev_dataset.csv`.
class MadryMNIST(Model):
    """Two-conv, two-dense MNIST classifier exposed through the `Model` API.

    All weights are created once in `__init__`; `fprop` only wires them
    together, so it can be called several times on the same instance.
    """

    def __init__(self, **kwargs):
        """Create the eight weight/bias variables, then initialise `Model`.

        :param kwargs: forwarded unchanged to `Model.__init__`.
        """
        # NOTE: for compatibility with Madry Lab downloadable checkpoints,
        # we cannot use scopes, give these variables names, etc.
        # NOTE(review): presumably the creation order below therefore fixes
        # the auto-generated variable names -- do not reorder without
        # confirming against the checkpoints.
        self.W_conv1 = self._weight_variable([5, 5, 1, 32])
        self.b_conv1 = self._bias_variable([32])
        self.W_conv2 = self._weight_variable([5, 5, 32, 64])
        self.b_conv2 = self._bias_variable([64])
        self.W_fc1 = self._weight_variable([7 * 7 * 64, 1024])
        self.b_fc1 = self._bias_variable([1024])
        self.W_fc2 = self._weight_variable([1024, 10])
        self.b_fc2 = self._bias_variable([10])
        super(MadryMNIST, self).__init__(**kwargs)

    def fprop(self, x):
        """Build the forward pass on `x` and return its intermediate tensors.

        :param x: input tensor fed to the first convolution
                  (assumes a 28x28 single-channel batch, given the
                  [5, 5, 1, 32] kernel and the 7 * 7 * 64 flatten
                  -- TODO confirm).
        :return: the result of `deterministic_dict(locals())` with the
                 "self" entry removed, i.e. a mapping keyed by the local
                 variable names of this method.
        """

        output = OrderedDict()
        # first convolutional layer
        h_conv1 = tf.nn.relu(self._conv2d(x, self.W_conv1) + self.b_conv1)
        h_pool1 = self._max_pool_2x2(h_conv1)

        # second convolutional layer
        h_conv2 = tf.nn.relu(
            self._conv2d(h_pool1, self.W_conv2) + self.b_conv2)
        h_pool2 = self._max_pool_2x2(h_conv2)

        # first fully connected layer

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, self.W_fc1) + self.b_fc1)

        # output layer
        logits = tf.matmul(h_fc1, self.W_fc2) + self.b_fc2

        # The keys of the returned mapping are the local variable names
        # above, so renaming a local changes the public layer names.
        # NOTE(review): at this point locals() also holds `x` and the
        # placeholder `output` bound at the top of the method; whether they
        # end up in the result depends on deterministic_dict -- confirm.
        output = deterministic_dict(locals())
        del output["self"]

        return output

    @staticmethod
    def _weight_variable(shape):
        """Return a variable of `shape` drawn from a truncated normal
        with stddev 0.1."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    @staticmethod
    def _bias_variable(shape):
        """Return a variable of `shape` filled with the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    @staticmethod
    def _conv2d(x, W):
        """Convolve `x` with kernel `W` using stride 1 and SAME padding."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    @staticmethod
    def _max_pool_2x2(x):
        """Max-pool `x` with a 2x2 window, stride 2 and SAME padding."""
        return tf.nn.max_pool(x,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
24 | """ 25 | assert isinstance(train_start, int) 26 | assert isinstance(train_end, int) 27 | assert isinstance(test_start, int) 28 | assert isinstance(test_end, int) 29 | 30 | if 'tensorflow' in sys.modules: 31 | from tensorflow.examples.tutorials.mnist import input_data 32 | mnist = input_data.read_data_sets(datadir, one_hot=True, reshape=False) 33 | X_train = np.vstack((mnist.train.images, mnist.validation.images)) 34 | Y_train = np.vstack((mnist.train.labels, mnist.validation.labels)) 35 | X_test = mnist.test.images 36 | Y_test = mnist.test.labels 37 | else: 38 | warnings.warn("CleverHans support for Theano is deprecated and " 39 | "will be dropped on 2017-11-08.") 40 | import keras 41 | from keras.datasets import mnist 42 | from keras.utils import np_utils 43 | 44 | # These values are specific to MNIST 45 | img_rows = 28 46 | img_cols = 28 47 | nb_classes = 10 48 | 49 | # the data, shuffled and split between train and test sets 50 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 51 | 52 | if keras.backend.image_dim_ordering() == 'th': 53 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 54 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 55 | 56 | X_train = X_train.astype('float32') 57 | X_test = X_test.astype('float32') 58 | X_train /= 255 59 | X_test /= 255 60 | 61 | # convert class vectors to binary class matrices 62 | Y_train = np_utils.to_categorical(y_train, nb_classes) 63 | Y_test = np_utils.to_categorical(y_test, nb_classes) 64 | 65 | X_train = X_train[train_start:train_end] 66 | Y_train = Y_train[train_start:train_end] 67 | X_test = X_test[test_start:test_end] 68 | Y_test = Y_test[test_start:test_end] 69 | 70 | print('X_train shape:', X_train.shape) 71 | print('X_test shape:', X_test.shape) 72 | 73 | return X_train, Y_train, X_test, Y_test 74 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/noop/attack_noop.py: 
-------------------------------------------------------------------------------- 1 | """Implementation of sample attack.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import numpy as np 9 | 10 | from scipy.misc import imread 11 | from scipy.misc import imsave 12 | 13 | import tensorflow as tf 14 | 15 | 16 | tf.flags.DEFINE_string( 17 | 'master', '', 'The address of the TensorFlow master to use.') 18 | 19 | tf.flags.DEFINE_string( 20 | 'input_dir', '', 'Input directory with images.') 21 | 22 | tf.flags.DEFINE_string( 23 | 'output_dir', '', 'Output directory with images.') 24 | 25 | tf.flags.DEFINE_float( 26 | 'max_epsilon', 16.0, 'Maximum size of adversarial perturbation.') 27 | 28 | tf.flags.DEFINE_integer( 29 | 'image_width', 299, 'Width of each input images.') 30 | 31 | tf.flags.DEFINE_integer( 32 | 'image_height', 299, 'Height of each input images.') 33 | 34 | tf.flags.DEFINE_integer( 35 | 'batch_size', 16, 'How many images process at one time.') 36 | 37 | FLAGS = tf.flags.FLAGS 38 | 39 | 40 | def load_images(input_dir, batch_shape): 41 | """Read png images from input directory in batches. 42 | 43 | Args: 44 | input_dir: input directory 45 | batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3] 46 | 47 | Yields: 48 | filenames: list file names without path of each image 49 | Length of this list could be less than batch_size, in this case only 50 | first few images of the result are elements of the minibatch. 
51 | images: array with all images from this batch 52 | """ 53 | images = np.zeros(batch_shape) 54 | filenames = [] 55 | idx = 0 56 | batch_size = batch_shape[0] 57 | for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')): 58 | with tf.gfile.Open(filepath) as f: 59 | images[idx, :, :, :] = imread(f, mode='RGB').astype(np.float) / 255.0 60 | filenames.append(os.path.basename(filepath)) 61 | idx += 1 62 | if idx == batch_size: 63 | yield filenames, images 64 | filenames = [] 65 | images = np.zeros(batch_shape) 66 | idx = 0 67 | if idx > 0: 68 | yield filenames, images 69 | 70 | 71 | def save_images(images, filenames, output_dir): 72 | """Saves images to the output directory. 73 | 74 | Args: 75 | images: array with minibatch of images 76 | filenames: list of filenames without path 77 | If number of file names in this list less than number of images in 78 | the minibatch then only first len(filenames) images will be saved. 79 | output_dir: directory where to save images 80 | """ 81 | for i, filename in enumerate(filenames): 82 | with tf.gfile.Open(os.path.join(output_dir, filename), 'w') as f: 83 | imsave(f, images[i, :, :, :], format='png') 84 | 85 | 86 | def main(_): 87 | batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] 88 | for filenames, images in load_images(FLAGS.input_dir, batch_shape): 89 | save_images(images, filenames, FLAGS.output_dir) 90 | 91 | 92 | if __name__ == '__main__': 93 | tf.app.run() 94 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/validation_tool/validate_submission.py: -------------------------------------------------------------------------------- 1 | r"""Tool to validate submission for adversarial competition. 
2 | 3 | Usage: 4 | python validate_submission.py \ 5 | --submission_filename=FILENAME \ 6 | --submission_type=TYPE \ 7 | [--use_gpu] 8 | 9 | Where: 10 | FILENAME - filename of the submission 11 | TYPE - type of the submission, one of the following without quotes: 12 | "attack", "targeted_attack" or "defense" 13 | --use_gpu - if argument specified then submission will be run on GPU using 14 | nvidia-docker, otherwise will be run on CPU. 15 | 16 | """ 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import argparse 23 | import logging 24 | import random 25 | import subprocess 26 | import tempfile 27 | import validate_submission_lib 28 | 29 | 30 | def print_in_box(text): 31 | print('') 32 | print('*' * (len(text) + 6)) 33 | print('** ' + text + ' **') 34 | print('*' * (len(text) + 6)) 35 | print('') 36 | 37 | 38 | def main(args): 39 | print_in_box('Validating submission ' + args.submission_filename) 40 | random.seed() 41 | temp_dir = args.temp_dir 42 | delete_temp_dir = False 43 | if not temp_dir: 44 | temp_dir = tempfile.mkdtemp() 45 | logging.info('Created temporary directory: %s', temp_dir) 46 | delete_temp_dir = True 47 | validator = validate_submission_lib.SubmissionValidator(temp_dir, 48 | args.use_gpu) 49 | if validator.validate_submission(args.submission_filename, 50 | args.submission_type): 51 | print_in_box('Submission is VALID!') 52 | else: 53 | print_in_box('Submission is INVALID, see log messages for details') 54 | if delete_temp_dir: 55 | logging.info('Deleting temporary directory: %s', temp_dir) 56 | subprocess.call(['rm', '-rf', temp_dir]) 57 | 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser( 61 | description='Submission validation script.') 62 | parser.add_argument('--submission_filename', 63 | required=True, 64 | help='Filename of the submission.') 65 | parser.add_argument('--submission_type', 66 | required=True, 67 | help='Type of the 
submission, ' 68 | 'one of "attack", "targeted_attack" or "defense"') 69 | parser.add_argument('--temp_dir', 70 | required=False, 71 | default='', 72 | help='Temporary directory to extract and run submission. ' 73 | 'If empty then temporary directory will be created ' 74 | 'by the script and then deleted in the end.') 75 | parser.add_argument('--use_gpu', dest='use_gpu', action='store_true') 76 | parser.add_argument('--nouse_gpu', dest='use_gpu', action='store_false') 77 | parser.set_defaults(use_gpu=False) 78 | loggint_format = ('%(asctime)s - %(filename)s:%(lineno)d - %(levelname)s -- ' 79 | '%(message)s') 80 | logging.basicConfig(format=loggint_format, 81 | level=logging.INFO, 82 | datefmt='%Y-%m-%d %H:%M:%S') 83 | main(parser.parse_args()) 84 | -------------------------------------------------------------------------------- /examples/robust_vision_benchmark/cleverhans_attack_example/utils.py: -------------------------------------------------------------------------------- 1 | import cleverhans.model 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def cleverhans_attack_wrapper(cleverhans_attack_fn, reset=True): 7 | def attack(a): 8 | session = tf.Session() 9 | with session.as_default(): 10 | model = RVBCleverhansModel(a) 11 | adversarial_image = cleverhans_attack_fn(model, session, a) 12 | adversarial_image = np.squeeze(adversarial_image, axis=0) 13 | if reset: 14 | # optionally, reset to ignore other adversarials 15 | # found during the search 16 | a._reset() 17 | # run predictions to make sure the returned adversarial 18 | # is taken into account 19 | min_, max_ = a.bounds() 20 | adversarial_image = np.clip(adversarial_image, min_, max_) 21 | a.predictions(adversarial_image) 22 | return attack 23 | 24 | 25 | def py_func_grad(func, inp, Tout, stateful=True, name=None, grad=None): 26 | """Custom py_func with gradient support 27 | 28 | """ 29 | # Need to generate a unique name to avoid duplicates: 30 | rnd_name = 'PyFuncGrad' + 
str(np.random.randint(0, 1E+8)) 31 | 32 | tf.RegisterGradient(rnd_name)(grad) 33 | g = tf.get_default_graph() 34 | with g.gradient_override_map({ 35 | "PyFunc": rnd_name, 36 | "PyFuncStateless": rnd_name}): 37 | return tf.py_func(func, inp, Tout, stateful=stateful, name=name) 38 | 39 | 40 | class RVBCleverhansModel(cleverhans.model.Model): 41 | """This is a cleverhans model that wraps a robust vision benchmark model. 42 | 43 | """ 44 | 45 | def __init__(self, adversarial): 46 | self.adversarial = adversarial 47 | 48 | def get_layer_names(self): 49 | return ['logits'] 50 | 51 | def fprop(self, x): 52 | return {'logits': self._logits_op(x)} 53 | 54 | def _logits_op(self, x, name=None): 55 | with tf.name_scope(name, "logits", [x]) as name: 56 | 57 | num_classes = self.adversarial.num_classes() 58 | 59 | def _backward_py(gradient_y, x): 60 | x = np.squeeze(x, axis=0) 61 | gradient_y = np.squeeze(gradient_y, axis=0) 62 | gradient_x = self.adversarial.backward(gradient_y, x) 63 | gradient_x = gradient_x.astype(np.float32) 64 | return gradient_x[np.newaxis] 65 | 66 | def _backward_tf(op, grad): 67 | images = op.inputs[0] 68 | gradient_x = tf.py_func( 69 | _backward_py, [grad, images], tf.float32) 70 | gradient_x.set_shape(images.shape) 71 | return gradient_x 72 | 73 | def _forward_py(x): 74 | predictions = self.adversarial.batch_predictions( 75 | x, strict=False)[0] 76 | predictions = predictions.astype(np.float32) 77 | return predictions 78 | 79 | op = py_func_grad( 80 | _forward_py, 81 | [x], 82 | [tf.float32], 83 | name=name, 84 | grad=_backward_tf) 85 | 86 | logits = op[0] 87 | logits.set_shape((x.shape[0], num_classes)) 88 | 89 | return logits 90 | -------------------------------------------------------------------------------- /tests_tf/test_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from 
__future__ import unicode_literals 5 | 6 | import numpy as np 7 | import unittest 8 | 9 | from cleverhans import utils 10 | 11 | 12 | class TestUtils(unittest.TestCase): 13 | def test_to_categorical_no_nb_classes_arg(self): 14 | vec = np.asarray([0, 1, 2]) 15 | cat = np.asarray([[1, 0, 0], 16 | [0, 1, 0], 17 | [0, 0, 1]]) 18 | self.assertTrue(np.all(utils.to_categorical(vec) == cat)) 19 | 20 | def test_to_categorical_with_nb_classes_arg(self): 21 | vec = np.asarray([0]) 22 | cat = np.asarray([[1, 0, 0]]) 23 | self.assertTrue(np.all(utils.to_categorical(vec, 3) == cat)) 24 | 25 | def test_random_targets_vector(self): 26 | # Test utils.random_targets with a vector of labels as the input 27 | gt_labels = np.asarray([0, 1, 2, 3]) 28 | rt = utils.random_targets(gt_labels, 5) 29 | 30 | # Make sure random_targets returns a one-hot encoded labels 31 | self.assertTrue(len(rt.shape) == 2) 32 | rt_labels = np.argmax(rt, axis=1) 33 | 34 | # Make sure all labels are different from the correct labels 35 | self.assertTrue(np.all(rt_labels != gt_labels)) 36 | 37 | def test_random_targets_one_hot(self): 38 | # Test utils.random_targets with one-hot encoded labels as the input 39 | gt = np.asarray([[0, 0, 1, 0, 0], 40 | [1, 0, 0, 0, 0], 41 | [0, 0, 0, 1, 0], 42 | [1, 0, 0, 0, 0]]) 43 | gt_labels = np.argmax(gt, axis=1) 44 | rt = utils.random_targets(gt, 5) 45 | 46 | # Make sure random_targets returns a one-hot encoded labels 47 | self.assertTrue(len(rt.shape) == 2) 48 | rt_labels = np.argmax(rt, axis=1) 49 | 50 | # Make sure all labels are different from the correct labels 51 | self.assertTrue(np.all(rt_labels != gt_labels)) 52 | 53 | def test_random_targets_one_hot_single_label(self): 54 | # Test utils.random_targets with a single one-hot encoded label 55 | gt = np.asarray([0, 0, 1, 0, 0]) 56 | gt = gt.reshape((1, 5)) 57 | gt_labels = np.argmax(gt, axis=1) 58 | rt = utils.random_targets(gt, 5) 59 | 60 | # Make sure random_targets returns a one-hot encoded labels 61 | 
self.assertTrue(len(rt.shape) == 2) 62 | rt_labels = np.argmax(rt, axis=1) 63 | 64 | # Make sure all labels are different from the correct labels 65 | self.assertTrue(np.all(rt_labels != gt_labels)) 66 | 67 | def test_other_classes_neg_class_ind(self): 68 | with self.assertRaises(Exception) as context: 69 | utils.other_classes(10, -1) 70 | self.assertTrue(context.exception) 71 | 72 | def test_other_classes_invalid_class_ind(self): 73 | with self.assertRaises(Exception) as context: 74 | utils.other_classes(5, 8) 75 | self.assertTrue(context.exception) 76 | 77 | def test_other_classes_return_val(self): 78 | res = utils.other_classes(5, 2) 79 | res_expected = [0, 1, 3, 4] 80 | self.assertTrue(res == res_expected) 81 | 82 | 83 | if __name__ == '__main__': 84 | unittest.main() 85 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/fchollet/keras/blob/master/.travis.yml 2 | sudo: required 3 | dist: trusty 4 | language: python 5 | matrix: 6 | include: 7 | - python: 2.7 8 | env: 9 | - KERAS_BACKEND=tensorflow 10 | - TENSORFLOW_V=1.0.1 11 | - python: 2.7 12 | env: 13 | - KERAS_BACKEND=tensorflow 14 | - TENSORFLOW_V=1.1.0 15 | - python: 3.5 16 | env: 17 | - KERAS_BACKEND=tensorflow 18 | - TENSORFLOW_V=1.0.1 19 | - python: 3.5 20 | env: 21 | - KERAS_BACKEND=tensorflow 22 | - TENSORFLOW_V=1.1.0 23 | - python: 2.7 24 | env: 25 | - KERAS_BACKEND=theano 26 | - TENSORFLOW_V=1.1.0 27 | - python: 3.5 28 | env: 29 | - KERAS_BACKEND=theano 30 | - TENSORFLOW_V=1.1.0 31 | install: 32 | # code below is taken from http://conda.pydata.org/docs/travis.html 33 | # We do this conditionally because it saves us some downloading if the 34 | # version is the same. 
35 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 36 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 37 | else 38 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 39 | fi 40 | - bash miniconda.sh -b -p $HOME/miniconda 41 | - export PATH="$HOME/miniconda/bin:$PATH" 42 | - hash -r 43 | - conda config --set always_yes yes --set changeps1 no 44 | - conda update -q conda 45 | # Useful for debugging any issues with conda 46 | - conda info -a 47 | 48 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy pyqt=4.11 matplotlib pandas h5py six mkl-service 49 | - source activate test-environment 50 | # install TensorFlow 51 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" && "$TENSORFLOW_V" == "1.0.1" ]]; then 52 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.0.1-cp27-none-linux_x86_64.whl; 53 | elif [[ "$TRAVIS_PYTHON_VERSION" == "2.7" && "$TENSORFLOW_V" == "1.1.0" ]]; then 54 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0-cp27-none-linux_x86_64.whl; 55 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" && "$TENSORFLOW_V" == "1.0.1" ]]; then 56 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.0.1-cp35-cp35m-linux_x86_64.whl; 57 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" && "$TENSORFLOW_V" == "1.1.0" ]]; then 58 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0-cp35-cp35m-linux_x86_64.whl; 59 | fi 60 | - if [[ "$KERAS_BACKEND" == "theano" ]]; then 61 | pip install theano; 62 | fi 63 | - pip install keras 64 | - python setup.py install 65 | 66 | # command to run tests 67 | script: 68 | # run keras backend init to initialize backend config 69 | - python -c "import keras.backend" 70 | # create dataset directory to avoid concurrent directory creation at runtime 71 | - mkdir ~/.keras/datasets 72 | # set up keras backend 73 | - sed -i -e 
's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 74 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 75 | - nosetests -v --nologcapture cleverhans 76 | - if [[ "$KERAS_BACKEND" == "tensorflow" ]]; then 77 | nosetests --nologcapture -v tests_tf/; 78 | else 79 | nosetests -v tests_th/; 80 | fi 81 | -------------------------------------------------------------------------------- /tests_tf/test_utils_keras.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import unittest 7 | import numpy as np 8 | 9 | from cleverhans.utils_keras import KerasModelWrapper 10 | 11 | 12 | class TestKerasModelWrapper(unittest.TestCase): 13 | def setUp(self): 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Activation 16 | import tensorflow as tf 17 | 18 | def dummy_model(): 19 | input_shape = (100,) 20 | return Sequential([Dense(20, name='l1', 21 | input_shape=input_shape), 22 | Dense(10, name='l2'), 23 | Activation('softmax', name='softmax')]) 24 | 25 | self.sess = tf.Session() 26 | self.sess.as_default() 27 | self.model = dummy_model() 28 | 29 | def test_softmax_layer_name_is_softmax(self): 30 | model = KerasModelWrapper(self.model) 31 | softmax_name = model._get_softmax_name() 32 | self.assertEqual(softmax_name, 'softmax') 33 | 34 | def test_logit_layer_name_is_logits(self): 35 | model = KerasModelWrapper(self.model) 36 | logits_name = model._get_logits_name() 37 | self.assertEqual(logits_name, 'l2') 38 | 39 | def test_get_logits(self): 40 | import tensorflow as tf 41 | model = KerasModelWrapper(self.model) 42 | x = tf.placeholder(tf.float32, shape=(None, 100)) 43 | preds = model.get_probs(x) 44 | logits = model.get_logits(x) 45 | 46 | x_val = np.random.rand(2, 100) 47 | 
tf.global_variables_initializer().run(session=self.sess) 48 | p_val, logits = self.sess.run([preds, logits], feed_dict={x: x_val}) 49 | p_gt = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True) 50 | self.assertTrue(np.allclose(p_val, p_gt, atol=1e-6)) 51 | 52 | def test_get_probs(self): 53 | import tensorflow as tf 54 | model = KerasModelWrapper(self.model) 55 | x = tf.placeholder(tf.float32, shape=(None, 100)) 56 | preds = model.get_probs(x) 57 | 58 | x_val = np.random.rand(2, 100) 59 | tf.global_variables_initializer().run(session=self.sess) 60 | p_val = self.sess.run(preds, feed_dict={x: x_val}) 61 | self.assertTrue(np.allclose(np.sum(p_val, axis=1), 1, atol=1e-6)) 62 | self.assertTrue(np.all(p_val>=0)) 63 | self.assertTrue(np.all(p_val<=1)) 64 | 65 | def test_get_layer_names(self): 66 | model = KerasModelWrapper(self.model) 67 | layer_names = model.get_layer_names() 68 | self.assertEqual(layer_names, ['l1', 'l2', 'softmax']) 69 | 70 | def test_fprop(self): 71 | import tensorflow as tf 72 | model = KerasModelWrapper(self.model) 73 | x = tf.placeholder(tf.float32, shape=(None, 100)) 74 | out_dict = model.fprop(x) 75 | 76 | self.assertEqual(set(out_dict.keys()), set(['l1', 'l2', 'softmax'])) 77 | # Test the dimension of the hidden represetation 78 | self.assertEqual(int(out_dict['l1'].shape[1]), 20) 79 | self.assertEqual(int(out_dict['l2'].shape[1]), 10) 80 | 81 | # Test the caching 82 | x2 = tf.placeholder(tf.float32, shape=(None, 100)) 83 | out_dict2 = model.fprop(x2) 84 | self.assertEqual(set(out_dict2.keys()), set(['l1', 'l2', 'softmax'])) 85 | self.assertEqual(int(out_dict2['l1'].shape[1]), 20) 86 | 87 | 88 | if __name__ == '__main__': 89 | unittest.main() 90 | -------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_attacks/random_noise/attack_random_noise.py: -------------------------------------------------------------------------------- 1 | """Implementation of sample 
attack.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import os 8 | import numpy as np 9 | 10 | from scipy.misc import imread 11 | from scipy.misc import imsave 12 | 13 | import tensorflow as tf 14 | 15 | 16 | tf.flags.DEFINE_string( 17 | 'master', '', 'The address of the TensorFlow master to use.') 18 | 19 | tf.flags.DEFINE_string( 20 | 'input_dir', '', 'Input directory with images.') 21 | 22 | tf.flags.DEFINE_string( 23 | 'output_dir', '', 'Output directory with images.') 24 | 25 | tf.flags.DEFINE_float( 26 | 'max_epsilon', 16.0, 'Maximum size of adversarial perturbation.') 27 | 28 | tf.flags.DEFINE_integer( 29 | 'image_width', 299, 'Width of each input images.') 30 | 31 | tf.flags.DEFINE_integer( 32 | 'image_height', 299, 'Height of each input images.') 33 | 34 | tf.flags.DEFINE_integer( 35 | 'batch_size', 16, 'How many images process at one time.') 36 | 37 | FLAGS = tf.flags.FLAGS 38 | 39 | 40 | def load_images(input_dir, batch_shape): 41 | """Read png images from input directory in batches. 42 | 43 | Args: 44 | input_dir: input directory 45 | batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3] 46 | 47 | Yields: 48 | filenames: list file names without path of each image 49 | Lenght of this list could be less than batch_size, in this case only 50 | first few images of the result are elements of the minibatch. 
51 | images: array with all images from this batch 52 | """ 53 | images = np.zeros(batch_shape) 54 | filenames = [] 55 | idx = 0 56 | batch_size = batch_shape[0] 57 | for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')): 58 | with tf.gfile.Open(filepath) as f: 59 | images[idx, :, :, :] = imread(f, mode='RGB').astype(np.float) / 255.0 60 | filenames.append(os.path.basename(filepath)) 61 | idx += 1 62 | if idx == batch_size: 63 | yield filenames, images 64 | filenames = [] 65 | images = np.zeros(batch_shape) 66 | idx = 0 67 | if idx > 0: 68 | yield filenames, images 69 | 70 | 71 | def save_images(images, filenames, output_dir): 72 | """Saves images to the output directory. 73 | 74 | Args: 75 | images: array with minibatch of images 76 | filenames: list of filenames without path 77 | If number of file names in this list less than number of images in 78 | the minibatch then only first len(filenames) images will be saved. 79 | output_dir: directory where to save images 80 | """ 81 | for i, filename in enumerate(filenames): 82 | with tf.gfile.Open(os.path.join(output_dir, filename), 'w') as f: 83 | imsave(f, images[i, :, :, :], format='png') 84 | 85 | 86 | def main(_): 87 | eps = FLAGS.max_epsilon / 255.0 88 | batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] 89 | 90 | with tf.Graph().as_default(): 91 | x_input = tf.placeholder(tf.float32, shape=batch_shape) 92 | noisy_images = x_input + eps * tf.sign(tf.random_normal(batch_shape)) 93 | x_output = tf.clip_by_value(noisy_images, 0.0, 1.0) 94 | 95 | with tf.Session(FLAGS.master) as sess: 96 | for filenames, images in load_images(FLAGS.input_dir, batch_shape): 97 | out_images = sess.run(x_output, feed_dict={x_input: images}) 98 | save_images(out_images, filenames, FLAGS.output_dir) 99 | 100 | 101 | if __name__ == '__main__': 102 | tf.app.run() 103 | -------------------------------------------------------------------------------- 
/examples/nips17_adversarial_competition/sample_defenses/adv_inception_v3/defense.py: -------------------------------------------------------------------------------- 1 | """Implementation of sample defense. 2 | 3 | This defense loads inception v3 checkpoint and classifies all images 4 | using loaded checkpoint. 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | 13 | import numpy as np 14 | from scipy.misc import imread 15 | 16 | import tensorflow as tf 17 | from tensorflow.contrib.slim.nets import inception 18 | 19 | slim = tf.contrib.slim 20 | 21 | 22 | tf.flags.DEFINE_string( 23 | 'master', '', 'The address of the TensorFlow master to use.') 24 | 25 | tf.flags.DEFINE_string( 26 | 'checkpoint_path', '', 'Path to checkpoint for inception network.') 27 | 28 | tf.flags.DEFINE_string( 29 | 'input_dir', '', 'Input directory with images.') 30 | 31 | tf.flags.DEFINE_string( 32 | 'output_file', '', 'Output file to save labels.') 33 | 34 | tf.flags.DEFINE_integer( 35 | 'image_width', 299, 'Width of each input images.') 36 | 37 | tf.flags.DEFINE_integer( 38 | 'image_height', 299, 'Height of each input images.') 39 | 40 | tf.flags.DEFINE_integer( 41 | 'batch_size', 16, 'How many images process at one time.') 42 | 43 | FLAGS = tf.flags.FLAGS 44 | 45 | 46 | def load_images(input_dir, batch_shape): 47 | """Read png images from input directory in batches. 48 | 49 | Args: 50 | input_dir: input directory 51 | batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3] 52 | 53 | Yields: 54 | filenames: list file names without path of each image 55 | Lenght of this list could be less than batch_size, in this case only 56 | first few images of the result are elements of the minibatch. 
57 | images: array with all images from this batch 58 | """ 59 | images = np.zeros(batch_shape) 60 | filenames = [] 61 | idx = 0 62 | batch_size = batch_shape[0] 63 | for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')): 64 | with tf.gfile.Open(filepath) as f: 65 | image = imread(f, mode='RGB').astype(np.float) / 255.0 66 | # Images for inception classifier are normalized to be in [-1, 1] interval. 67 | images[idx, :, :, :] = image * 2.0 - 1.0 68 | filenames.append(os.path.basename(filepath)) 69 | idx += 1 70 | if idx == batch_size: 71 | yield filenames, images 72 | filenames = [] 73 | images = np.zeros(batch_shape) 74 | idx = 0 75 | if idx > 0: 76 | yield filenames, images 77 | 78 | 79 | def main(_): 80 | batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] 81 | num_classes = 1001 82 | 83 | tf.logging.set_verbosity(tf.logging.INFO) 84 | 85 | with tf.Graph().as_default(): 86 | # Prepare graph 87 | x_input = tf.placeholder(tf.float32, shape=batch_shape) 88 | 89 | with slim.arg_scope(inception.inception_v3_arg_scope()): 90 | _, end_points = inception.inception_v3( 91 | x_input, num_classes=num_classes, is_training=False) 92 | 93 | predicted_labels = tf.argmax(end_points['Predictions'], 1) 94 | 95 | # Run computation 96 | saver = tf.train.Saver(slim.get_model_variables()) 97 | session_creator = tf.train.ChiefSessionCreator( 98 | scaffold=tf.train.Scaffold(saver=saver), 99 | checkpoint_filename_with_path=FLAGS.checkpoint_path, 100 | master=FLAGS.master) 101 | 102 | with tf.train.MonitoredSession(session_creator=session_creator) as sess: 103 | with tf.gfile.Open(FLAGS.output_file, 'w') as out_file: 104 | for filenames, images in load_images(FLAGS.input_dir, batch_shape): 105 | labels = sess.run(predicted_labels, feed_dict={x_input: images}) 106 | for filename, label in zip(filenames, labels): 107 | out_file.write('{0},{1}\n'.format(filename, label)) 108 | 109 | 110 | if __name__ == '__main__': 111 | tf.app.run() 112 | 
-------------------------------------------------------------------------------- /examples/nips17_adversarial_competition/sample_defenses/base_inception_model/defense.py: -------------------------------------------------------------------------------- 1 | """Implementation of sample defense. 2 | 3 | This defense loads inception v3 checkpoint and classifies all images 4 | using loaded checkpoint. 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | 13 | import numpy as np 14 | from scipy.misc import imread 15 | 16 | import tensorflow as tf 17 | from tensorflow.contrib.slim.nets import inception 18 | 19 | slim = tf.contrib.slim 20 | 21 | 22 | tf.flags.DEFINE_string( 23 | 'master', '', 'The address of the TensorFlow master to use.') 24 | 25 | tf.flags.DEFINE_string( 26 | 'checkpoint_path', '', 'Path to checkpoint for inception network.') 27 | 28 | tf.flags.DEFINE_string( 29 | 'input_dir', '', 'Input directory with images.') 30 | 31 | tf.flags.DEFINE_string( 32 | 'output_file', '', 'Output file to save labels.') 33 | 34 | tf.flags.DEFINE_integer( 35 | 'image_width', 299, 'Width of each input images.') 36 | 37 | tf.flags.DEFINE_integer( 38 | 'image_height', 299, 'Height of each input images.') 39 | 40 | tf.flags.DEFINE_integer( 41 | 'batch_size', 16, 'How many images process at one time.') 42 | 43 | FLAGS = tf.flags.FLAGS 44 | 45 | 46 | def load_images(input_dir, batch_shape): 47 | """Read png images from input directory in batches. 48 | 49 | Args: 50 | input_dir: input directory 51 | batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3] 52 | 53 | Yields: 54 | filenames: list file names without path of each image 55 | Lenght of this list could be less than batch_size, in this case only 56 | first few images of the result are elements of the minibatch. 
57 | images: array with all images from this batch 58 | """ 59 | images = np.zeros(batch_shape) 60 | filenames = [] 61 | idx = 0 62 | batch_size = batch_shape[0] 63 | for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')): 64 | with tf.gfile.Open(filepath) as f: 65 | image = imread(f, mode='RGB').astype(np.float) / 255.0 66 | # Images for inception classifier are normalized to be in [-1, 1] interval. 67 | images[idx, :, :, :] = image * 2.0 - 1.0 68 | filenames.append(os.path.basename(filepath)) 69 | idx += 1 70 | if idx == batch_size: 71 | yield filenames, images 72 | filenames = [] 73 | images = np.zeros(batch_shape) 74 | idx = 0 75 | if idx > 0: 76 | yield filenames, images 77 | 78 | 79 | def main(_): 80 | batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] 81 | num_classes = 1001 82 | 83 | tf.logging.set_verbosity(tf.logging.INFO) 84 | 85 | with tf.Graph().as_default(): 86 | # Prepare graph 87 | x_input = tf.placeholder(tf.float32, shape=batch_shape) 88 | 89 | with slim.arg_scope(inception.inception_v3_arg_scope()): 90 | _, end_points = inception.inception_v3( 91 | x_input, num_classes=num_classes, is_training=False) 92 | 93 | predicted_labels = tf.argmax(end_points['Predictions'], 1) 94 | 95 | # Run computation 96 | saver = tf.train.Saver(slim.get_model_variables()) 97 | session_creator = tf.train.ChiefSessionCreator( 98 | scaffold=tf.train.Scaffold(saver=saver), 99 | checkpoint_filename_with_path=FLAGS.checkpoint_path, 100 | master=FLAGS.master) 101 | 102 | with tf.train.MonitoredSession(session_creator=session_creator) as sess: 103 | with tf.gfile.Open(FLAGS.output_file, 'w') as out_file: 104 | for filenames, images in load_images(FLAGS.input_dir, batch_shape): 105 | labels = sess.run(predicted_labels, feed_dict={x_input: images}) 106 | for filename, label in zip(filenames, labels): 107 | out_file.write('{0},{1}\n'.format(filename, label)) 108 | 109 | 110 | if __name__ == '__main__': 111 | tf.app.run() 112 | 
def main(argv):
    """
    Attack the Madry Lab MNIST challenge model and report its accuracy.

    Restores the latest checkpoint found in FLAGS.checkpoint_dir, builds the
    attack selected by FLAGS.attack_type ('fgsm' or 'bim') and prints the
    accuracy of the model on the adversarial MNIST test examples.
    :param argv: command line arguments handed over by `app.run`; unused
    :raise ValueError: if no checkpoint is found or the attack type is
                       not recognised
    """
    latest_ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if latest_ckpt is None:
        raise ValueError("Couldn't find latest checkpoint in " +
                         FLAGS.checkpoint_dir)

    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                  train_end=60000,
                                                  test_start=0,
                                                  test_end=10000)
    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    # x_input is not referenced again in this function; it is still created
    # so that the graph is built exactly as before.
    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    # Attack class and keyword arguments for each supported attack type
    attack_table = {
        'fgsm': (FastGradientMethod,
                 {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}),
        'bim': (BasicIterativeMethod,
                {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                 'nb_iter': 50,
                 'eps_iter': .01}),
    }
    if FLAGS.attack_type not in attack_table:
        raise ValueError(FLAGS.attack_type)
    attack_cls, attack_kwargs = attack_table[FLAGS.attack_type]
    adv_x = attack_cls(model).generate(x_image, **attack_kwargs)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, latest_ckpt)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        started = time.time()
        acc = model_eval(sess, x_image, y, preds_adv, X_test, Y_test,
                         args={'batch_size': FLAGS.batch_size})
        finished = time.time()
        print("Took", finished - started, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
def load_images(input_dir, batch_shape):
  """Read png images from input directory in batches.

  Args:
    input_dir: input directory
    batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3]

  Yields:
    filenames: list of file names (without path) of the images in this batch.
      The length of this list can be less than batch_size for the last
      batch; in that case only the first len(filenames) entries of `images`
      hold image data and the remaining entries are left as zeros.
    images: array of shape batch_shape with the images of this batch,
      rescaled to the [-1, 1] interval
  """
  images = np.zeros(batch_shape)
  filenames = []
  idx = 0
  batch_size = batch_shape[0]
  for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')):
    # PNG is a binary format, so read the file as bytes rather than text.
    with tf.gfile.Open(filepath, 'rb') as f:
      # np.float64 is the type the removed `np.float` alias referred to.
      image = imread(f, mode='RGB').astype(np.float64) / 255.0
    # Images for inception classifier are normalized to be in [-1, 1] interval.
    images[idx, :, :, :] = image * 2.0 - 1.0
    filenames.append(os.path.basename(filepath))
    idx += 1
    if idx == batch_size:
      yield filenames, images
      # Start a fresh buffer so the batch just yielded is not overwritten.
      filenames = []
      images = np.zeros(batch_shape)
      idx = 0
  if idx > 0:
    # Final, partially filled batch.
    yield filenames, images


def main(_):
  """Classify all images of FLAGS.input_dir with Inception ResNet v2.

  Restores the network from FLAGS.checkpoint_path and writes one
  `filename,label` line per image to FLAGS.output_file.

  Args:
    _: positional arguments passed by tf.app.run; unused
  """
  batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
  num_classes = 1001

  tf.logging.set_verbosity(tf.logging.INFO)

  with tf.Graph().as_default():
    # Prepare graph
    x_input = tf.placeholder(tf.float32, shape=batch_shape)

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
      _, end_points = inception_resnet_v2.inception_resnet_v2(
          x_input, num_classes=num_classes, is_training=False)

    predicted_labels = tf.argmax(end_points['Predictions'], 1)

    # Run computation
    saver = tf.train.Saver(slim.get_model_variables())
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=tf.train.Scaffold(saver=saver),
        checkpoint_filename_with_path=FLAGS.checkpoint_path,
        master=FLAGS.master)

    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
      with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
        for filenames, images in load_images(FLAGS.input_dir, batch_shape):
          labels = sess.run(predicted_labels, feed_dict={x_input: images})
          # zip() stops at len(filenames), so labels computed for the
          # zero-filled entries of a partial batch are never written.
          for filename, label in zip(filenames, labels):
            out_file.write('{0},{1}\n'.format(filename, label))
def test_format_pep8():
    """
    Test if pep8 is respected.
    Every ".py" file returned by `list_files` is checked unless its path
    relative to the cleverhans package is listed in `whitelist_pep8`.
    :raise AssertionError: if the style checker reports at least one error
    """
    pep8_checker = StyleGuide()
    # Build the lookup set once instead of scanning the list for every file
    whitelisted = set(whitelist_pep8)
    files_to_check = [
        path for path in list_files(".py")
        if os.path.relpath(path, cleverhans.__path__[0]) not in whitelisted]
    report = pep8_checker.check_files(files_to_check)
    if report.total_errors > 0:
        raise AssertionError("PEP8 Format not respected")


def print_files_information_pep8():
    """
    Print the list of files which can be removed from the whitelist and the
    list of files which do not respect PEP8 formatting that aren't in the
    whitelist
    """
    infracting_files = []
    non_infracting_files = []
    whitelisted = set(whitelist_pep8)
    pep8_checker = StyleGuide(quiet=True)
    for path in list_files(".py"):
        number_of_infractions = pep8_checker.input_file(path)
        rel_path = os.path.relpath(path, cleverhans.__path__[0])
        if number_of_infractions > 0:
            if rel_path not in whitelisted:
                infracting_files.append(path)
        else:
            if rel_path in whitelisted:
                non_infracting_files.append(path)
    print("Files that must be corrected or added to whitelist:")
    # `path` is used as the loop name so the builtin `file` is not shadowed
    for path in infracting_files:
        print(path)
    print("Files that can be removed from whitelist:")
    for path in non_infracting_files:
        print(path)


def test_format_docstrings():
    """
    Test if docstrings are well formatted.
    The check is currently disabled: the function returns True before the
    verification code below is reached.
    """
    # Disabled for now
    return True

    # Kept for when the check is re-enabled; unreachable until then.
    try:
        verify_format_docstrings()
    except SkipTest:
        import traceback
        # print_exc() reports the exception being handled; its first
        # parameter is a traceback depth limit, not an exception object.
        traceback.print_exc()
        raise AssertionError(
            "Some file raised SkipTest on import, and inadvertently"
            " canceled the documentation testing."
        )
class Model(object):

    """
    An abstract interface for model wrappers that exposes model symbols
    needed for making an attack. This abstraction removes the dependency on
    any specific neural network package (e.g. Keras) from the core
    code of CleverHans. It can also simplify exposing the hidden features of a
    model when a specific package does not directly expose them.

    Subclasses are expected to override `fprop`, and either override
    `get_layer_names` or define a `layer_names` attribute.
    """
    __metaclass__ = ABCMeta

    def __init__(self):
        pass

    def __call__(self, *args, **kwargs):
        """
        For compatibility with functions used as model definitions (taking
        an input tensor and returning the tensor giving the output
        of the model on that input).
        All arguments are forwarded to `get_probs`.
        """
        return self.get_probs(*args, **kwargs)

    def get_layer(self, x, reuse, layer):
        """
        Expose the hidden features of a model given a layer name.
        :param x: A symbolic representation of the network input
        :param reuse: Passed through unchanged to `fprop`
        :param layer: The name of the hidden layer to return features at.
        :return: A symbolic representation of the hidden features
        :raise: NoSuchLayerError if `layer` is not in the model.
        """
        # Return the symbolic representation for this layer.
        output = self.fprop(x, reuse)
        try:
            requested = output[layer]
        except KeyError:
            # Name the missing layer so the failure can be diagnosed
            raise NoSuchLayerError(
                "No layer named %r is exposed by this model." % (layer,))
        return requested

    def get_logits(self, x, reuse):
        """
        :param x: A symbolic representation of the network input
        :param reuse: Passed through unchanged to `fprop`
        :return: A symbolic representation of the output logits (i.e., the
                 values fed as inputs to the softmax layer).
        :raise: NoSuchLayerError if the model exposes no 'logits' layer.
        """
        return self.get_layer(x, reuse, 'logits')

    def get_probs(self, x, reuse=True):
        """
        :param x: A symbolic representation of the network input
        :param reuse: Passed through unchanged to `fprop` when looking up
                      the 'probs' layer
        :return: A symbolic representation of the output probabilities (i.e.,
                 the output values produced by the softmax layer).
        """
        try:
            return self.get_layer(x, reuse, 'probs')
        except NoSuchLayerError:
            # TensorFlow is only needed for this fallback, so it is imported
            # here rather than at module level.
            import tensorflow as tf
            # `fprop` has already been called once by the lookup above, so
            # the logits are requested with reuse=True rather than `reuse`.
            return tf.nn.softmax(self.get_logits(x, True))

    def get_layer_names(self):
        """
        :return: a list of names for the layers that can be exposed by this
                 model abstraction.
        :raise: NotImplementedError if the instance has no `layer_names`
                attribute and the method is not overridden.
        """

        if hasattr(self, 'layer_names'):
            return self.layer_names

        raise NotImplementedError('`get_layer_names` not implemented.')

    def fprop(self, x, reuse):
        """
        Exposes all the layers of the model returned by get_layer_names.
        :param x: A symbolic representation of the network input
        :param reuse: Not interpreted by this base class; its meaning is
                      defined by the subclass implementing this method
        :return: A dictionary mapping layer names to the symbolic
                 representation of their output.
        """
        raise NotImplementedError('`fprop` not implemented.')


class CallableModelWrapper(Model):

    """
    Adapter exposing a plain callable through the `Model` interface.
    """

    def __init__(self, callable_fn, output_layer):
        """
        Wrap a callable function that takes a tensor as input and returns
        a tensor as output with the given layer name.
        :param callable_fn: The callable function taking a tensor and
                            returning a given layer as output.
        :param output_layer: A string of the output layer returned by the
                             function. (Usually either "probs" or "logits".)
        """
        super(CallableModelWrapper, self).__init__()

        self.output_layer = output_layer
        self.callable_fn = callable_fn

    def get_layer_names(self):
        """
        :return: a single-element list holding the wrapped output layer name
        """
        return [self.output_layer]

    def fprop(self, x, reuse):
        """
        :param x: A symbolic representation of the network input
        :param reuse: Ignored; the wrapped callable only receives `x`
        :return: A dictionary mapping the output layer name to the result
                 of calling the wrapped function on `x`
        """
        return {self.output_layer: self.callable_fn(x)}


class NoSuchLayerError(ValueError):

    """Raised when a layer that does not exist is requested."""
30 | :param clip_min: optional parameter that can be used to set a minimum 31 | value for components of the example returned 32 | :param clip_max: optional parameter that can be used to set a maximum 33 | value for components of the example returned 34 | :return: a tensor for the adversarial example 35 | """ 36 | warnings.warn("CleverHans support for Theano is deprecated and " 37 | "will be dropped on 2017-11-08.") 38 | assert ord == np.inf, "Theano implementation not available for this norm." 39 | eps = np.asarray(eps, dtype=floatX) 40 | 41 | if y is None: 42 | # Using model predictions as ground truth to avoid label leaking 43 | y = T.eq(predictions, T.max(predictions, axis=1, keepdims=True)) 44 | y = T.cast(y, utils_th.floatX) 45 | y = y / T.sum(y, 1, keepdims=True) 46 | # Compute loss 47 | loss = utils_th.model_loss(y, predictions, mean=True) 48 | 49 | # Define gradient of loss wrt input 50 | grad = T.grad(loss, x) 51 | 52 | # Take sign of gradient 53 | signed_grad = T.sgn(grad) 54 | 55 | # Multiply by constant epsilon 56 | scaled_signed_grad = eps * signed_grad 57 | 58 | # Add perturbation to original example to obtain adversarial example 59 | adv_x = gradient.disconnected_grad(x + scaled_signed_grad) 60 | 61 | # If clipping is needed, reset all values outside of [clip_min, clip_max] 62 | if (clip_min is not None) and (clip_max is not None): 63 | adv_x = T.clip(adv_x, clip_min, clip_max) 64 | 65 | return adv_x 66 | 67 | 68 | def vatm(model, x, predictions, eps, num_iterations=1, xi=1e-6, 69 | clip_min=None, clip_max=None, seed=12345): 70 | """ 71 | Theano implementation of the perturbation method used for virtual 72 | adversarial training: https://arxiv.org/abs/1507.00677 73 | :param model: the model which returns the network unnormalized logits 74 | :param x: the input placeholder 75 | :param predictions: the model's unnormalized output tensor 76 | :param eps: the epsilon (input variation parameter) 77 | :param num_iterations: the number of iterations 78 | 
def main():
    """
    MNIST CleverHans tutorial using Keras with the Theano backend.

    Trains a CNN on MNIST, evaluates it on legitimate test examples and on
    FGSM adversarial examples, then repeats the process with adversarial
    training. Training options are read from the command line.
    :return: None
    :raise RuntimeError: if keras is not configured to use Theano
    """

    if not hasattr(backend, "theano"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the Theano backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'tf', temporarily setting to 'th'")

    parser = argparse.ArgumentParser()
    # type=int so a value given on the command line is not left as a string
    parser.add_argument('--batch_size', '-b', default=128, type=int,
                        help='Size of training batches')
    parser.add_argument('--train_dir', '-d', default='/tmp',
                        help='Directory storing the saved model.')
    parser.add_argument('--filename', '-f', default='mnist.ckpt',
                        help='Filename to save model under.')
    parser.add_argument('--nb_epochs', '-e', default=6, type=int,
                        help='Number of epochs to train model')
    parser.add_argument('--learning_rate', '-lr', default=0.5, type=float,
                        help='Learning rate for training')
    args = parser.parse_args()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Label smoothing: with 10 classes the correct label becomes
    # 1 - label_smooth and each other label becomes label_smooth / 9
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input Theano placeholder
    x_shape = (None, 1, 28, 28)
    x = T.tensor4('x')
    y = T.matrix('y')

    # Define Theano model graph
    model = cnn_model()
    model.build(x_shape)
    predictions = model(x)
    print("Defined Theano model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        accuracy = th_model_eval(x, y, predictions, X_test, Y_test, args=args)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Train an MNIST model
    th_model_train(x, y, predictions, model.trainable_weights, X_train,
                   Y_train, evaluate=evaluate, args=args)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    fgsm = FastGradientMethod(model, back='th')
    adv_x = fgsm.generate(x, params={'eps': 0.3})

    # Evaluate the accuracy of the MNIST model on adversarial examples
    accuracy = th_model_eval(x, y, model(adv_x), X_test, Y_test, args=args)
    print('Test accuracy on adversarial examples: ' + str(accuracy))

    print("Repeating the process, using adversarial training")
    # Redefine Theano model graph
    model_2 = cnn_model()
    model_2.build(x_shape)
    preds_2 = model_2(x)
    fgsm = FastGradientMethod(model_2, back='th')
    preds_2_adv = model_2(fgsm.generate(x, params={'eps': 0.3}))

    def evaluate_2():
        # Evaluate the accuracy of the adversarially trained MNIST model on
        # legitimate test examples
        accuracy = th_model_eval(x, y, preds_2, X_test, Y_test, args=args)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained MNIST model on
        # adversarial examples
        acc_adv = th_model_eval(x, y, preds_2_adv, X_test, Y_test, args=args)
        print('Test accuracy on adversarial examples: ' + str(acc_adv))

    # Perform adversarial training
    th_model_train(x, y, preds_2, model_2.trainable_weights, X_train, Y_train,
                   predictions_adv=preds_2_adv, evaluate=evaluate_2, args=args)
def save_images(images, filenames, output_dir):
  """Saves images to the output directory.

  Args:
    images: array with minibatch of images, with values in [-1, 1]
    filenames: list of filenames without path
      If number of file names in this list less than number of images in
      the minibatch then only first len(filenames) images will be saved.
    output_dir: directory where to save images
  """
  for i, filename in enumerate(filenames):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # so rescale them back to [0, 1] and then to 8-bit pixel values.
    img = (((images[i, :, :, :] + 1.0) * 0.5) * 255.0).astype(np.uint8)
    # PNG output is binary data, so open the destination in binary mode.
    with tf.gfile.Open(os.path.join(output_dir, filename), 'wb') as f:
      Image.fromarray(img).save(f, format='PNG')


class InceptionModel(object):
  """Model class for CleverHans library.

  Wraps Inception v3 as a callable; the first call builds the network and
  later calls build it again with variable reuse enabled.
  """

  def __init__(self, num_classes):
    """Stores the number of output classes; the graph is built lazily."""
    self.num_classes = num_classes
    # Set to True once the network has been constructed by __call__.
    self.built = False

  def __call__(self, x_input):
    """Constructs model and return probabilities for given input."""
    # Pass reuse=None on the first call and reuse=True on later calls.
    reuse = True if self.built else None
    with slim.arg_scope(inception.inception_v3_arg_scope()):
      _, end_points = inception.inception_v3(
          x_input, num_classes=self.num_classes, is_training=False,
          reuse=reuse)
    self.built = True
    output = end_points['Predictions']
    # Strip off the extra reshape op at the output
    probs = output.op.inputs[0]
    return probs


def main(_):
  """Runs FGSM on all images of FLAGS.input_dir.

  The adversarial images are written under the same file names to
  FLAGS.output_dir.

  Args:
    _: positional arguments passed by tf.app.run; unused
  """
  # Images for inception classifier are normalized to be in [-1, 1] interval,
  # eps is a difference between pixels so it should be in [0, 2] interval.
  # Renormalizing epsilon from [0, 255] to [0, 2].
  eps = 2.0 * FLAGS.max_epsilon / 255.0
  batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
  num_classes = 1001

  tf.logging.set_verbosity(tf.logging.INFO)

  with tf.Graph().as_default():
    # Prepare graph
    x_input = tf.placeholder(tf.float32, shape=batch_shape)

    model = InceptionModel(num_classes)

    fgsm = FastGradientMethod(model)
    x_adv = fgsm.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

    # Run computation
    saver = tf.train.Saver(slim.get_model_variables())
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=tf.train.Scaffold(saver=saver),
        checkpoint_filename_with_path=FLAGS.checkpoint_path,
        master=FLAGS.master)

    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
      for filenames, images in load_images(FLAGS.input_dir, batch_shape):
        adv_images = sess.run(x_adv, feed_dict={x_input: images})
        save_images(adv_images, filenames, FLAGS.output_dir)
def parse_args():
  """Parses command line arguments.

  Returns:
    Tuple (input_file, output_dir, threads) where threads is an int.
  """
  parser = argparse.ArgumentParser(
      description='Tool to download dataset images.')
  parser.add_argument('--input_file', required=True,
                      help='Location of dataset.csv')
  parser.add_argument('--output_dir', required=True,
                      help='Output path to download images')
  # type=int makes argparse reject a non-numeric value with a usage message.
  parser.add_argument('--threads', type=int,
                      default=multiprocessing.cpu_count() + 1,
                      help='Number of threads to use')
  args = parser.parse_args()
  return args.input_file, args.output_dir, int(args.threads)


def get_image(row, output_dir):
  """Downloads the image that corresponds to the given row.

  Prints a notification if the download fails.

  Args:
    row: sequence (image_id, url, x1, y1, x2, y2)
    output_dir: directory where the image is saved
  """
  image_id, url, x1, y1, x2, y2 = row[:6]
  if not download_image(image_id=image_id,
                        url=url,
                        x1=float(x1),
                        y1=float(y1),
                        x2=float(x2),
                        y2=float(y2),
                        output_dir=output_dir):
    print("Download failed: " + str(image_id))


def download_image(image_id, url, x1, y1, x2, y2, output_dir):
  """Downloads one image, crops it, resizes it and saves it locally.

  Args:
    image_id: identifier of the image; the file is saved as image_id + '.png'
    url: location to download the image from
    x1, y1, x2, y2: crop box, as fractions of the image width and height
    output_dir: directory where the image is saved

  Returns:
    True if the image already exists locally or was downloaded and saved,
    False otherwise.
  """
  output_filename = os.path.join(output_dir, image_id + '.png')
  if os.path.exists(output_filename):
    # Don't download image if it's already there
    return True
  try:
    # Download image
    url_file = urlopen(url)
    try:
      if url_file.getcode() != 200:
        return False
      image_buffer = url_file.read()
    finally:
      # Always release the connection, also on a non-200 answer.
      url_file.close()
    # Crop, resize and save image
    image = Image.open(BytesIO(image_buffer)).convert('RGB')
    w, h = image.size
    image = image.crop((int(x1 * w), int(y1 * h), int(x2 * w),
                        int(y2 * h)))
    # LANCZOS is the same filter as the ANTIALIAS name removed in Pillow 10.
    image = image.resize((299, 299), resample=Image.LANCZOS)
    image.save(output_filename)
  except (IOError, ValueError):
    # ValueError covers malformed URLs, so that one bad row is reported as
    # a failed download instead of aborting the whole run.
    return False
  return True


def main():
  """Downloads all images listed in the input CSV into the output directory.

  Exits with status 1 if the output directory does not exist or if a
  required column is missing from the CSV header.
  """
  input_filename, output_dir, n_threads = parse_args()

  if not os.path.isdir(output_dir):
    print("Output directory {} does not exist".format(output_dir))
    sys.exit(1)

  with open(input_filename) as input_file:
    reader = csv.reader(input_file)
    # An empty file yields an empty header, reported as a missing column.
    header_row = next(reader, [])
    rows = list(reader)
  try:
    row_idx_image_id = header_row.index('ImageId')
    row_idx_url = header_row.index('URL')
    row_idx_x1 = header_row.index('x1')
    row_idx_y1 = header_row.index('y1')
    row_idx_x2 = header_row.index('x2')
    row_idx_y2 = header_row.index('y2')
  except ValueError as e:
    # Exceptions have no `.message` attribute on Python 3; print the
    # exception itself and stop, since the indices below would be undefined.
    print('One of the columns was not found in the source file: ', e)
    sys.exit(1)

  # Normalise every row to (image_id, url, x1, y1, x2, y2).
  rows = [(row[row_idx_image_id], row[row_idx_url], float(row[row_idx_x1]),
           float(row[row_idx_y1]), float(row[row_idx_x2]),
           float(row[row_idx_y2])) for row in rows]

  if n_threads > 1:
    pool = ThreadPool(n_threads)
    partial_get_images = partial(get_image, output_dir=output_dir)
    # enumerate starts at 1, so `count` is the number of finished rows.
    for count, _ in enumerate(
        pool.imap_unordered(partial_get_images, rows), 1):
      sys.stderr.write('\rDownloaded {0} images'.format(count))
    pool.close()
    pool.join()
  else:
    failed_to_download = set()
    for count, row in enumerate(rows, 1):
      if not download_image(image_id=row[0],
                            url=row[1],
                            x1=row[2],
                            y1=row[3],
                            x2=row[4],
                            y2=row[5],
                            output_dir=output_dir):
        # Rows were rebuilt above, so the image id is always at position 0.
        failed_to_download.add(row[0])
      sys.stdout.write('\rDownloaded {0} images'.format(count))
      sys.stdout.flush()

    print()
    if failed_to_download:
      print('\nUnable to download images with the following IDs:')
      for image_id in failed_to_download:
        print(image_id)
If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('..')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 35 | 'sphinx.ext.mathjax', 36 | 'sphinx.ext.viewcode', 37 | 'sphinx.ext.githubpages'] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix(es) of source filenames. 43 | # You can specify multiple suffix as a list of string: 44 | # 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = '.md' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = 'CleverHans' 53 | 54 | author = 'Ian Goodfellow, Nicolas Papernot, Ryan Sheatsley' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 61 | # version = '2.0.0' 62 | # The full version, including alpha/beta/rc tags. 63 | # release = '2.0.0' 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases. 
70 | language = None 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | # This patterns also effect to html_static_path and html_extra_path 75 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 76 | 77 | # The name of the Pygments (syntax highlighting) style to use. 78 | pygments_style = 'sphinx' 79 | 80 | # If true, `todo` and `todoList` produce output, else they produce nothing. 81 | todo_include_todos = False 82 | 83 | 84 | # -- Options for HTML output ---------------------------------------------- 85 | 86 | # The theme to use for HTML and HTML Help pages. See the documentation for 87 | # a list of builtin themes. 88 | # 89 | html_theme = 'alabaster' 90 | 91 | # Theme options are theme-specific and customize the look and feel of a theme 92 | # further. For a list of options available for each theme, see the 93 | # documentation. 94 | # 95 | # html_theme_options = {} 96 | 97 | # Add any paths that contain custom static files (such as style sheets) here, 98 | # relative to this directory. They are copied after the builtin static files, 99 | # so a file named "default.css" will overwrite the builtin "default.css". 100 | html_static_path = ['_static'] 101 | 102 | # Custom sidebar templates, must be a dictionary that maps document names 103 | # to template names. 104 | # 105 | # This is required for the alabaster theme 106 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 107 | html_sidebars = { 108 | '**': [ 109 | 'about.html', 110 | 'navigation.html', 111 | 'relations.html', # needs 'show_related': True theme option to display 112 | 'searchbox.html', 113 | 'donate.html', 114 | ] 115 | } 116 | 117 | 118 | # -- Options for HTMLHelp output ------------------------------------------ 119 | 120 | # Output file base name for HTML help builder. 
121 | htmlhelp_basename = 'CleverHansdoc' 122 | 123 | 124 | # -- Options for LaTeX output --------------------------------------------- 125 | 126 | latex_elements = { 127 | # The paper size ('letterpaper' or 'a4paper'). 128 | # 129 | # 'papersize': 'letterpaper', 130 | 131 | # The font size ('10pt', '11pt' or '12pt'). 132 | # 133 | # 'pointsize': '10pt', 134 | 135 | # Additional stuff for the LaTeX preamble. 136 | # 137 | # 'preamble': '', 138 | 139 | # Latex figure (float) alignment 140 | # 141 | # 'figure_align': 'htbp', 142 | } 143 | 144 | # Grouping the document tree into LaTeX files. List of tuples 145 | # (source start file, target name, title, 146 | # author, documentclass [howto, manual, or own class]). 147 | latex_documents = [ 148 | (master_doc, 'CleverHans.tex', 'CleverHans Documentation', 149 | 'Ian Goodfellow, Nicolas Papernot, Ryan Sheatsley', 'manual'), 150 | ] 151 | 152 | 153 | # -- Options for manual page output --------------------------------------- 154 | 155 | # One entry per manual page. List of tuples 156 | # (source start file, name, description, authors, manual section). 157 | man_pages = [ 158 | (master_doc, 'cleverhans', 'CleverHans Documentation', 159 | [author], 1) 160 | ] 161 | 162 | 163 | # -- Options for Texinfo output ------------------------------------------- 164 | 165 | # Grouping the document tree into Texinfo files. 
def load_target_class(input_dir):
    """Loads target classes.

    Args:
      input_dir: directory containing ``target_class.csv``.

    Returns:
      dict mapping the first CSV column to the second column parsed as int;
      rows with fewer than two columns are ignored.
    """
    with tf.gfile.Open(os.path.join(input_dir, 'target_class.csv')) as f:
        return {row[0]: int(row[1]) for row in csv.reader(f) if len(row) >= 2}


def load_images(input_dir, batch_shape):
    """Read png images from input directory in batches.

    Args:
      input_dir: input directory
      batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3]

    Yields:
      filenames: list file names without path of each image
        Length of this list could be less than batch_size, in this case only
        first few images of the result are elements of the minibatch.
      images: array with all images from this batch
    """
    images = np.zeros(batch_shape)
    filenames = []
    idx = 0
    batch_size = batch_shape[0]
    for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')):
        # PNG data is binary: without 'rb' the file object decodes the bytes
        # as text on Python 3.
        with tf.gfile.Open(filepath, 'rb') as f:
            # np.float was only an alias of the builtin float (float64) and
            # has been removed from NumPy.
            image = imread(f, mode='RGB').astype(np.float64) / 255.0
        # Images for inception classifier are normalized to be in [-1, 1]
        # interval.
        images[idx, :, :, :] = image * 2.0 - 1.0
        filenames.append(os.path.basename(filepath))
        idx += 1
        if idx == batch_size:
            yield filenames, images
            filenames = []
            images = np.zeros(batch_shape)
            idx = 0
    if idx > 0:
        # Final, partially filled batch; the unused tail stays zero.
        yield filenames, images


def save_images(images, filenames, output_dir):
    """Saves images to the output directory.

    Args:
      images: array with minibatch of images
      filenames: list of filenames without path
        If number of file names in this list less than number of images in
        the minibatch then only first len(filenames) images will be saved.
      output_dir: directory where to save images
    """
    for i, filename in enumerate(filenames):
        # Images for inception classifier are normalized to be in [-1, 1]
        # interval, so rescale them back to [0, 1].
        with tf.gfile.Open(os.path.join(output_dir, filename), 'w') as f:
            imsave(f, (images[i, :, :, :] + 1.0) * 0.5, format='png')


def main(_):
    """Runs a single-step targeted attack on every image in the input dir.

    Builds an Inception v3 graph, takes one signed-gradient step of size eps
    that decreases the cross-entropy towards each image's target class, and
    writes the clipped result to the output directory.
    """
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    all_images_target_class = load_target_class(FLAGS.input_dir)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        with slim.arg_scope(inception.inception_v3_arg_scope()):
            logits, end_points = inception.inception_v3(
                x_input, num_classes=num_classes, is_training=False)

        target_class_input = tf.placeholder(tf.int32,
                                            shape=[FLAGS.batch_size])
        one_hot_target_class = tf.one_hot(target_class_input, num_classes)
        cross_entropy = tf.losses.softmax_cross_entropy(
            one_hot_target_class,
            logits,
            label_smoothing=0.1,
            weights=1.0)
        cross_entropy += tf.losses.softmax_cross_entropy(
            one_hot_target_class,
            end_points['AuxLogits'],
            label_smoothing=0.1,
            weights=0.4)
        # Step against the gradient: lowers the loss for the target class.
        x_adv = x_input - eps * tf.sign(
            tf.gradients(cross_entropy, x_input)[0])
        x_adv = tf.clip_by_value(x_adv, -1.0, 1.0)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir,
                                                 batch_shape):
                # Pad the targets of a short final batch with class 0; the
                # padded outputs are never saved.
                target_class_for_batch = (
                    [all_images_target_class[n] for n in filenames]
                    + [0] * (FLAGS.batch_size - len(filenames)))
                adv_images = sess.run(
                    x_adv,
                    feed_dict={
                        x_input: images,
                        target_class_input: target_class_for_batch
                    })
                save_images(adv_images, filenames, FLAGS.output_dir)
def load_target_class(input_dir):
    """Loads target classes.

    Args:
      input_dir: directory containing ``target_class.csv``.

    Returns:
      dict mapping the first CSV column to the second column parsed as int;
      rows with fewer than two columns are ignored.
    """
    with tf.gfile.Open(os.path.join(input_dir, 'target_class.csv')) as f:
        return {row[0]: int(row[1]) for row in csv.reader(f) if len(row) >= 2}


def load_images(input_dir, batch_shape):
    """Read png images from input directory in batches.

    Args:
      input_dir: input directory
      batch_shape: shape of minibatch array, i.e. [batch_size, height, width, 3]

    Yields:
      filenames: list file names without path of each image
        Length of this list could be less than batch_size, in this case only
        first few images of the result are elements of the minibatch.
      images: array with all images from this batch
    """
    images = np.zeros(batch_shape)
    filenames = []
    idx = 0
    batch_size = batch_shape[0]
    for filepath in tf.gfile.Glob(os.path.join(input_dir, '*.png')):
        # PNG data is binary: without 'rb' the file object decodes the bytes
        # as text on Python 3.
        with tf.gfile.Open(filepath, 'rb') as f:
            # np.float was only an alias of the builtin float (float64) and
            # has been removed from NumPy.
            image = imread(f, mode='RGB').astype(np.float64) / 255.0
        # Images for inception classifier are normalized to be in [-1, 1]
        # interval.
        images[idx, :, :, :] = image * 2.0 - 1.0
        filenames.append(os.path.basename(filepath))
        idx += 1
        if idx == batch_size:
            yield filenames, images
            filenames = []
            images = np.zeros(batch_shape)
            idx = 0
    if idx > 0:
        # Final, partially filled batch; the unused tail stays zero.
        yield filenames, images


def save_images(images, filenames, output_dir):
    """Saves images to the output directory.

    Args:
      images: array with minibatch of images
      filenames: list of filenames without path
        If number of file names in this list less than number of images in
        the minibatch then only first len(filenames) images will be saved.
      output_dir: directory where to save images
    """
    for i, filename in enumerate(filenames):
        # Images for inception classifier are normalized to be in [-1, 1]
        # interval, so rescale them back to [0, 1].
        with tf.gfile.Open(os.path.join(output_dir, filename), 'w') as f:
            imsave(f, (images[i, :, :, :] + 1.0) * 0.5, format='png')


def main(_):
    """Runs an iterative targeted attack on every image in the input dir.

    Unrolls FLAGS.num_iter signed-gradient steps of size alpha towards each
    image's target class, clipping after every step to the eps-ball around
    the input and to the valid pixel range, then saves the result.
    """
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    alpha = 2.0 * FLAGS.iter_alpha / 255.0
    num_iter = FLAGS.num_iter
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    all_images_target_class = load_target_class(FLAGS.input_dir)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        # Per-pixel bounds of the allowed perturbation, inside [-1, 1].
        x_max = tf.clip_by_value(x_input + eps, -1.0, 1.0)
        x_min = tf.clip_by_value(x_input - eps, -1.0, 1.0)

        # The result of this first call is discarded; presumably it exists
        # to create the model variables so the calls in the loop below can
        # pass reuse=True.
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            inception.inception_v3(
                x_input, num_classes=num_classes, is_training=False)

        x_adv = x_input
        target_class_input = tf.placeholder(tf.int32,
                                            shape=[FLAGS.batch_size])
        one_hot_target_class = tf.one_hot(target_class_input, num_classes)

        for _ in range(num_iter):
            with slim.arg_scope(inception.inception_v3_arg_scope()):
                logits, end_points = inception.inception_v3(
                    x_adv, num_classes=num_classes, is_training=False,
                    reuse=True)
            cross_entropy = tf.losses.softmax_cross_entropy(
                one_hot_target_class,
                logits,
                label_smoothing=0.1,
                weights=1.0)
            cross_entropy += tf.losses.softmax_cross_entropy(
                one_hot_target_class,
                end_points['AuxLogits'],
                label_smoothing=0.1,
                weights=0.4)
            # Step against the gradient: lowers the loss for the target class.
            x_next = x_adv - alpha * tf.sign(
                tf.gradients(cross_entropy, x_adv)[0])
            x_next = tf.clip_by_value(x_next, x_min, x_max)
            x_adv = x_next

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir,
                                                 batch_shape):
                # Pad the targets of a short final batch with class 0; the
                # padded outputs are never saved.
                target_class_for_batch = (
                    [all_images_target_class[n] for n in filenames]
                    + [0] * (FLAGS.batch_size - len(filenames)))
                adv_images = sess.run(
                    x_adv,
                    feed_dict={
                        x_input: images,
                        target_class_input: target_class_for_batch
                    })
                save_images(adv_images, filenames, FLAGS.output_dir)
def conv_2d(filters, kernel_shape, strides, padding, input_shape=None):
    """
    Defines the right convolutional layer according to the
    version of Keras that is installed.
    :param filters: (required integer) the dimensionality of the output
                    space (i.e. the number output of filters in the
                    convolution)
    :param kernel_shape: (required tuple or list of 2 integers) specifies
                         the height and width of the convolution kernel
    :param strides: (required tuple or list of 2 integers) specifies
                    the strides of the convolution along the width and
                    height.
    :param padding: (required string) can be either 'valid' (no padding around
                    input or feature map) or 'same' (pad to ensure that the
                    output feature map size is identical to the layer input)
    :param input_shape: (optional) give input shape if this is the first
                        layer of the model
    :return: the Keras layer
    """
    # Only forward input_shape when the caller supplied one.
    extra = {} if input_shape is None else {'input_shape': input_shape}

    if LooseVersion(keras.__version__) >= LooseVersion('2.0.0'):
        return Conv2D(filters=filters, kernel_size=kernel_shape,
                      strides=strides, padding=padding, **extra)
    # Keras 1.x API: separate kernel rows/cols and differently named kwargs.
    return Convolution2D(filters, kernel_shape[0], kernel_shape[1],
                         subsample=strides, border_mode=padding, **extra)


def cnn_model(logits=False, input_ph=None, img_rows=28, img_cols=28,
              channels=1, nb_filters=64, nb_classes=10):
    """
    Defines a CNN model using Keras sequential model
    :param logits: If set to False, returns a Keras model, otherwise will also
                   return logits tensor
    :param input_ph: The TensorFlow tensor for the input
                    (needed if returning logits)
                    ("ph" stands for placeholder but it need not actually be a
                    placeholder)
    :param img_rows: number of row in the image
    :param img_cols: number of columns in the image
    :param channels: number of color channels (e.g., 1 for MNIST)
    :param nb_filters: number of convolutional filters per layer
    :param nb_classes: the number of output classes
    :return: the Keras model if `logits` is False, otherwise the tuple
             (model, logits_tensor)
    :raises ValueError: if `logits` is True but `input_ph` is not given
    """
    if logits and input_ph is None:
        # Fail early with a clear message instead of an error from inside
        # Keras when the model is applied to None.
        raise ValueError('input_ph must be supplied when logits=True.')

    model = Sequential()

    # Define the layers successively (convolution layers are version dependent)
    if keras.backend.image_dim_ordering() == 'th':
        input_shape = (channels, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, channels)

    layers = [conv_2d(nb_filters, (8, 8), (2, 2), "same",
                      input_shape=input_shape),
              Activation('relu'),
              conv_2d((nb_filters * 2), (6, 6), (2, 2), "valid"),
              Activation('relu'),
              conv_2d((nb_filters * 2), (5, 5), (1, 1), "valid"),
              Activation('relu'),
              Flatten(),
              Dense(nb_classes)]

    for layer in layers:
        model.add(layer)

    if logits:
        # Take the logits before the softmax layer is appended.
        logits_tensor = model(input_ph)
    model.add(Activation('softmax'))

    if logits:
        return model, logits_tensor
    return model


class KerasModelWrapper(Model):
    """
    An implementation of `Model` that wraps a Keras model. It
    specifically exposes the hidden features of a model by creating new models.
    The symbolic graph is reused and so there is little overhead. Splitting
    in-place operations can incur an overhead.
    """

    def __init__(self, model=None):
        """
        Create a wrapper for a Keras model
        :param model: A Keras model
        :raises ValueError: if no model is given
        """
        super(KerasModelWrapper, self).__init__()

        if model is None:
            raise ValueError('model argument must be supplied.')

        self.model = model
        # Multi-output model exposing every layer; built lazily by fprop.
        self.keras_model = None

    def _get_softmax_name(self):
        """
        Looks for the name of the softmax layer.
        :return: Softmax layer name
        :raises Exception: if no layer has a softmax activation
        """
        for layer in self.model.layers:
            cfg = layer.get_config()
            if 'activation' in cfg and cfg['activation'] == 'softmax':
                return layer.name

        raise Exception("No softmax layers found")

    def _get_logits_name(self):
        """
        Looks for the name of the layer producing the logits.
        :return: name of layer producing the logits
        """
        softmax_name = self._get_softmax_name()
        softmax_layer = self.model.get_layer(softmax_name)
        # Newer Keras releases made `inbound_nodes` private
        # (`_inbound_nodes`); support both spellings.
        nodes = getattr(softmax_layer, 'inbound_nodes', None)
        if nodes is None:
            nodes = softmax_layer._inbound_nodes
        # NOTE(review): this takes the layer feeding the softmax layer, so it
        # presumably assumes softmax is a separate Activation layer - confirm
        # for models that pass activation='softmax' to Dense.
        logits_name = nodes[0].inbound_layers[0].name

        return logits_name

    def get_logits(self, x):
        """
        :param x: A symbolic representation of the network input.
        :return: A symbolic representation of the logits
        """
        logits_name = self._get_logits_name()

        return self.get_layer(x, logits_name)

    def get_probs(self, x):
        """
        :param x: A symbolic representation of the network input.
        :return: A symbolic representation of the probs
        """
        name = self._get_softmax_name()

        return self.get_layer(x, name)

    def get_layer_names(self):
        """
        :return: Names of all the layers kept by Keras
        """
        return [layer.name for layer in self.model.layers]

    def fprop(self, x):
        """
        Exposes all the layers of the model returned by get_layer_names.
        :param x: A symbolic representation of the network input
        :return: A dictionary mapping layer names to the symbolic
                 representation of their output.
        """
        from keras.models import Model as KerasModel

        if self.keras_model is None:
            # Get the input layer
            new_input = self.model.get_input_at(0)

            # Make a new model that returns each of the layers as output
            out_layers = [x_layer.output for x_layer in self.model.layers]
            self.keras_model = KerasModel(new_input, out_layers)

        # and get the outputs for that model on the input x
        outputs = self.keras_model(x)

        # Keras only returns a list when there is more than one output; if
        # the model is only one layer, wrap the single tensor in a list
        if len(self.model.layers) == 1:
            outputs = [outputs]

        # compute the dict to return
        fprop_dict = dict(zip(self.get_layer_names(), outputs))

        return fprop_dict
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001, train_dir="/tmp",
                   filename="mnist.ckpt", load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial

    Trains (or restores) a Keras CNN on MNIST, evaluates it on clean and
    FGSM adversarial examples, then trains a second model with adversarial
    training and evaluates it the same way.
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True to restore a checkpoint found in train_dir
                       instead of training; trains if none is found
    :param testing: if true, accuracies on the training set are also
                    computed and stored in the report
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST training and test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    # ckpt_path is False when train_dir holds no checkpoint state
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, save=True, rng=rng)

    # Calculate accuracy on the training set
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    adv_x = fgsm.generate(x, **fgsm_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    report.clean_train_adv_eval = acc

    # Calculate adversarial accuracy on the training set
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_train,
                         Y_train, args=eval_par)
        report.train_clean_train_adv_eval = acc

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(fgsm2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_test, Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess, x, y, preds_2_adv, X_test,
                              Y_test, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, save=False, rng=rng)

    # Calculate accuracies of the adversarially trained model on the
    # training set
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds_2, X_train, Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess, x, y, preds_2_adv, X_train,
                              Y_train, args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report


def main(argv=None):
    # Entry point for tf.app.run(): forwards the command-line flags to
    # mnist_tutorial; argv itself is not used.
    mnist_tutorial(nb_epochs=FLAGS.nb_epochs,
                   batch_size=FLAGS.batch_size,
                   learning_rate=FLAGS.learning_rate,
                   train_dir=FLAGS.train_dir,
                   filename=FLAGS.filename,
                   load_model=FLAGS.load_model)
FLAGS = flags.FLAGS


def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=6, batch_size=128,
                   learning_rate=0.001,
                   clean_train=True,
                   testing=False,
                   backprop_through_attack=False,
                   nb_filters=64):
    """
    MNIST cleverhans tutorial

    Optionally trains a model on clean examples and measures its accuracy
    on clean and FGSM adversarial examples, then trains a second model with
    adversarial training and measures the same two accuracies.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches (also used for evaluation)
    :param learning_rate: learning rate for training
    :param clean_train: if true, train and evaluate a model on clean
                        examples only before performing adversarial
                        training. If false, only the adversarially trained
                        model is built and the clean_train_* report fields
                        are left untouched.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate (this adds
                    evaluations on the training set)
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: passed to make_basic_cnn as its nb_filters argument
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing: clip the one-hot training labels so that the
    # "off" entries become label_smooth / 9 and the "on" entry becomes
    # 1 - label_smooth (the 9 assumes exactly 10 classes, checked here)
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def accuracy_on(predictions, X, Y):
        # Every evaluation in this tutorial uses the same arguments; a
        # fresh args dict is built per call, as the original code did.
        return model_eval(sess, x, y, predictions, X, Y,
                          args={'batch_size': batch_size})

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            acc = accuracy_on(preds, X_test, Y_test)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)
        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            report.train_clean_train_clean_eval = accuracy_on(
                preds, X_train, Y_train)

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        acc = accuracy_on(preds_adv, X_test, Y_test)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            report.train_clean_train_adv_eval = accuracy_on(
                preds_adv, X_train, Y_train)

        print("Repeating the process, using adversarial training")

    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(model_2, sess=sess)
    adv_x_2 = fgsm2.generate(x, **fgsm_params)
    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x_2 = tf.stop_gradient(adv_x_2)
    preds_2_adv = model_2(adv_x_2)

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        accuracy = accuracy_on(preds_2, X_test, Y_test)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = accuracy_on(preds_2_adv, X_test, Y_test)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess, x, y, preds_2, X_train, Y_train,
                predictions_adv=preds_2_adv, evaluate=evaluate_2,
                args=train_params, rng=rng)

    # Calculate training errors
    if testing:
        report.train_adv_train_clean_eval = accuracy_on(
            preds_2, X_train, Y_train)
        report.train_adv_train_adv_eval = accuracy_on(
            preds_2_adv, X_train, Y_train)

    return report


def main(argv=None):
    """Run the tutorial with the values of the command-line flags."""
    mnist_tutorial(nb_epochs=FLAGS.nb_epochs, batch_size=FLAGS.batch_size,
                   learning_rate=FLAGS.learning_rate,
                   clean_train=FLAGS.clean_train,
                   backprop_through_attack=FLAGS.backprop_through_attack,
                   nb_filters=FLAGS.nb_filters)


if __name__ == '__main__':
    flags.DEFINE_integer('nb_filters', 64, 'Model size multiplier')
    flags.DEFINE_integer('nb_epochs', 6, 'Number of epochs to train model')
    flags.DEFINE_integer('batch_size', 128, 'Size of training batches')
    flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')
    flags.DEFINE_bool('clean_train', True, 'Train on clean examples')
    flags.DEFINE_bool('backprop_through_attack', False,
                      ('If True, backprop through adversarial example '
                       'construction process during adversarial training'))

    tf.app.run()
flags.DEFINE_bool('clean_train', True, 'Train on clean examples') 199 | flags.DEFINE_bool('backprop_through_attack', False, 200 | ('If True, backprop through adversarial example ' 201 | 'construction process during adversarial training')) 202 | 203 | tf.app.run() 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CleverHans (latest release: v2.0.0) for Attacking Binarized Neural Networks 2 | 3 | cleverhans logo 4 | 5 | [![Build Status](https://travis-ci.org/tensorflow/cleverhans.svg?branch=master)](https://travis-ci.org/tensorflow/cleverhans) 6 | 7 | This repository contains the source code for CleverHans, a Python library to 8 | benchmark machine learning systems' vulnerability to 9 | [adversarial examples](http://karpathy.github.io/2015/03/30/breaking-convnets/). 10 | You can learn more about such vulnerabilities on the accompanying [blog](http://cleverhans.io). 11 | 12 | The CleverHans library is under continual development, always welcoming 13 | [contributions](https://github.com/tensorflow/cleverhans#contributing) 14 | of the latest attacks and defenses. 15 | In particular, we always welcome help towards resolving the [issues](https://github.com/tensorflow/cleverhans/issues) 16 | currently open. 17 | 18 | ## Attacking Binarized Neural Networks 19 | + `cleverhans_tutorials/mnist_attack.py` - white-box MNIST attacks 20 | + `cleverhans_tutorials/mnist_blackbox.py` - black-box MNIST attack 21 | + `examples/cifar10_attack` - white-box CIFAR-10 attacks 22 | + `examples/cifar10_blackbox` - black-box CIFAR-10 attack 23 | 24 | ## Setting up CleverHans 25 | 26 | ### Dependencies 27 | 28 | This library uses [TensorFlow](https://www.tensorflow.org/) to accelerate graph 29 | computations performed by many machine learning models. 30 | Installing TensorFlow is therefore a pre-requisite. 
31 | 32 | You can find instructions 33 | [here](https://www.tensorflow.org/install/). 34 | For better performance, it is also recommended to install TensorFlow 35 | with GPU support (detailed instructions on how to do this are available 36 | in the TensorFlow installation documentation). 37 | 38 | Installing TensorFlow will 39 | take care of all other dependencies like `numpy` and `scipy`. 40 | 41 | ### Installation 42 | 43 | Once dependencies have been taken care of, you can install CleverHans using 44 | `pip` or by cloning this Github repository. 45 | 46 | #### `pip` installation 47 | 48 | If you are installing CleverHans using `pip`, run the following command: 49 | 50 | ``` 51 | pip install -e git+http://github.com/tensorflow/cleverhans.git#egg=cleverhans 52 | ``` 53 | 54 | #### Manual installation 55 | 56 | If you are installing CleverHans manually, you need to install TensorFlow 57 | first. Then, run the following command to clone the CleverHans repository 58 | into a folder of your choice: 59 | 60 | ``` 61 | git clone https://github.com/tensorflow/cleverhans 62 | ``` 63 | 64 | On UNIX machines, it is recommended to add your clone of this repository to the 65 | `PYTHONPATH` variable so as to be able to import `cleverhans` from any folder. 66 | 67 | ``` 68 | export PYTHONPATH="/path/to/cleverhans":$PYTHONPATH 69 | ``` 70 | 71 | You may want to make that change permanent through your shell's profile. 72 | 73 | ### Currently supported setups 74 | 75 | Although CleverHans is likely to work on many other machine configurations, we 76 | currently [test it](https://travis-ci.org/tensorflow/cleverhans) with Python 77 | {2.7, 3.5} and TensorFlow {1.0, 1.1} on Ubuntu 14.04.5 LTS (Trusty Tahr). 
78 | 79 | ## Tutorials 80 | 81 | To help you get started with the functionalities provided by this library, the 82 | `cleverhans_tutorials/` folder comes with the following tutorials: 83 | * **MNIST with FGSM** ([code](cleverhans_tutorials/mnist_tutorial_tf.py)): this 84 | tutorial covers how to train a MNIST model using TensorFlow, 85 | craft adversarial examples using the [fast gradient sign method](https://arxiv.org/abs/1412.6572), 86 | and make the model more robust to adversarial examples using adversarial training. 87 | * **MNIST with FGSM using Keras** ([code](cleverhans_tutorials/mnist_tutorial_keras_tf.py)): this 88 | tutorial covers how to define a MNIST model with Keras and train it using TensorFlow, 89 | craft adversarial examples using the [fast gradient sign method](https://arxiv.org/abs/1412.6572), 90 | and make the model more robust to adversarial 91 | examples using adversarial training. 92 | * **MNIST with JSMA** ([code](cleverhans_tutorials/mnist_tutorial_jsma.py)): this 93 | tutorial covers how to define a MNIST model with Keras and train it using TensorFlow and 94 | craft adversarial examples using the [Jacobian-based saliency map approach](https://arxiv.org/abs/1511.07528). 95 | * **MNIST using a black-box attack** ([code](cleverhans_tutorials/mnist_blackbox.py)): 96 | this tutorial implements the black-box 97 | attack described in this [paper](https://arxiv.org/abs/1602.02697). 98 | The adversary trains a substitute model: a copy that imitates the black-box 99 | model by observing the labels that the black-box model assigns to inputs chosen 100 | carefully by the adversary. The adversary then uses the substitute 101 | model’s gradients to find adversarial examples that are misclassified by the 102 | black-box model as well. 103 | 104 | Some models used in the tutorials are defined using [Keras](https://keras.io), 105 | which should be installed before running these tutorials. 
106 | Installation instructions for Keras can be found 107 | [here](https://keras.io/#installation). 108 | Note that you should configure Keras to use the TensorFlow backend. You 109 | can find instructions for 110 | setting the Keras backend [on this page](https://keras.io/backend/). 111 | 112 | ## Examples 113 | 114 | The `examples/` folder contains additional scripts to showcase different uses 115 | of the CleverHans library or get you started competing in different adversarial 116 | example contests. 117 | 118 | ## Reporting benchmarks 119 | 120 | When reporting benchmarks, please: 121 | * Use a versioned release of CleverHans. You can find a list of released versions [here](https://github.com/tensorflow/cleverhans/releases). 122 | * Either use the latest version, or, if comparing to an earlier publication, use the same version as the earlier publication. 123 | * Report which attack method was used. 124 | * Report any configuration variables used to determine the behavior of the attack. 125 | 126 | For example, you might report "We benchmarked the robustness of our method to 127 | adversarial attack using v2.0.0 of CleverHans. On a test set modified by the 128 | `FastGradientMethod` with a max-norm `eps` of 0.3, we obtained a test set accuracy of 71.3%." 129 | 130 | ## Contributing 131 | 132 | Contributions are welcomed! To speed the code review process, we ask that: 133 | * New efforts and features be coordinated 134 | on the mailing list for CleverHans development: [cleverhans-dev@googlegroups.com](https://groups.google.com/forum/#!forum/cleverhans-dev). 135 | * When making code contributions to CleverHans, you follow the 136 | `PEP8` coding style in your pull requests. 137 | * When making your first pull request, you [sign the Google CLA](https://cla.developers.google.com/clas) 138 | 139 | Bug fixes can be initiated through Github pull requests. 
140 | 141 | ## Citing this work 142 | 143 | If you use CleverHans for academic research, you are highly encouraged 144 | (though not required) to cite the following [paper](https://arxiv.org/abs/1610.00768): 145 | 146 | ``` 147 | @article{papernot2016cleverhans, 148 | title={cleverhans v1.0.0: an adversarial machine learning library}, 149 | author={Papernot, Nicolas and Goodfellow, Ian and Sheatsley, Ryan and Feinman, Reuben and McDaniel, Patrick}, 150 | journal={arXiv preprint arXiv:1610.00768}, 151 | year={2016} 152 | } 153 | ``` 154 | There is not yet an arXiv tech report for v2.0.0 but one will be prepared soon. 155 | 156 | ## About the name 157 | 158 | The name CleverHans is a reference to a presentation by Bob Sturm titled 159 | "Clever Hans, Clever Algorithms: Are Your Machine Learnings Learning What You 160 | Think?" and the corresponding publication, ["A Simple Method to Determine if a 161 | Music Information Retrieval System is a 162 | 'Horse'."](http://ieeexplore.ieee.org/document/6847693/) Clever Hans was a 163 | horse that appeared to have learned to answer arithmetic questions, but had in 164 | fact only learned to read social cues that enabled him to give the correct 165 | answer. In controlled settings where he could not see people's faces or receive 166 | other feedback, he was unable to answer the same questions. The story of Clever 167 | Hans is a metaphor for machine learning systems that may achieve very high 168 | accuracy on a test set drawn from the same distribution as the training data, 169 | but that do not actually understand the underlying task and perform poorly on 170 | other inputs. 171 | 172 | ## Authors 173 | 174 | This library is managed and maintained by Ian Goodfellow (Google Brain), 175 | Nicolas Papernot (Pennsylvania State University), and 176 | Ryan Sheatsley (Pennsylvania State University). 
FLAGS = flags.FLAGS


def mnist_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                        test_end=10000, viz_enabled=True, nb_epochs=6,
                        batch_size=128, nb_classes=10, source_samples=10,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)

    Trains a model on MNIST, then, for each of the first `source_samples`
    test inputs, runs a targeted JSMA attack towards every class other than
    the input's label and prints success and distortion statistics.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object; clean_train_clean_eval holds the
             clean test accuracy and clean_train_adv_eval holds
             1 - (attack success rate)
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder (shapes follow the dimensions above
    # instead of repeating the literals)
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    figure = None
    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = X_test[sample_ind:(sample_ind+1)]

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
            sample, (img_rows, img_cols, channels))

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute the fraction of input features that were modified
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = X_test[sample_ind].reshape(-1)
            nb_changed = np.count_nonzero(adv_x_reshape != test_in_reshape)
            percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

            # Display the original and adversarial images side-by-side
            if viz_enabled:
                figure = pair_visual(
                    np.reshape(sample, (img_rows, img_cols)),
                    np.reshape(adv_x, (img_rows, img_cols)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                adv_x, (img_rows, img_cols, channels))

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm.
    # `perturbations` also has a cell for each (true class, sample) pair that
    # is never attacked and stays 0, so average over the attacks actually
    # run instead of over every cell of the array.
    percent_perturbed = float(np.sum(perturbations)) / nb_targets_tried
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only.
    # Select the successful cells before averaging; multiplying by the mask
    # and averaging would still divide by the failed and untried cells.
    succeeded = (results == 1)
    if succeeded.any():
        percent_perturb_succ = np.mean(perturbations[succeeded])
    else:
        percent_perturb_succ = 0.
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        plt.close(figure)
        _ = grid_visual(grid_viz_data)

    return report


def main(argv=None):
    """Run the JSMA tutorial with the values of the command-line flags."""
    mnist_tutorial_jsma(viz_enabled=FLAGS.viz_enabled,
                        nb_epochs=FLAGS.nb_epochs,
                        batch_size=FLAGS.batch_size,
                        nb_classes=FLAGS.nb_classes,
                        source_samples=FLAGS.source_samples,
                        learning_rate=FLAGS.learning_rate)


if __name__ == '__main__':
    flags.DEFINE_boolean('viz_enabled', True, 'Visualize adversarial ex.')
    flags.DEFINE_integer('nb_epochs', 6, 'Number of epochs to train model')
    flags.DEFINE_integer('batch_size', 128, 'Size of training batches')
    flags.DEFINE_integer('nb_classes', 10, 'Number of output classes')
    flags.DEFINE_integer('source_samples', 10, 'Nb of test inputs to attack')
    flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')

    tf.app.run()
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | from six.moves import xrange 8 | import tensorflow as tf 9 | from tensorflow.python.platform import flags 10 | 11 | import logging 12 | import os 13 | from cleverhans.attacks import CarliniWagnerL2 14 | from cleverhans.utils import pair_visual, grid_visual, AccuracyReport 15 | from cleverhans.utils import set_log_level 16 | from cleverhans.utils_mnist import data_mnist 17 | from cleverhans.utils_tf import model_train, model_eval, tf_model_load 18 | from cleverhans_tutorials.tutorial_models import make_basic_cnn 19 | 20 | FLAGS = flags.FLAGS 21 | 22 | 23 | def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0, 24 | test_end=10000, viz_enabled=True, nb_epochs=6, 25 | batch_size=128, nb_classes=10, source_samples=10, 26 | learning_rate=0.001, attack_iterations=100, 27 | model_path=os.path.join("models", "mnist"), 28 | targeted=True): 29 | """ 30 | MNIST tutorial for Carlini and Wagner's attack 31 | :param train_start: index of first training set example 32 | :param train_end: index of last training set example 33 | :param test_start: index of first test set example 34 | :param test_end: index of last test set example 35 | :param viz_enabled: (boolean) activate plots of adversarial examples 36 | :param nb_epochs: number of epochs to train model 37 | :param batch_size: size of training batches 38 | :param nb_classes: number of output classes 39 | :param source_samples: number of test inputs to attack 40 | :param learning_rate: learning rate for training 41 | :param model_path: path to the model file 42 | :param targeted: should we run a targeted attack? or untargeted? 
43 | :return: an AccuracyReport object 44 | """ 45 | # Object used to keep track of (and return) key accuracies 46 | report = AccuracyReport() 47 | 48 | # MNIST-specific dimensions 49 | img_rows = 28 50 | img_cols = 28 51 | channels = 1 52 | 53 | # Set TF random seed to improve reproducibility 54 | tf.set_random_seed(1234) 55 | 56 | # Create TF session 57 | sess = tf.Session() 58 | print("Created TensorFlow session.") 59 | 60 | set_log_level(logging.DEBUG) 61 | 62 | # Get MNIST test data 63 | X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, 64 | train_end=train_end, 65 | test_start=test_start, 66 | test_end=test_end) 67 | 68 | # Define input TF placeholder 69 | x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels)) 70 | y = tf.placeholder(tf.float32, shape=(None, nb_classes)) 71 | 72 | # Define TF model graph 73 | model = make_basic_cnn() 74 | preds = model(x) 75 | print("Defined TensorFlow model graph.") 76 | 77 | ########################################################################### 78 | # Training the model using TensorFlow 79 | ########################################################################### 80 | 81 | # Train an MNIST model 82 | train_params = { 83 | 'nb_epochs': nb_epochs, 84 | 'batch_size': batch_size, 85 | 'learning_rate': learning_rate, 86 | 'train_dir': os.path.join(*os.path.split(model_path)[:-1]), 87 | 'filename': os.path.split(model_path)[-1] 88 | } 89 | 90 | rng = np.random.RandomState([2017, 8, 30]) 91 | # check if we've trained before, and if we have, use that pre-trained model 92 | if os.path.exists(model_path + ".meta"): 93 | tf_model_load(sess, model_path) 94 | else: 95 | model_train(sess, x, y, preds, X_train, Y_train, args=train_params, 96 | save=os.path.exists("models"), rng=rng) 97 | 98 | # Evaluate the accuracy of the MNIST model on legitimate test examples 99 | eval_params = {'batch_size': batch_size} 100 | accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) 101 
| assert X_test.shape[0] == test_end - test_start, X_test.shape 102 | print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) 103 | report.clean_train_clean_eval = accuracy 104 | 105 | ########################################################################### 106 | # Craft adversarial examples using Carlini and Wagner's approach 107 | ########################################################################### 108 | nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' 109 | print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + 110 | ' adversarial examples') 111 | print("This could take some time ...") 112 | 113 | # Instantiate a CW attack object 114 | cw = CarliniWagnerL2(model, back='tf', sess=sess) 115 | 116 | if viz_enabled: 117 | assert source_samples == nb_classes 118 | idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0] 119 | for i in range(nb_classes)] 120 | if targeted: 121 | if viz_enabled: 122 | # Initialize our array for grid visualization 123 | grid_shape = (nb_classes, nb_classes, img_rows, img_cols, channels) 124 | grid_viz_data = np.zeros(grid_shape, dtype='f') 125 | 126 | adv_inputs = np.array( 127 | [[instance] * nb_classes for instance in X_test[idxs]], 128 | dtype=np.float32) 129 | else: 130 | adv_inputs = np.array( 131 | [[instance] * nb_classes for 132 | instance in X_test[:source_samples]], dtype=np.float32) 133 | 134 | one_hot = np.zeros((nb_classes, nb_classes)) 135 | one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 136 | 137 | adv_inputs = adv_inputs.reshape( 138 | (source_samples * nb_classes, img_rows, img_cols, 1)) 139 | adv_ys = np.array([one_hot] * source_samples, 140 | dtype=np.float32).reshape((source_samples * 141 | nb_classes, nb_classes)) 142 | yname = "y_target" 143 | else: 144 | if viz_enabled: 145 | # Initialize our array for grid visualization 146 | grid_shape = (nb_classes, 2, img_rows, img_cols, channels) 147 | grid_viz_data = np.zeros(grid_shape, dtype='f') 148 | 149 | 
adv_inputs = X_test[idxs] 150 | else: 151 | adv_inputs = X_test[:source_samples] 152 | 153 | adv_ys = None 154 | yname = "y" 155 | 156 | cw_params = {'binary_search_steps': 1, 157 | yname: adv_ys, 158 | 'max_iterations': attack_iterations, 159 | 'learning_rate': 0.1, 160 | 'batch_size': source_samples * nb_classes if 161 | targeted else source_samples, 162 | 'initial_const': 10} 163 | 164 | adv = cw.generate_np(adv_inputs, 165 | **cw_params) 166 | 167 | eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} 168 | if targeted: 169 | adv_accuracy = model_eval( 170 | sess, x, y, preds, adv, adv_ys, args=eval_params) 171 | else: 172 | if viz_enabled: 173 | adv_accuracy = 1 - \ 174 | model_eval(sess, x, y, preds, adv, Y_test[ 175 | idxs], args=eval_params) 176 | else: 177 | adv_accuracy = 1 - \ 178 | model_eval(sess, x, y, preds, adv, Y_test[ 179 | :source_samples], args=eval_params) 180 | 181 | if viz_enabled: 182 | for j in range(nb_classes): 183 | if targeted: 184 | for i in range(nb_classes): 185 | grid_viz_data[i, j] = adv[i * nb_classes + j] 186 | else: 187 | grid_viz_data[j, 0] = adv_inputs[j] 188 | grid_viz_data[j, 1] = adv[j] 189 | 190 | print(grid_viz_data.shape) 191 | 192 | print('--------------------------------------') 193 | 194 | # Compute the number of adversarial examples that were successfully found 195 | print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy)) 196 | report.clean_train_adv_eval = 1. - adv_accuracy 197 | 198 | # Compute the average distortion introduced by the algorithm 199 | percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, 200 | axis=(1, 2, 3))**.5) 201 | print('Avg. 
def main(argv=None):
    """
    Run the Carlini-Wagner MNIST tutorial with the values parsed into FLAGS.

    :param argv: not used by this function; accepted so the function can be
                 handed to `tf.app.run()` as the program entry point
    """
    mnist_tutorial_cw(viz_enabled=FLAGS.viz_enabled,
                      nb_epochs=FLAGS.nb_epochs,
                      batch_size=FLAGS.batch_size,
                      nb_classes=FLAGS.nb_classes,
                      source_samples=FLAGS.source_samples,
                      learning_rate=FLAGS.learning_rate,
                      attack_iterations=FLAGS.attack_iterations,
                      model_path=FLAGS.model_path,
                      targeted=FLAGS.targeted)


if __name__ == '__main__':
    flags.DEFINE_boolean('viz_enabled', True, 'Visualize adversarial ex.')
    flags.DEFINE_integer('nb_epochs', 6, 'Number of epochs to train model')
    flags.DEFINE_integer('batch_size', 128, 'Size of training batches')
    flags.DEFINE_integer('nb_classes', 10, 'Number of output classes')
    flags.DEFINE_integer('source_samples', 10, 'Nb of test inputs to attack')
    flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')
    flags.DEFINE_string('model_path', os.path.join("models", "mnist"),
                        'Path to save or load the model file')
    # attack_iterations is an iteration count, so it must be an integer flag;
    # declaring it as a boolean prevents values such as 1000 from being
    # parsed from the command line.
    flags.DEFINE_integer('attack_iterations', 100,
                         'Number of iterations to run attack; 1000 is good')
    flags.DEFINE_boolean('targeted', True,
                         'Run the tutorial in targeted mode?')

    tf.app.run()
Defenses](https://www.kaggle.com/nips-2017-adversarial-learning-competition) 6 | which will be held as a part of NIPS'17 conference. 7 | 8 | This toolkit includes: 9 | 10 | * Dev dataset which participants can use for development and testing of their 11 | attacks and defenses 12 | * Sample adversarial attacks 13 | * Sample adversarial defenses 14 | * Tool to run attacks against defenses and compute score. 15 | 16 | ## Installation 17 | 18 | ### Prerequisites 19 | 20 | Following software required to use this package: 21 | 22 | * Python 2.7 with installed [Numpy](http://www.numpy.org/) 23 | and [Pillow](https://python-pillow.org/) packages. 24 | * [Docker](https://www.docker.com/) 25 | 26 | Additionally, all provided examples are written with use of 27 | the [TensorFlow](https://www.tensorflow.org/). 28 | Thus you may find useful to install TensorFlow to experiment with the examples, 29 | however this is not strictly necessary. 30 | 31 | ### Installation procedure 32 | 33 | To be able to run the examples you need to download checkpoints for provided models 34 | as well as dataset. 35 | 36 | To download the dataset and all checkpoints run following: 37 | 38 | ```bash 39 | ./download_data.sh 40 | ``` 41 | 42 | If you only need to download the dataset then you can run: 43 | 44 | ```bash 45 | # ${DATASET_IMAGES_DIR} is a directory to save images 46 | python dataset/download_images.py \ 47 | --input_file=dataset/dev_dataset.csv \ 48 | --output_dir=${DATASET_IMAGES_DIR} 49 | ``` 50 | 51 | ## Dataset 52 | 53 | This toolkit includes DEV dataset with 1000 labelled images. 54 | DEV dataset could be used for development and testing of adversarial attacks 55 | and defenses. 56 | 57 | Details about dataset are [here](dataset/README.md). 
58 | 59 | ## Sample attacks and defenses 60 | 61 | Toolkit includes examples of attacks and defenses in the following directories: 62 | 63 | * `sample_attacks/` - directory with examples of attacks: 64 | * `sample_attacks/fgsm/` - Fast gradient sign attack. 65 | * `sample_attacks/noop/` - No-op attack, which just copies images unchanged. 66 | * `sample_attacks/random_noise/` - Attack which adds random noise to images. 67 | * `sample_targeted_attacks/` - directory with examples of targeted attacks: 68 | * `sample_targeted_attacks/step_target_class/` - one step towards target 69 | class attack. This is not a particularly good targeted attack, but it 70 | demonstrates how a targeted attack could be written. 71 | * `sample_targeted_attacks/iter_target_class/` - iterative target class 72 | attack. This is a pretty good white-box attack, 73 | but it does not do well in a black-box setting. 74 | * `sample_defenses/` - directory with examples of defenses: 75 | * `sample_defenses/base_inception_model/` - baseline inception classifier, 76 | which actually does not provide any defense against adversarial examples. 77 | * `sample_defenses/adv_inception_v3/` - adversarially trained Inception v3 78 | model from [Adversarial Machine Learning at 79 | Scale](https://arxiv.org/abs/1611.01236) paper. 80 | * `sample_defenses/ens_adv_inception_resnet_v2/` - Inception ResNet v2 81 | model which is adversarially trained against an ensemble of different 82 | kinds of adversarial examples. Model is described in 83 | [Ensemble Adversarial Training: Attacks and 84 | Defenses](https://arxiv.org/abs/1705.07204) paper. 85 | 86 | ### Structure of attacks and defenses 87 | 88 | Each attack and defense should be stored in a separate subdirectory, 89 | should be self-contained and intended to be run inside a Docker container.
90 | 91 | Directory with each attack or defense should contain file `metadata.json` 92 | in JSON format with following fields: 93 | 94 | * `type` could be one of `"attack"`, `"defense"` or `"targeted_attack"`. 95 | * `container` is a URL of Docker container inside which attack or defense 96 | should be run. 97 | * `container_gpu` is an optional field, URL of Docker container with 98 | GPU support. 99 | * `entry_point` is a script which launches attack or defense. 100 | 101 | Example of `metadata.json`: 102 | 103 | ```json 104 | { 105 | "type": "attack", 106 | "container": "gcr.io/tensorflow/tensorflow:1.1.0", 107 | "container_gpu": "gcr.io/tensorflow/tensorflow:1.1.0-gpu", 108 | "entry_point": "run_attack.sh" 109 | } 110 | ``` 111 | 112 | #### Non-targeted attack 113 | 114 | Entry point script for a non-targeted attack should accept three arguments: 115 | input directory, output directory and maximum size of adversarial perturbation 116 | (in [infinity norm](https://en.wikipedia.org/wiki/Uniform_norm)). It will be 117 | invoked in the following way: 118 | 119 | ```bash 120 | attack_entry_point.sh INPUT_DIR OUTPUT_DIR MAX_SIZE_OF_PERTURBATION 121 | ``` 122 | 123 | Input directory will contain source images from dataset in PNG format and attack 124 | has to write adversarial images into output directory. 125 | Input images are 299x299 pixels RGB images, output images should have the same 126 | size and also be written in PNG format. 127 | Filenames of adversarial images should be the same as filenames of 128 | corresponding source images from the dataset. 129 | 130 | Non-targeted attack is expected to produce adversarial images which are likely 131 | to be misclassified by image classifier (assuming that it can classify source 132 | images well). 133 | 134 | Difference between each generated adversarial image and corresponding source 135 | image has to be within specified maximum size of adversarial perturbation.
136 | If it's not the case then competition runtime will automatically clip 137 | adversarial image to be within the limits. 138 | 139 | #### Targeted attack 140 | 141 | Entry point script for a targeted attack accepts the same set of arguments as 142 | for non-targeted attack: input directory, output directory, maximum size of 143 | perturbation. 144 | 145 | The only difference is that input directory will contain `target_class.csv` file 146 | in addition to images. Each line of `target_class.csv` will contain 147 | a comma-separated pair of image filename and target class. 148 | 149 | Targeted attack is expected to produce adversarial image which will 150 | be likely classified as desired target class by image classifier. 151 | 152 | Difference between source images and generated adversarial images 153 | should be within specified maximum size of perturbation, 154 | similarly to non-targeted attack. 155 | 156 | #### Defense 157 | 158 | Entry point script for a defense accepts two arguments: input directory and 159 | output file. It will be invoked in the following way: 160 | 161 | ```bash 162 | defense_entry_point.sh INPUT_DIR OUTPUT_FILE 163 | ``` 164 | 165 | Input directory will contain a bunch of adversarial images in PNG format. 166 | Defense has to classify all these images and write its predictions into 167 | output file. Each line of the output file should contain comma separated image 168 | filename and predicted label. 169 | 170 | ## How to run attacks against defenses 171 | 172 | Script `run_attacks_and_defenses.py` runs all attacks against all defenses 173 | and computes scores of each attack and each defense.
174 | 175 | You can run it in the following way: 176 | 177 | ```bash 178 | python run_attacks_and_defenses.py \ 179 | --attacks_dir="${DIRECTORY_WITH_ATTACKS}" \ 180 | --targeted_attacks_dir="${DIRECTORY_WITH_TARGETED_ATTACKS}" \ 181 | --defenses_dir="${DIRECTORY_WITH_DEFENSES}" \ 182 | --dataset_dir="${DIRECTORY_WITH_DATASET_IMAGES}" \ 183 | --intermediate_results_dir="${TEMP_DIRECTORY_FOR_INTERMEDIATE_RESULTS}" \ 184 | --dataset_metadata=dataset/dataset.csv \ 185 | --output_dir="${OUTPUT_DIRECTORY}" \ 186 | --epsilon="${MAXIMUM_SIZE_OF_ADVERSARIAL_PERTURBATION}" 187 | ``` 188 | 189 | If you have a GPU card and 190 | [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) installed then you can 191 | additionally pass the `--gpu` argument to `run_attacks_and_defenses.py` 192 | so attacks and defenses will be able to take advantage of the GPU to speed up 193 | computations. 194 | 195 | Alternatively instead of running `run_attacks_and_defenses.py` directly and 196 | providing all command line arguments you can use helper script 197 | `run_attacks_and_defenses.sh` to run all attacks and defenses from this toolkit 198 | against each other and save results to temporary directory. 199 | 200 | NOTE: You should clean up the temporary directory created by 201 | `run_attacks_and_defenses.sh` after running it. 202 | 203 | `run_attacks_and_defenses.py` will write the following files into the output directory: 204 | 205 | * `accuracy_on_attacks.csv` with matrix which will contain number of correctly 206 | classified images for each pair of non-targeted attack and defense. 207 | Columns of the matrix are defenses, rows of the matrix are 208 | non-targeted attacks. 209 | * `accuracy_on_targeted_attacks.csv` with matrix which will contain number of 210 | correctly classified images for each pair of targeted attack and defense. 211 | Columns of the matrix are defenses, rows of the matrix are targeted attacks.
212 | * `hit_target_class.csv` with matrix which will contain number of times images 213 | were classified as target class by defense for each given targeted attack. 214 | Columns of the matrix are defenses, rows of the matrix are targeted attacks. 215 | * `defense_ranking.csv` with ranking of all defenses (best - first, 216 | worst - last, ties in arbitrary order), along with the score of each defense. 217 | Score for each defense is computed as total number of correctly classified 218 | adversarial images by defense classifier. 219 | * `attack_ranking.csv` with ranking of all non-targeted attacks (best - first, 220 | worst - last, ties in arbitrary order), along with the score of each attack. 221 | Score for each attack is computed as total number of times the attack was able to 222 | cause incorrect classification. 223 | * `targeted_attack_ranking.csv` with ranking of all targeted attacks 224 | (best - first, worst - last, ties in arbitrary order), along with the score of 225 | each targeted attack. 226 | Score is computed as number of times the attack was able to force defense 227 | classifier to recognize adversarial image as specified target class. 228 | 229 | Additionally, if flag `--save_all_classification` is provided then 230 | `run_attacks_and_defenses.py` will save file `all_classification.csv` 231 | which contains classification predictions (along with true classes and 232 | target classes) for each adversarial image generated by each attack 233 | and classified by each defense. This might be useful for debugging.
234 | -------------------------------------------------------------------------------- /cleverhans/utils_th.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import math 7 | import numpy as np 8 | import six 9 | import time 10 | import warnings 11 | 12 | from collections import OrderedDict 13 | 14 | from .utils import batch_indices, _ArgsWrapper 15 | 16 | import theano 17 | import theano.tensor as T 18 | 19 | import keras 20 | 21 | floatX = theano.config.floatX 22 | 23 | _TEST_PHASE = np.uint8(0) 24 | _TRAIN_PHASE = np.uint8(1) 25 | 26 | 27 | def get_or_compute_grads(loss_or_grads, params): 28 | if isinstance(loss_or_grads, list): 29 | return loss_or_grads 30 | else: 31 | return theano.grad(loss_or_grads, params) 32 | 33 | 34 | def adadelta(loss_or_grads, params, learning_rate=1.0, rho=0.95, epsilon=1e-6): 35 | """ From Lasagne 36 | """ 37 | grads = get_or_compute_grads(loss_or_grads, params) 38 | updates = OrderedDict() 39 | 40 | # Using theano constant to prevent upcasting of float32 41 | one = T.constant(1) 42 | 43 | for param, grad in zip(params, grads): 44 | value = param.get_value(borrow=True) 45 | # accu: accumulate gradient magnitudes 46 | accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), 47 | broadcastable=param.broadcastable) 48 | # delta_accu: accumulate update magnitudes (recursively!) 
49 | delta_accu = theano.shared(np.zeros(value.shape, dtype=value.dtype), 50 | broadcastable=param.broadcastable) 51 | 52 | # update accu (as in rmsprop) 53 | accu_new = rho * accu + (one - rho) * grad ** 2 54 | updates[accu] = accu_new 55 | 56 | # compute parameter update, using the 'old' delta_accu 57 | update = (grad * T.sqrt(delta_accu + epsilon) / 58 | T.sqrt(accu_new + epsilon)) 59 | updates[param] = param - learning_rate * update 60 | 61 | # update delta_accu (as accu, but accumulating updates) 62 | delta_accu_new = rho * delta_accu + (one - rho) * update ** 2 63 | updates[delta_accu] = delta_accu_new 64 | 65 | return updates 66 | 67 | 68 | def model_loss(y, model, mean=True): 69 | """ 70 | Define loss of Theano graph 71 | :param y: correct labels 72 | :param model: output of the model 73 | :return: return mean of loss if True, otherwise return vector with per 74 | sample loss 75 | """ 76 | warnings.warn("CleverHans support for Theano is deprecated and " 77 | "will be dropped on 2017-11-08.") 78 | 79 | from_logits = "softmax" not in str(model).lower() 80 | 81 | if from_logits: 82 | model = T.nnet.softmax(model) 83 | 84 | out = T.nnet.categorical_crossentropy(model, y) 85 | 86 | if mean: 87 | out = T.mean(out) 88 | return out 89 | 90 | 91 | def th_model_train(x, y, predictions, params, X_train, Y_train, save=False, 92 | predictions_adv=None, evaluate=None, args=None): 93 | """ 94 | Train a Theano graph 95 | :param x: input placeholder 96 | :param y: output placeholder (for labels) 97 | :param predictions: model output predictions 98 | :param params: model trainable weights 99 | :param X_train: numpy array with training inputs 100 | :param Y_train: numpy array with training outputs 101 | :param save: boolean controling the save operation 102 | :param predictions_adv: if set with the adversarial example tensor, 103 | will run adversarial training 104 | :param args: dict or argparse `Namespace` object. 
105 | Should contain `nb_epochs`, `learning_rate`, 106 | `batch_size` 107 | :return: True if model trained 108 | """ 109 | warnings.warn("CleverHans support for Theano is deprecated and " 110 | "will be dropped on 2017-11-08.") 111 | 112 | args = _ArgsWrapper(args or {}) 113 | 114 | print("Starting model training using Theano.") 115 | 116 | # Define loss 117 | loss = model_loss(y, predictions) 118 | if predictions_adv is not None: 119 | loss = (loss + model_loss(y, predictions_adv)) / 2 120 | 121 | print("Defined optimizer.") 122 | 123 | train_step = theano.function( 124 | inputs=[x, y], 125 | outputs=[loss], 126 | givens={keras.backend.learning_phase(): _TRAIN_PHASE}, 127 | allow_input_downcast=True, 128 | on_unused_input='ignore', 129 | updates=adadelta( 130 | loss, params, learning_rate=args.learning_rate, rho=0.95, 131 | epsilon=1e-08) 132 | ) 133 | 134 | for epoch in six.moves.xrange(args.nb_epochs): 135 | print("Epoch " + str(epoch)) 136 | 137 | # Compute number of batches 138 | nb_batches = int(math.ceil(float(len(X_train)) / args.batch_size)) 139 | assert nb_batches * args.batch_size >= len(X_train) 140 | 141 | prev = time.time() 142 | for batch in range(nb_batches): 143 | 144 | # Compute batch start and end indices 145 | start, end = batch_indices(batch, len(X_train), args.batch_size) 146 | 147 | # Perform one training step 148 | train_step(X_train[start:end], Y_train[start:end]) 149 | assert end >= len(X_train) # Check that all examples were used 150 | cur = time.time() 151 | print("\tEpoch took " + str(cur - prev) + " seconds") 152 | prev = cur 153 | if evaluate is not None: 154 | evaluate() 155 | 156 | return True 157 | 158 | 159 | def th_model_eval(x, y, model, X_test, Y_test, args=None): 160 | """ 161 | Compute the accuracy of a Theano model on some data 162 | :param x: input placeholder 163 | :param y: output placeholder (for labels) 164 | :param model: model output predictions 165 | :param X_test: numpy array with training inputs 166 | :param 
Y_test: numpy array with training outputs 167 | :param args: dict or argparse `Namespace` object. 168 | Should contain `batch_size` 169 | :return: a float with the accuracy value 170 | """ 171 | warnings.warn("CleverHans support for Theano is deprecated and " 172 | "will be dropped on 2017-11-08.") 173 | 174 | args = _ArgsWrapper(args or {}) 175 | 176 | # Define symbol for accuracy 177 | acc_value = keras.metrics.categorical_accuracy(y, model) 178 | # Keras 2.0 categorical_accuracy no longer calculates the mean internally 179 | # T.mean is called in here and is backward compatible with previous 180 | # versions of Keras 181 | acc_value = T.mean(acc_value) 182 | 183 | # Init result var 184 | accuracy = 0.0 185 | 186 | nb_batches = int(math.ceil(float(len(X_test)) / args.batch_size)) 187 | assert nb_batches * args.batch_size >= len(X_test) 188 | 189 | eval_step = theano.function( 190 | inputs=[x, y], 191 | outputs=acc_value, 192 | givens={keras.backend.learning_phase(): _TEST_PHASE}, 193 | on_unused_input="ignore", 194 | allow_input_downcast=True, 195 | updates=None 196 | ) 197 | 198 | for batch in range(nb_batches): 199 | if batch % 100 == 0 and batch > 0: 200 | print("Batch " + str(batch)) 201 | 202 | # Must not use the `batch_indices` function here, because it 203 | # repeats some examples. 204 | # It's acceptable to repeat during training, but not eval. 
205 | start = batch * args.batch_size 206 | end = min(len(X_test), start + args.batch_size) 207 | cur_batch_size = end - start 208 | 209 | # The last batch may be smaller than all others, so we need to 210 | # account for variable batch size here 211 | accuracy += cur_batch_size * \ 212 | eval_step(X_test[start:end], Y_test[start:end]) 213 | assert end >= len(X_test) 214 | 215 | # Divide by number of examples to get final value 216 | accuracy /= len(X_test) 217 | 218 | return accuracy 219 | 220 | 221 | def batch_eval(th_inputs, th_outputs, numpy_inputs, args=None): 222 | """ 223 | A helper function that computes a tensor on numpy inputs by batches. 224 | 225 | :param th_inputs: 226 | :param th_outputs: 227 | :param numpy_inputs: 228 | :param args: dict or argparse `Namespace` object. 229 | Should contain `batch_size` 230 | """ 231 | warnings.warn("CleverHans support for Theano is deprecated and " 232 | "will be dropped on 2017-11-08.") 233 | 234 | args = _ArgsWrapper(args or {}) 235 | 236 | n = len(numpy_inputs) 237 | assert n > 0 238 | assert n == len(th_inputs) 239 | m = numpy_inputs[0].shape[0] 240 | for i in six.moves.xrange(1, n): 241 | assert numpy_inputs[i].shape[0] == m 242 | out = [] 243 | for _ in th_outputs: 244 | out.append([]) 245 | 246 | eval_step = theano.function( 247 | inputs=th_inputs, 248 | outputs=th_outputs, 249 | givens={keras.backend.learning_phase(): _TEST_PHASE}, 250 | allow_input_downcast=True, 251 | updates=None 252 | ) 253 | 254 | for start in six.moves.xrange(0, m, args.batch_size): 255 | batch = start // args.batch_size 256 | if batch % 100 == 0 and batch > 0: 257 | print("Batch " + str(batch)) 258 | 259 | # Compute batch start and end indices 260 | start = batch * args.batch_size 261 | end = start + args.batch_size 262 | numpy_input_batches = [numpy_input[start:end] 263 | for numpy_input in numpy_inputs] 264 | cur_batch_size = numpy_input_batches[0].shape[0] 265 | assert cur_batch_size <= args.batch_size 266 | for e in 
numpy_input_batches: 267 | assert e.shape[0] == cur_batch_size 268 | 269 | numpy_output_batches = eval_step(*numpy_input_batches) 270 | for e in numpy_output_batches: 271 | assert e.shape[0] == cur_batch_size, e.shape 272 | for out_elem, numpy_output_batch in zip(out, numpy_output_batches): 273 | out_elem.append(numpy_output_batch) 274 | 275 | out = [np.concatenate(x, axis=0) for x in out] 276 | for e in out: 277 | assert e.shape[0] == m, e.shape 278 | return out 279 | 280 | 281 | def model_argmax(x, predictions, sample): 282 | """ 283 | Helper function that computes the current class prediction 284 | :param x: the input placeholder 285 | :param predictions: the model's symbolic output 286 | :param sample: (1 x 1 x img_rows x img_cols) numpy array with sample input 287 | :return: the argmax output of predictions, i.e. the current predicted class 288 | """ 289 | warnings.warn("CleverHans support for Theano is deprecated and " 290 | "will be dropped on 2017-11-08.") 291 | 292 | probabilities = theano.function( 293 | inputs=[x], 294 | outputs=predictions, 295 | givens={keras.backend.learning_phase(): _TEST_PHASE}, 296 | allow_input_downcast=True, 297 | updates=None 298 | )(x) 299 | 300 | return np.argmax(probabilities) 301 | 302 | 303 | def l2_batch_normalize(x, epsilon=1e-12): 304 | """ 305 | Helper function to normalize a batch of vectors. 
306 | :param x: the input placeholder 307 | :param epsilon: stabilizes division 308 | :return: the batch of l2 normalized vector 309 | """ 310 | epsilon = np.asarray(epsilon, dtype=floatX) 311 | x_shape = x.shape 312 | x = T.reshape(x, (x.shape[0], -1)) 313 | x /= (epsilon + T.max(T.abs_(x), 1, keepdims=True)) 314 | square_sum = T.sum(T.sqr(x), 1, keepdims=True) 315 | x /= T.sqrt(np.sqrt(epsilon) + square_sum) 316 | return x.reshape(x_shape) 317 | 318 | 319 | def kl_with_logits(q_logits, p_logits): 320 | """Helper function to compute kl-divergence KL(q || p) 321 | """ 322 | q = T.nnet.softmax(q_logits) 323 | q_log = T.nnet.logsoftmax(q_logits) 324 | p_log = T.nnet.logsoftmax(p_logits) 325 | loss = T.sum(q * (q_log - p_log), axis=1) 326 | return loss 327 | --------------------------------------------------------------------------------