├── .gitattributes
├── images
│   ├── cat.jpg
│   ├── lemon.jpg
│   ├── Ducreux.jpg
│   ├── flower.jpg
│   ├── orange.jpg
│   ├── pretzel.jpg
│   ├── Cellsx128.png
│   ├── Flower-id.png
│   ├── astronauts.jpg
│   ├── cell-tower.jpg
│   ├── cowboy-hat.jpg
│   ├── orangutan.jpg
│   ├── mirror-image.jpg
│   ├── sickle-cells.jpg
│   ├── imagenet-boat.png
│   ├── imagenet-caffe2.png
│   ├── imagenet-meme.jpg
│   ├── aircraft-carrier.jpg
│   ├── imagenet-montage.jpg
│   └── Places-cnn-visual-example.png
├── CODE_OF_CONDUCT.md
├── start_ipython_notebook.sh
├── new_op
│   ├── sample
│   │   ├── sample.md
│   │   ├── add5_op_test.py
│   │   └── run_add5_op.py
│   ├── readme.md
│   └── template
│       ├── add5_op.cu
│       ├── add5_op.h
│       └── add5_op.cc
├── README.md
├── .jenkins
│   └── test.sh
├── py_gen
│   ├── Training_a_Model.py
│   ├── sparseNN.py
│   ├── Model_Quickload.py
│   ├── Getting_Caffe1_Models_for_Translation.py
│   ├── create_your_own_dataset.py
│   ├── Python_Op.py
│   ├── MNIST_Dataset_and_Databases.py
│   ├── Toy_Regression.py
│   ├── Basics.py
│   ├── Loading_Pretrained_Models.py
│   ├── CIFAR10_Part2.py
│   └── Control_Ops.py
├── Training_a_Model.ipynb
├── CONTRIBUTING.md
├── jupyter_notebook_config.py
├── tutorials_to_script_converter.py
├── helpers.py
├── nomnigraph_model_exploration.ipynb
├── Model_Quickload.ipynb
├── NOTICE
├── Getting_Caffe1_Models_for_Translation.ipynb
├── MNIST_Dataset_and_Databases.ipynb
├── experimental
│   └── Immediate.ipynb
├── Python_Op.ipynb
└── LICENSE

/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-documentation
2 |
--------------------------------------------------------------------------------
/images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/cat.jpg
--------------------------------------------------------------------------------
/images/lemon.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/lemon.jpg
--------------------------------------------------------------------------------
/images/Ducreux.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/Ducreux.jpg
--------------------------------------------------------------------------------
/images/flower.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/flower.jpg
--------------------------------------------------------------------------------
/images/orange.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/orange.jpg
--------------------------------------------------------------------------------
/images/pretzel.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/pretzel.jpg
--------------------------------------------------------------------------------
/images/Cellsx128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/Cellsx128.png
--------------------------------------------------------------------------------
/images/Flower-id.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/Flower-id.png
--------------------------------------------------------------------------------
/images/astronauts.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/astronauts.jpg
--------------------------------------------------------------------------------
/images/cell-tower.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/cell-tower.jpg
--------------------------------------------------------------------------------
/images/cowboy-hat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/cowboy-hat.jpg
--------------------------------------------------------------------------------
/images/orangutan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/orangutan.jpg
--------------------------------------------------------------------------------
/images/mirror-image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/mirror-image.jpg
--------------------------------------------------------------------------------
/images/sickle-cells.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/sickle-cells.jpg
--------------------------------------------------------------------------------
/images/imagenet-boat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/imagenet-boat.png
--------------------------------------------------------------------------------
/images/imagenet-caffe2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/imagenet-caffe2.png
--------------------------------------------------------------------------------
/images/imagenet-meme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/imagenet-meme.jpg
--------------------------------------------------------------------------------
/images/aircraft-carrier.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/aircraft-carrier.jpg
--------------------------------------------------------------------------------
/images/imagenet-montage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/imagenet-montage.jpg
--------------------------------------------------------------------------------
/images/Places-cnn-visual-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookarchive/tutorials/HEAD/images/Places-cnn-visual-example.png
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4 | Please read the [full text](https://code.fb.com/codeofconduct/)
5 | so that you can understand what actions will and will not be tolerated.
--------------------------------------------------------------------------------
/start_ipython_notebook.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # This script simply starts the ipython notebook and allows all network machines
3 | # to access it.
4 |
5 | # Use the following command for very verbose prints.
6 | # GLOG_logtostderr=1 GLOG_v=1 ipython notebook --ip='*'
7 |
8 | # Use the following command for a normal run.
9 | ipython notebook --ip='*'
10 |
--------------------------------------------------------------------------------
/new_op/sample/sample.md:
--------------------------------------------------------------------------------
1 | # Sample implementation for Add5 and Add5Gradient
2 |
3 | ## add5_op.cc
4 |
5 | TODO-1:
6 | ```cpp
7 | output_ptr[i] = data_ptr[i] + 5;
8 | ```
9 |
10 | TODO-2:
11 | ```cpp
12 | output_ptr[i] = data_ptr[i];
13 | ```
14 |
15 | ## add5_op.cu
16 |
17 | TODO-3:
18 | ```cpp
19 | output[i] = data[i] + 5;
20 | ```
21 |
22 | TODO-4:
23 | ```cpp
24 | output[i] = data[i];
25 | ```
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Executing tutorials
2 |
3 | To run the tutorials, use Jupyter. For more details, refer to https://caffe2.ai/docs/tutorials
4 |
5 | ## Contributing to tutorials
6 |
7 | Python scripts are generated from these tutorials so that changes are easy to review. To make this generation automatic, patch your Jupyter config using [this file](jupyter_notebook_config.py).
8 |
9 | If you would like to run the sync manually, run this [script](tutorials_to_script_converter.py). It will regenerate the Python scripts for all the tutorials in this folder.
10 |
11 | ## License
12 |
13 | The Caffe2 Tutorials are [Apache 2.0 licensed](https://github.com/caffe2/caffe2/blob/master/LICENSE).
--------------------------------------------------------------------------------
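For example, a one-off manual sync from Python looks like this (a minimal sketch; it assumes you run it from a checkout of this folder, since the converter locates notebooks relative to its own file):

```python
# Regenerates py_gen/*.py for every notebook in this folder using the
# converter shipped in this repo (its main() is shown later in this dump).
import tutorials_to_script_converter

tutorials_to_script_converter.main()
```
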
/.jenkins/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -ex
4 |
5 | LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
6 | ROOT_DIR=$(cd "$LOCAL_DIR"/.. && pwd)
7 |
8 | # Figure out which Python to use
9 | PYTHON="python"
10 | if [ -n "$BUILD_ENVIRONMENT" ]; then
11 |   if [[ "$BUILD_ENVIRONMENT" == py2* ]]; then
12 |     PYTHON="python2"
13 |   elif [[ "$BUILD_ENVIRONMENT" == py3* ]]; then
14 |     PYTHON="python3"
15 |   fi
16 | fi
17 |
18 | cd "$ROOT_DIR"
19 | "$PYTHON" tutorials_to_script_converter.py
20 | git status
21 | if git diff --quiet HEAD; then
22 |   echo "Source tree is clean."
23 | else
24 |   echo "After running a tutorial -> script sync there are changes. This probably means you edited an ipython notebook without a proper sync to a script.
Please see caffe2/python/tutorials/README.md for more information"
25 |   exit 1
26 | fi
--------------------------------------------------------------------------------
/py_gen/Training_a_Model.py:
--------------------------------------------------------------------------------
1 | #########################################################
2 | #
3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. #
4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION.     #
5 | #
6 | #########################################################
7 |
8 |
9 | # coding: utf-8
10 |
11 | # ### Dataset Formats
12 | #
13 | # When you look at a model and its dataset, one of the things that will be specified is how the dataset is organized. Additionally, within Caffe2, when you load the data you will need to relay this specification. When trying to optimize training and increase its speed, you may find discussions related to changing this format. For the purposes of this tutorial you don't need to worry about that, but it is good to recognize the different flavors and the fact that the raw data is loaded into temporary databases to facilitate the network's training and testing.
14 | #
15 | # #### Data Ordering
16 | #
17 | # * NCHW: [description]
18 | # * Others: [description]
19 | #
20 | # #### Databases
21 | #
22 | # * minidb: [description]
23 | # * leveldb: [description]
24 | # * others...
25 | #
26 |
27 |
--------------------------------------------------------------------------------
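As an aside for the placeholder above, here is a minimal sketch of what the NCHW ordering means (the shapes are illustrative assumptions, not values from the tutorial):

```python
import numpy as np

# NCHW orders a batch as (N images, C channels, H rows, W columns).
# One 227x227 three-channel image in NCHW layout:
nchw = np.zeros((1, 3, 227, 227), dtype=np.float32)

# The same data in an NHWC layout ("Others") would be shaped (1, 227, 227, 3);
# converting between the two is a transpose of the axes:
nhwc = nchw.transpose(0, 2, 3, 1)
```
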
/Training_a_Model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "### Dataset Formats\n",
8 |     "\n",
9 |     "When you look at a model and its dataset, one of the things that will be specified is how the dataset is organized. Additionally, within Caffe2, when you load the data you will need to relay this specification. When trying to optimize training and increase its speed, you may find discussions related to changing this format. For the purposes of this tutorial you don't need to worry about that, but it is good to recognize the different flavors and the fact that the raw data is loaded into temporary databases to facilitate the network's training and testing.\n",
10 |     "\n",
11 |     "#### Data Ordering\n",
12 |     "\n",
13 |     "* NCHW: [description]\n",
14 |     "* Others: [description]\n",
15 |     "\n",
16 |     "#### Databases\n",
17 |     "\n",
18 |     "* minidb: [description]\n",
19 |     "* leveldb: [description]\n",
20 |     "* others...\n"
21 |    ]
22 |   }
23 |  ],
24 |  "metadata": {
25 |   "kernelspec": {
26 |    "display_name": "Python 2",
27 |    "language": "python",
28 |    "name": "python2"
29 |   },
30 |   "language_info": {
31 |    "codemirror_mode": {
32 |     "name": "ipython",
33 |     "version": 2
34 |    },
35 |    "file_extension": ".py",
36 |    "mimetype": "text/x-python",
37 |    "name": "python",
38 |    "nbconvert_exporter": "python",
39 |    "pygments_lexer": "ipython2",
40 |    "version": "2.7.13"
41 |   }
42 |  },
43 |  "nbformat": 4,
44 |  "nbformat_minor": 2
45 | }
46 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Caffe2 Tutorials
2 | We want to make contributing to this project as easy and transparent as
3 | possible.
4 |
5 | ## Our Development Process
6 | GitHub is the source of truth. Commits are made via pull requests.
7 |
8 | ## Pull Requests
9 | We actively welcome your pull requests.
10 |
11 | 1. Fork the repo and create your branch from `master`.
12 | 2. If you've added code that should be tested, add tests.
13 | 3. If you've changed APIs, update the documentation.
14 | 4. Ensure the test suite passes.
15 | 5. Make sure your code lints.
16 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
17 |
18 | ## Contributor License Agreement ("CLA")
19 | In order to accept your pull request, we need you to submit a CLA. You only need
20 | to do this once to work on any of Facebook's open source projects.
21 |
22 | Complete your CLA here: <https://code.facebook.com/cla>
23 |
24 | ## Issues
25 | We use GitHub issues to track public bugs. Please ensure your description is
26 | clear and has sufficient instructions to be able to reproduce the issue.
27 |
28 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
29 | disclosure of security bugs. In those cases, please go through the process
30 | outlined on that page and do not file a public issue.
31 |
32 | ## Coding Style
33 | * 4 spaces (for Python) for indentation rather than tabs
34 | * 80 character line length
35 |
36 | ## License
37 | By contributing to Caffe2 Tutorials, you agree that your contributions will be licensed
38 | under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/new_op/sample/add5_op_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | from caffe2.python import core
21 | import caffe2.python.hypothesis_test_util as hu
22 | import caffe2.python.serialized_test.serialized_test_util as serial
23 | from hypothesis import given
24 | import hypothesis.strategies as st
25 | import numpy as np
26 | import unittest
27 |
28 |
29 | class TestAdd5(hu.HypothesisTestCase):
30 |
31 |     @given(N=st.integers(min_value=1, max_value=10),
32 |            C=st.integers(min_value=1, max_value=10),
33 |            # NB: use hu.gcs when built with CUDA
34 |            **hu.gcs_cpu_only)
35 |     def test_add5(self, N, C, gc, dc):
36 |         # set the seed
37 |         np.random.seed(101)
38 |         # TODO: test double, int and int64
39 |         data = np.random.rand(N, C).astype(np.float32)
40 |         op = core.CreateOperator("Add5", ["data"], ["output"])
41 |
42 |         # device check
43 |         self.assertDeviceChecks(dc, op, [data], [0])
44 |
45 |         # gradient check
46 |         self.assertGradientChecks(gc, op, [data], 0, [0])
47 |
48 |         # reference check
49 |         def ref_add5(input):
50 |             return [input + 5]
51 |         self.assertReferenceChecks(gc, op, [data], ref_add5)
52 |
53 |
54 | if __name__ == "__main__":
55 |     unittest.main()
56 |
--------------------------------------------------------------------------------
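For a quick, hypothesis-free sanity check of the same contract (a sketch; it assumes the Add5 op from this lab is registered in your Caffe2 build, i.e. TODO-1 has been filled in as in sample.md):

```python
import numpy as np
from caffe2.python import core, workspace

workspace.ResetWorkspace()
x = np.random.rand(4, 4).astype(np.float32)
workspace.FeedBlob("data", x)
workspace.RunOperatorOnce(core.CreateOperator("Add5", ["data"], ["output"]))
# Mirrors ref_add5 above: every output element is the input element plus 5.
assert np.allclose(workspace.FetchBlob("output"), x + 5)
```
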
/new_op/sample/run_add5_op.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from caffe2.python import core, workspace, caffe2_pb2
16 | import numpy as np
17 |
18 |
19 | def run_add5_and_add5gradient_op(device):
20 |     # clear the workspace before running the operator
21 |     workspace.ResetWorkspace()
22 |     add5 = core.CreateOperator("Add5",
23 |                                ["X"],
24 |                                ["Y"],
25 |                                device_option=device)
26 |     print("==> Running Add5 op:")
27 |     workspace.FeedBlob("X", (np.random.rand(5, 5)), device_option=device)
28 |     print("Input of Add5: ", workspace.FetchBlob("X"))
29 |     workspace.RunOperatorOnce(add5)
30 |     print("Output of Add5: ", workspace.FetchBlob("Y"))
31 |
32 |     print("\n\n==> Running Add5Gradient op:")
33 |     print("Input of Add5Gradient: ", workspace.FetchBlob("Y"))
34 |     add5gradient = core.CreateOperator("Add5Gradient",
35 |                                        ["Y"],
36 |                                        ["Z"],
37 |                                        device_option=device)
38 |     workspace.RunOperatorOnce(add5gradient)
39 |     print("Output of Add5Gradient: ", workspace.FetchBlob("Z"))
40 |
41 |
42 | def main():
43 |     # try device_type=caffe2_pb2.CUDA if CUDA is available in your build
44 |     device = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)
45 |     run_add5_and_add5gradient_op(device)
46 |
47 |
48 | if __name__ == "__main__":
49 |     main()
50 |
--------------------------------------------------------------------------------
/new_op/readme.md:
--------------------------------------------------------------------------------
1 | # Onboarding Example
2 |
3 | Through this onboarding lab, you can get a sense of how to write
4 | an operator for Caffe2.
5 |
6 | ## Step 0: set up your development environment
7 |
8 | Probably try `https://github.com/pytorch/pytorch/blob/master/scripts/fbcode-dev-setup/onnx_c2_setup.sh`
9 |
10 | ## Step 1: start with the templates
11 |
12 | Copy `add5_op.h` and `add5_op.cc` to the folder [caffe2/operators/](https://github.com/pytorch/pytorch/tree/master/caffe2/operators)
13 |
14 | ## Step 2: fill in the missing logic for the CPU part and build
15 |
16 | Find the TODO marks in `add5_op.cc` and fill in the code you think will work.
17 | After that, build PyTorch using `python setup.py build_deps develop`
18 |
19 | You can find a sample implementation in [sample.md](https://github.com/caffe2/tutorials/tree/master/new_op/sample/sample.md)
20 |
21 | ## Step 3: run the `Add5` and `Add5Gradient` operators and test them
22 |
23 | You can run the `Add5` operator using `python run_add5_op.py`
24 |
25 | You can follow the examples in [caffe2/python/operator_test/](https://github.com/pytorch/pytorch/tree/master/caffe2/python/operator_test) to write test
26 | code for the `Add5` and `Add5Gradient` ops. There are three important functions to use:
27 | * assertDeviceChecks
28 | * assertGradientChecks
29 | * assertReferenceChecks
30 |
31 | Find more details in the [source code](https://github.com/pytorch/pytorch/blob/master/caffe2/python/hypothesis_test_util.py).
32 | For Buck users, please check ifbpy to run the Python script.
33 |
34 | ## Step 4: fill in the missing logic for the CUDA part and build
35 |
36 | Find the TODO marks in `add5_op.cu` and fill in the code you think will work.
37 | After that, build PyTorch using `python setup.py build_deps develop`
38 |
39 | You can find a sample implementation in [sample.md](https://github.com/caffe2/tutorials/tree/master/new_op/sample/sample.md)
40 |
41 | ## Step 5: run the `Add5` and `Add5Gradient` CUDA versions and test them
42 |
43 | For the hypothesis test, use `hu.gcs` instead of `hu.gcs_cpu_only`
44 |
45 | For `CreateOperator`, pass `caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)` as the
46 | named parameter `device_option`
47 |
48 |
49 | ## Further Reading
50 | https://caffe2.ai/docs/custom-operators.html
51 |
--------------------------------------------------------------------------------
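A sketch of what Step 5 looks like in practice (it assumes a CUDA-enabled build of Caffe2 and that the CUDA kernels above have been filled in; the blob names are illustrative):

```python
import numpy as np
from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2

# Same flow as run_add5_op.py, but targeting the GPU registration.
device = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
op = core.CreateOperator("Add5", ["X"], ["Y"], device_option=device)
workspace.FeedBlob("X", np.random.rand(5, 5).astype(np.float32),
                   device_option=device)
workspace.RunOperatorOnce(op)
print(workspace.FetchBlob("Y"))
```
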
/new_op/template/add5_op.cu:
--------------------------------------------------------------------------------
1 | /**
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | #include "caffe2/operators/add5_op.h"
18 |
19 | #include "caffe2/core/context_gpu.h"
20 | #include "caffe2/utils/math.h"
21 |
22 | namespace caffe2 {
23 |
24 | template <typename T>
25 | __global__ void Add5Kernel(const int N, const T* data, T* output) {
26 |   CUDA_1D_KERNEL_LOOP(i, N) {
27 |     // TODO - 3
28 |   }
29 | }
30 |
31 | template <>
32 | template <typename T>
33 | bool Add5Op<CUDAContext>::DoRunWithType() {
34 |   const auto& data = Input(DATA);
35 |   const auto N = data.size();
36 |   const auto* data_ptr = data.template data<T>();
37 |   auto* output = Output(0);
38 |   output->ResizeLike(data);
39 |   auto* output_ptr = output->template mutable_data<T>();
40 |
41 |   Add5Kernel<T><<<
42 |       CAFFE_GET_BLOCKS(N),
43 |       CAFFE_CUDA_NUM_THREADS,
44 |       0,
45 |       context_.cuda_stream()>>>(N, data_ptr, output_ptr);
46 |   return true;
47 | }
48 |
49 | template <typename T>
50 | __global__ void Add5GradientKernel(const int N, const T* data, T* output) {
51 |   CUDA_1D_KERNEL_LOOP(i, N) {
52 |     // GI[0] = GO[0]
53 |     // TODO - 4
54 |   }
55 | }
56 |
57 | template <>
58 | template <typename T>
59 | bool Add5GradientOp<CUDAContext>::DoRunWithType() {
60 |   const auto& data = Input(DATA);
61 |   const auto N = data.size();
62 |   const auto* data_ptr = data.template data<T>();
63 |   auto* output = Output(0);
64 |   output->ResizeLike(data);
65 |   auto* output_ptr = output->template mutable_data<T>();
66 |
67 |   Add5GradientKernel<T><<<
68 |       CAFFE_GET_BLOCKS(N),
69 |       CAFFE_CUDA_NUM_THREADS,
70 |       0,
71 |       context_.cuda_stream()>>>(N, data_ptr, output_ptr);
72 |   return true;
73 | }
74 |
75 | REGISTER_CUDA_OPERATOR(Add5, Add5Op<CUDAContext>);
76 | REGISTER_CUDA_OPERATOR(Add5Gradient, Add5GradientOp<CUDAContext>);
77 |
78 | }  // namespace caffe2
79 |
--------------------------------------------------------------------------------
/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Configuration file for a jupyter-notebook. 16 | # Put this script into ~/.jupyter/ in order to get the post_save_hook implemented below. 17 | # If you already have one, merge the content. 18 | 19 | import os 20 | from subprocess import check_call 21 | 22 | def post_save(model, os_path, contents_manager): 23 | """post-save hook for converting notebooks to .py scripts""" 24 | if model['type'] != 'notebook': 25 | return # only do this for notebooks 26 | 27 | notebook_dir, notebook_file_name = os.path.split(os_path) 28 | check_call( 29 | ['jupyter', 'nbconvert', '--to', 'script', notebook_file_name], 30 | cwd=notebook_dir, 31 | ) 32 | py_name = os.path.splitext(notebook_file_name)[0] + ".py" 33 | full_py_name = os.path.join(notebook_dir, py_name) 34 | 35 | # Create py_gen/ dir if it doesn't exist and move the file there 36 | new_dir = os.path.join(notebook_dir, 'py_gen') 37 | if not os.path.exists(new_dir): 38 | os.makedirs(new_dir) 39 | new_py_location = os.path.join(new_dir, py_name) 40 | os.rename(full_py_name, new_py_location) 41 | full_py_name = new_py_location 42 | 43 | with open(full_py_name, 'r') as f: 44 | data = f.read() 45 | lines = data.split('\n') 46 | good_lines = [] 47 | for line in lines: 48 | if ("get_ipython().magic" not in line 49 | and "get_ipython().run_line_magic" not in line): 50 | good_lines.append(line) 51 | # Update the file with do not edit preamble 52 | with open(full_py_name, 'w') as f: 53 | f.write("#########################################################\n") 54 | f.write("#\n") 55 | f.write("# DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. #\n") 56 | f.write("# PLEASE LOOK INTO THE README FOR MORE INFORMATION. #\n") 57 | f.write("#\n") 58 | f.write("#########################################################\n") 59 | f.write("\n") 60 | 61 | for line in good_lines: 62 | f.write(line + '\n') 63 | 64 | 65 | c.FileContentsManager.post_save_hook = post_save 66 | -------------------------------------------------------------------------------- /new_op/template/add5_op.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */
16 |
17 | #ifndef CAFFE_OPERATORS_ADD5_OP_H_
18 | #define CAFFE_OPERATORS_ADD5_OP_H_
19 |
20 | #include "caffe2/core/context.h"
21 | #include "caffe2/core/logging.h"
22 | #include "caffe2/core/operator.h"
23 |
24 | namespace caffe2 {
25 |
26 | template <class Context>
27 | class Add5Op final : public Operator<Context> {
28 |  public:
29 |   USE_OPERATOR_CONTEXT_FUNCTIONS;
30 |   Add5Op(const OperatorDef& operator_def, Workspace* ws)
31 |       : Operator<Context>(operator_def, ws) {
32 |     // In this function, we usually store the argument as a member of the
33 |     // object. No need to do anything in this simple example.
34 |   }
35 |
36 |   bool RunOnDevice() override {
37 |     // Instantiate the template for int/int64_t/float/double tensors.
38 |     // For details, check:
39 |     // https://github.com/pytorch/pytorch/blob/master/caffe2/core/operator.h
40 |     return DispatchHelper<TensorTypes<int, int64_t, float, double>>::call(this, Input(DATA));
41 |   }
42 |
43 |   template <typename T>
44 |   bool DoRunWithType();
45 |
46 |  protected:
47 |   INPUT_TAGS(DATA);
48 |
49 |  private:
50 |   // Object fields are put here.
51 | };
52 |
53 | template <class Context>
54 | class Add5GradientOp final : public Operator<Context> {
55 |  public:
56 |   USE_OPERATOR_CONTEXT_FUNCTIONS;
57 |   Add5GradientOp(const OperatorDef& operator_def, Workspace* ws)
58 |       : Operator<Context>(operator_def, ws) {
59 |     // In this function, we usually store the argument as a member of the
60 |     // object. No need to do anything in this simple example.
61 |   }
62 |
63 |   bool RunOnDevice() override {
64 |     // Instantiate the template for int/int64_t/float/double tensors.
65 |     // For details, check:
66 |     // https://github.com/pytorch/pytorch/blob/master/caffe2/core/operator.h
67 |     return DispatchHelper<TensorTypes<int, int64_t, float, double>>::call(this, Input(DATA));
68 |   }
69 |
70 |   template <typename T>
71 |   bool DoRunWithType();
72 |
73 |  protected:
74 |   INPUT_TAGS(DATA);
75 |
76 |  private:
77 |   // Object fields are put here.
78 | };
79 |
80 | }  // namespace caffe2
81 |
82 | #endif  // CAFFE_OPERATORS_ADD5_OP_H_
83 |
--------------------------------------------------------------------------------
/new_op/template/add5_op.cc:
--------------------------------------------------------------------------------
1 | /**
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | #include "caffe2/operators/add5_op.h"
18 |
19 | #include "caffe2/core/operator.h"
20 | #include "caffe2/core/tensor.h"
21 |
22 | namespace caffe2 {
23 |
24 | template <>
25 | template <typename T>
26 | bool Add5Op<CPUContext>::DoRunWithType() {
27 |   const auto& data = Input(DATA);
28 |   auto N = data.size();
29 |   const auto* data_ptr = data.template data<T>();
30 |   auto* output = Output(0);
31 |   output->ResizeLike(data);
32 |   auto* output_ptr = output->template mutable_data<T>();
33 |
34 |   for (auto i = 0; i < N; i++) {
35 |     // TODO - 1
36 |   }
37 |   return true;
38 | }
39 |
40 | template <>
41 | template <typename T>
42 | bool Add5GradientOp<CPUContext>::DoRunWithType() {
43 |   const auto& data = Input(DATA);
44 |   auto N = data.size();
45 |   const auto* data_ptr = data.template data<T>();
46 |   auto* output = Output(0);
47 |   output->ResizeLike(data);
48 |   auto* output_ptr = output->template mutable_data<T>();
49 |
50 |   for (auto i = 0; i < N; i++) {
51 |     // GI[0] = GO[0]
52 |     // TODO - 2
53 |   }
54 |   return true;
55 | }
56 |
57 | REGISTER_CPU_OPERATOR(Add5, Add5Op<CPUContext>);
58 | OPERATOR_SCHEMA(Add5)
59 |     .NumInputs(1)
60 |     .NumOutputs(1)
61 |     .IdenticalTypeAndShape()
62 |     .AllowInplace({{0, 0}})
63 |     .SetDoc(R"DOC(
64 | Element-wise add-5 operation. Each element in the output equals the
65 | corresponding element in the input data plus 5.
66 |
67 | <details>
68 |
69 | <summary> <b>Example</b> </summary>
70 |
71 | **Code**
72 |
73 | ```
74 |
75 | workspace.ResetWorkspace()
76 |
77 | op = core.CreateOperator(
78 |     "Add5",
79 |     ["X"],
80 |     ["Y"],
81 | )
82 |
83 | workspace.FeedBlob("X", (np.random.randint(100, size=(5,5))))
84 | print("X before running op:", workspace.FetchBlob("X"))
85 | workspace.RunOperatorOnce(op)
86 | print("X after running op:", workspace.FetchBlob("Y"))
87 |
88 | ```
89 |
90 | **Result**
91 |
92 | ```
93 |
94 | X before running op:
95 |
96 | X after running op:
97 | [[6 2 3 3 0]
98 |  [4 5 8 0 5]
99 |  [4 6 4 3 6]
100 |  [0 6 7 2 8]
101 |  [1 4 6 7 5]]
102 |
103 | ```
104 |
105 | </details>
106 |
107 | )DOC")
108 |     .Input(0, "X", "Input tensor.")
109 |     .Output(0, "Y", "Output tensor");
110 |
111 |
112 | REGISTER_CPU_OPERATOR(Add5Gradient, Add5GradientOp<CPUContext>);
113 | OPERATOR_SCHEMA(Add5Gradient)
114 |     .NumInputs(1)
115 |     .NumOutputs(1);
116 |
117 | class GetAdd5Gradient final : public GradientMakerBase {
118 |   using GradientMakerBase::GradientMakerBase;
119 |
120 |   std::vector<OperatorDef> GetGradientDefs() override {
121 |     return SingleGradientDef(
122 |         "Add5Gradient",
123 |         "",
124 |         std::vector<string>{GO(0)},
125 |         std::vector<string>{GI(0)});
126 |   }
127 | };
128 |
129 | REGISTER_GRADIENT(Add5, GetAdd5Gradient);
130 |
131 | }  // namespace caffe2
132 |
--------------------------------------------------------------------------------
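Since GetAdd5Gradient above forwards GO(0) straight to GI(0) (the gradient of Y = X + 5 with respect to X is the identity), a quick check from Python (a sketch; it assumes the ops are registered in your build and TODO-2 has been filled in as in sample.md):

```python
import numpy as np
from caffe2.python import core, workspace

workspace.ResetWorkspace()
g = np.random.rand(3, 3).astype(np.float32)
workspace.FeedBlob("dY", g)
workspace.RunOperatorOnce(core.CreateOperator("Add5Gradient", ["dY"], ["dX"]))
# Identity gradient: dX should come back equal to dY.
assert np.allclose(workspace.FetchBlob("dX"), g)
```
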
print("Done: resolve_compute_meta") 74 | 75 | 76 | # In[ ]: 77 | 78 | 79 | # Assemble the model given preprocessor and model building fuction 80 | model = assemble_utils.assemble_model( 81 | name='sparse_nn', 82 | input_feature_schema=build_preproc.input_feature_schema( 83 | preproc_options), 84 | trainer_extra_schema=build_preproc.trainer_extra_schema( 85 | preproc_options), 86 | build_preproc_fun=build_preproc, 87 | build_model_fun=sparse_nn.build_model, 88 | preproc_options=preproc_options, 89 | model_options= default_config.DEFAULT_MODEL_OPTIONS 90 | ) 91 | 92 | 93 | # In[ ]: 94 | 95 | 96 | # Train model one the given sample dataset 97 | estimated_cost = perf_estimator_execution.estimate_perf_locally(model, default_config.DEFAULT_DATASET) 98 | print(estimated_cost) 99 | 100 | 101 | -------------------------------------------------------------------------------- /tutorials_to_script_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | from os import listdir 17 | from subprocess import check_call 18 | 19 | 20 | def convert_notebook(notebook_path): 21 | # Generate .py file from a notebook 22 | notebook_dir, notebook_file_name = os.path.split(notebook_path) 23 | check_call( 24 | ['jupyter', 'nbconvert', '--to', 'script', notebook_file_name], 25 | cwd=notebook_dir, 26 | ) 27 | py_name = os.path.splitext(notebook_file_name)[0] + ".py" 28 | full_py_name = os.path.join(notebook_dir, py_name) 29 | 30 | # Create py_gen/ dir if it doesn't exist and move the file there 31 | new_dir = os.path.join(notebook_dir, 'py_gen') 32 | if not os.path.exists(new_dir): 33 | os.makedirs(new_dir) 34 | new_py_location = os.path.join(new_dir, py_name) 35 | os.rename(full_py_name, new_py_location) 36 | full_py_name = new_py_location 37 | 38 | with open(full_py_name, 'r') as f: 39 | data = f.read() 40 | lines = data.split('\n') 41 | good_lines = [] 42 | for line in lines: 43 | if ("get_ipython().magic" not in line 44 | and "get_ipython().run_line_magic" not in line): 45 | good_lines.append(line) 46 | # Update the file with do not edit preamble 47 | with open(full_py_name, 'w') as f: 48 | f.write("# Copyright (c) Facebook, Inc. 
and its affiliates.\n") 49 | f.write("#\n") 50 | f.write("# Licensed under the Apache License, Version 2.0 (the "License");\n") 51 | f.write("# you may not use this file except in compliance with the License.\n") 52 | f.write("# You may obtain a copy of the License at\n") 53 | f.write("#\n") 54 | f.write("# http://www.apache.org/licenses/LICENSE-2.0\n") 55 | f.write("#\n") 56 | f.write("# Unless required by applicable law or agreed to in writing, software\n") 57 | f.write("# distributed under the License is distributed on an "AS IS" BASIS,\n") 58 | f.write("# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n") 59 | f.write("# See the License for the specific language governing permissions and\n") 60 | f.write("# limitations under the License.\n") 61 | f.write("\n") 62 | f.write("#########################################################\n") 63 | f.write("#\n") 64 | f.write("# DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. #\n") 65 | f.write("# PLEASE LOOK INTO THE README FOR MORE INFORMATION. #\n") 66 | f.write("#\n") 67 | f.write("#########################################################\n") 68 | f.write("\n") 69 | 70 | for line in good_lines: 71 | f.write(line + '\n') 72 | 73 | 74 | def main(): 75 | tutorials_folder = os.path.dirname(os.path.realpath(__file__)) 76 | print("tutorials_folder: ", tutorials_folder) 77 | files = [ 78 | os.path.join(tutorials_folder, f) 79 | for f in listdir(tutorials_folder) 80 | if os.path.isfile(os.path.join(tutorials_folder, f)) 81 | and f.endswith('ipynb') 82 | ] 83 | for f in files: 84 | convert_notebook(f) 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /py_gen/Model_Quickload.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Model Quickload 12 | # 13 | # This notebook will show you how to quickly load a pretrained SqueezeNet model and test it on images of your choice in four main steps. 14 | # 15 | # 1. Load the model 16 | # 2. Format the input 17 | # 3. Run the test 18 | # 4. Process the results 19 | # 20 | # The model used in this tutorial has been pretrained on the full 1000 class ImageNet dataset, and is downloaded from Caffe2's [Model Zoo](https://github.com/caffe2/caffe2/wiki/Model-Zoo). For an all around more in-depth tutorial on using pretrained models check out the [Loading Pretrained Models](https://github.com/caffe2/caffe2/blob/master/caffe2/python/tutorials/Loading_Pretrained_Models.ipynb) tutorial. 21 | # 22 | # Before this script will work, you need to download the model and install it. 
You can do this by running: 23 | # 24 | # ``` 25 | # sudo python -m caffe2.python.models.download -i squeezenet 26 | # ``` 27 | # 28 | # Or make a folder named `squeezenet`, download each file listed below to it, and place it in the `/caffe2/python/models/` directory: 29 | # * [predict_net.pb](https://download.caffe2.ai/models/squeezenet/predict_net.pb) 30 | # * [init_net.pb](https://download.caffe2.ai/models/squeezenet/init_net.pb) 31 | # 32 | # Notice, the helper function *parseResults* will translate the integer class label of the top result to an English label by searching through the [inference codes file](inference_codes.txt). If you want to really test the model's capabilities, pick a code from the file, find an image representing that code, and test the model with it! 33 | 34 | # In[6]: 35 | 36 | 37 | from __future__ import absolute_import 38 | from __future__ import division 39 | from __future__ import print_function 40 | from __future__ import unicode_literals 41 | import numpy as np 42 | import operator 43 | # load up the caffe2 workspace 44 | from caffe2.python import workspace 45 | # choose your model here (use the downloader first) 46 | from caffe2.python.models import squeezenet as mynet 47 | # helper image processing functions 48 | import helpers 49 | 50 | ##### Load the Model 51 | # Load the pre-trained model 52 | init_net = mynet.init_net 53 | predict_net = mynet.predict_net 54 | 55 | # Initialize the predictor with SqueezeNet's init_net and predict_net 56 | p = workspace.Predictor(init_net, predict_net) 57 | 58 | ##### Select and format the input image 59 | # use whatever image you want (urls work too) 60 | # img = "https://upload.wikimedia.org/wikipedia/commons/a/ac/Pretzel.jpg" 61 | # img = "images/cat.jpg" 62 | # img = "images/cowboy-hat.jpg" 63 | # img = "images/cell-tower.jpg" 64 | # img = "images/Ducreux.jpg" 65 | # img = "images/pretzel.jpg" 66 | # img = "images/orangutan.jpg" 67 | # img = "images/aircraft-carrier.jpg" 68 | img = "images/flower.jpg" 69 | 70 | # average mean to subtract from the image 71 | mean = 128 72 | # the size of images that the model was trained with 73 | input_size = 227 74 | 75 | # use the image helper to load the image and convert it to NCHW 76 | img = helpers.loadToNCHW(img, mean, input_size) 77 | 78 | ##### Run the test 79 | # submit the image to net and get a tensor of results 80 | results = p.run({'data': img}) 81 | 82 | ##### Process the results 83 | # Quick way to get the top-1 prediction result 84 | # Squeeze out the unnecessary axis. This returns a 1-D array of length 1000 85 | preds = np.squeeze(results) 86 | # Get the prediction and the confidence by finding the maximum value and index of maximum value in preds array 87 | curr_pred, curr_conf = max(enumerate(preds), key=operator.itemgetter(1)) 88 | print("Top-1 Prediction: {}".format(curr_pred)) 89 | print("Top-1 Confidence: {}\n".format(curr_conf)) 90 | 91 | # Lookup our result from the inference list 92 | response = helpers.parseResults(results) 93 | print(response) 94 | 95 | 96 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ## @package helpers 16 | # Module caffe2.python.tutorials.helpers 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | from __future__ import unicode_literals 21 | import numpy as np 22 | import skimage.io 23 | import skimage.transform 24 | 25 | 26 | def crop_center(img, cropx, cropy): 27 | y, x, c = img.shape 28 | startx = x // 2 - (cropx // 2) 29 | starty = y // 2 - (cropy // 2) 30 | return img[starty:starty + cropy, startx:startx + cropx] 31 | 32 | 33 | def rescale(img, input_height, input_width): 34 | # print("Original image shape:" + str(img.shape) + " --> it should be in H, W, C!") 35 | # print("Model's input shape is %dx%d") % (input_height, input_width) 36 | aspect = img.shape[1] / float(img.shape[0]) 37 | # print("Orginal aspect ratio: " + str(aspect)) 38 | if(aspect > 1): 39 | # landscape orientation - wide image 40 | res = int(aspect * input_height) 41 | imgScaled = skimage.transform.resize( 42 | img, 43 | (input_width, res), 44 | preserve_range=False) 45 | if(aspect < 1): 46 | # portrait orientation - tall image 47 | res = int(input_width / aspect) 48 | imgScaled = skimage.transform.resize( 49 | img, 50 | (res, input_height), 51 | preserve_range=False) 52 | if(aspect == 1): 53 | imgScaled = skimage.transform.resize( 54 | img, 55 | (input_width, input_height), 56 | preserve_range=False) 57 | return imgScaled 58 | 59 | 60 | def load(img): 61 | # load and transform image 62 | img = skimage.img_as_float(skimage.io.imread(img)).astype(np.float32) 63 | return img 64 | 65 | 66 | def chw(img): 67 | # switch to CHW 68 | img = img.swapaxes(1, 2).swapaxes(0, 1) 69 | return img 70 | 71 | 72 | def bgr(img): 73 | # switch to BGR 74 | img = img[(2, 1, 0), :, :] 75 | return img 76 | 77 | 78 | def removeMean(img, mean): 79 | # remove mean for better results 80 | img = img * 255 - mean 81 | return img 82 | 83 | 84 | def batch(img): 85 | # add batch size 86 | img = img[np.newaxis, :, :, :].astype(np.float32) 87 | return img 88 | 89 | 90 | def parseResults(results): 91 | results = np.asarray(results) 92 | results = np.delete(results, 1) 93 | index = 0 94 | highest = 0 95 | arr = np.empty((0, 2), dtype=object) 96 | arr[:, 0] = int(10) 97 | arr[:, 1:] = float(10) 98 | for i, r in enumerate(results): 99 | # imagenet index begins with 1! 100 | i = i + 1 101 | arr = np.append(arr, np.array([[i, r]]), axis=0) 102 | if (r > highest): 103 | highest = r 104 | index = i 105 | 106 | # top 3 results 107 | print("Raw top 3 results:", sorted(arr, key=lambda x: x[1], reverse=True)[:3]) 108 | 109 | # now we can grab the code list 110 | with open('inference_codes.txt', 'r') as f: 111 | for line in f: 112 | code, result = line.partition(":")[::2] 113 | if (code.strip() == str(index)): 114 | answer = "The image contains a %s with a %s percent probability." 
\ 115 | % (result.strip()[1:-2], highest * 100) 116 | f.closed 117 | return answer 118 | 119 | 120 | def loadToNCHW(img, mean, input_size): 121 | img = load(img) 122 | img = rescale(img, input_size, input_size) 123 | img = crop_center(img, input_size, input_size) 124 | img = chw(img) 125 | img = bgr(img) 126 | img = removeMean(img, mean) 127 | img = batch(img) 128 | return img 129 | -------------------------------------------------------------------------------- /nomnigraph_model_exploration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from caffe2.python import core, workspace, test_util, dyndep, nomnigraph as ng\n", 10 | "from caffe2.proto import caffe2_pb2\n", 11 | "import pprint as pp\n", 12 | "import graphviz as gv\n", 13 | "import google.protobuf.text_format" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Load model from a protobuf file\n", 21 | "Load a caffe2 model from protobuf and convert it to nomnigraph representation (https://github.com/pytorch/pytorch/tree/master/caffe2/core/nomnigraph)." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Protobuf file of the model.\n", 31 | "MODEL_FILE = \"model.pb\"\n", 32 | "with open(MODEL_FILE, 'rb') as f:\n", 33 | " netdef_proto = caffe2_pb2.NetDef()\n", 34 | " # Use this for text format protobuf file\n", 35 | " #google.protobuf.text_format.Merge(f.read(), netdef_proto)\n", 36 | " # Use this for binary format protobuf file\n", 37 | " netdef_proto.ParseFromString(f.read())\n", 38 | " nnmodule = ng.NNModule(netdef_proto)\n", 39 | " dfGraph = nnmodule.dataFlow" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Simple graph exploration\n", 47 | "Basic graph traversal is supported by nomnigraph." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# Number of operators\n", 57 | "print(len(dfGraph.operators))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Print operator names\n", 67 | "for op in dfGraph.operators:\n", 68 | " print(op.name)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Get inputs, outputs for a particular operator\n", 78 | "OP_NAME = \"Mul\"\n", 79 | "for op in dfGraph.operators:\n", 80 | " if op.name == OP_NAME:\n", 81 | " print(op.name)\n", 82 | " print(\"Inputs\")\n", 83 | " pp.pprint([tensor.name for tensor in op.inputs])\n", 84 | " print(\"Outputs\")\n", 85 | " pp.pprint([tensor.name for tensor in op.outputs])\n", 86 | " break\n", 87 | " " 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Visualize graph" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def viz(graph):\n", 104 | " return gv.Source(str(graph))\n", 105 | "viz(dfGraph)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## Subgraph matching\n", 113 | "Nomnigraph can be used to perform subgraph pattern matching." 
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "mg = ng.NNMatchGraph()\n", 123 | "matchMul = mg.createNode(ng.NeuralNetOperator(\"Mul\"), strict=True)\n", 124 | "matchT = mg.createNode(ng.NeuralNetData(\"*\"), strict=True)\n", 125 | "matchReplaceNan = mg.createNode(ng.NeuralNetOperator(\"ReplaceNaN\"))\n", 126 | "mg.createEdge(matchMul, matchT)\n", 127 | "mg.createEdge(matchT, matchReplaceNan)\n", 128 | "\n", 129 | "matches = nnmodule.match(mg)\n", 130 | "for match in matches:\n", 131 | " # TODO: visualize subgraph\n", 132 | " for node in match.nodes:\n", 133 | " print(node.name)\n", 134 | " break" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Construct and visualize a subgraph" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "sg = ng.NNSubgraph()\n", 151 | "feature_name = \"TEST_FEATURE\"\n", 152 | "for blob in dfGraph.tensors:\n", 153 | " if feature_name in blob.name:\n", 154 | " #print(blob.name)\n", 155 | " sg.addNode(blob)\n", 156 | " [sg.addNode(x) for x in blob.consumers]\n", 157 | " if blob.hasProducer():\n", 158 | " pro = blob.producer\n", 159 | " [sg.addNode(x) for x in pro.inputs]\n", 160 | " sg.addNode(pro)\n", 161 | "sg.induceEdges()\n", 162 | "viz(sg)" 163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 2", 169 | "language": "python", 170 | "name": "python2" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 2 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython2", 182 | "version": "2.7.3" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /Model_Quickload.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Model Quickload\n", 8 | "\n", 9 | "This notebook will show you how to quickly load a pretrained SqueezeNet model and test it on images of your choice in four main steps. \n", 10 | "\n", 11 | "1. Load the model\n", 12 | "2. Format the input\n", 13 | "3. Run the test\n", 14 | "4. Process the results\n", 15 | "\n", 16 | "The model used in this tutorial has been pretrained on the full 1000 class ImageNet dataset, and is downloaded from Caffe2's [Model Zoo](https://github.com/caffe2/caffe2/wiki/Model-Zoo). For an all around more in-depth tutorial on using pretrained models check out the [Loading Pretrained Models](https://github.com/caffe2/caffe2/blob/master/caffe2/python/tutorials/Loading_Pretrained_Models.ipynb) tutorial. \n", 17 | "\n", 18 | "Before this script will work, you need to download the model and install it. 
You can do this by running:\n", 19 | "\n", 20 | "```\n", 21 | "sudo python -m caffe2.python.models.download -i squeezenet\n", 22 | "```\n", 23 | "\n", 24 | "Or make a folder named `squeezenet`, download each file listed below to it, and place it in the `/caffe2/python/models/` directory:\n", 25 | "* [predict_net.pb](https://download.caffe2.ai/models/squeezenet/predict_net.pb)\n", 26 | "* [init_net.pb](https://download.caffe2.ai/models/squeezenet/init_net.pb)\n", 27 | "\n", 28 | "Notice, the helper function *parseResults* will translate the integer class label of the top result to an English label by searching through the [inference codes file](inference_codes.txt). If you want to really test the model's capabilities, pick a code from the file, find an image representing that code, and test the model with it!" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 6, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Top-1 Prediction: 985\n", 41 | "Top-1 Confidence: 0.982226848602\n", 42 | "\n", 43 | "Raw top 3 results: [array([985.0, 0.9822268486022949], dtype=object), array([309.0, 0.011943653225898743], dtype=object), array([946.0, 0.0048101237043738365], dtype=object)]\n", 44 | "The image contains a daisy with a 98.22268486022949 percent probability.\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from __future__ import absolute_import\n", 50 | "from __future__ import division\n", 51 | "from __future__ import print_function\n", 52 | "from __future__ import unicode_literals\n", 53 | "import numpy as np\n", 54 | "import operator\n", 55 | "# load up the caffe2 workspace\n", 56 | "from caffe2.python import workspace\n", 57 | "# choose your model here (use the downloader first)\n", 58 | "from caffe2.python.models import squeezenet as mynet\n", 59 | "# helper image processing functions\n", 60 | "import helpers\n", 61 | "\n", 62 | "##### Load the Model\n", 63 | "# Load the pre-trained model\n", 64 | "init_net = mynet.init_net\n", 65 | "predict_net = mynet.predict_net\n", 66 | "\n", 67 | "# Initialize the predictor with SqueezeNet's init_net and predict_net\n", 68 | "p = workspace.Predictor(init_net, predict_net)\n", 69 | "\n", 70 | "##### Select and format the input image\n", 71 | "# use whatever image you want (urls work too)\n", 72 | "# img = \"https://upload.wikimedia.org/wikipedia/commons/a/ac/Pretzel.jpg\"\n", 73 | "# img = \"images/cat.jpg\"\n", 74 | "# img = \"images/cowboy-hat.jpg\"\n", 75 | "# img = \"images/cell-tower.jpg\"\n", 76 | "# img = \"images/Ducreux.jpg\"\n", 77 | "# img = \"images/pretzel.jpg\"\n", 78 | "# img = \"images/orangutan.jpg\"\n", 79 | "# img = \"images/aircraft-carrier.jpg\"\n", 80 | "img = \"images/flower.jpg\"\n", 81 | "\n", 82 | "# average mean to subtract from the image\n", 83 | "mean = 128\n", 84 | "# the size of images that the model was trained with\n", 85 | "input_size = 227\n", 86 | "\n", 87 | "# use the image helper to load the image and convert it to NCHW\n", 88 | "img = helpers.loadToNCHW(img, mean, input_size)\n", 89 | "\n", 90 | "##### Run the test\n", 91 | "# submit the image to net and get a tensor of results\n", 92 | "results = p.run({'data': img}) \n", 93 | "\n", 94 | "##### Process the results\n", 95 | "# Quick way to get the top-1 prediction result\n", 96 | "# Squeeze out the unnecessary axis. 
This returns a 1-D array of length 1000\n", 97 | "preds = np.squeeze(results)\n", 98 | "# Get the prediction and the confidence by finding the maximum value and index of maximum value in preds array\n", 99 | "curr_pred, curr_conf = max(enumerate(preds), key=operator.itemgetter(1))\n", 100 | "print(\"Top-1 Prediction: {}\".format(curr_pred))\n", 101 | "print(\"Top-1 Confidence: {}\\n\".format(curr_conf))\n", 102 | "\n", 103 | "# Lookup our result from the inference list\n", 104 | "response = helpers.parseResults(results)\n", 105 | "print(response)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 2", 121 | "language": "python", 122 | "name": "python2" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 2 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython2", 134 | "version": "2.7.14" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 2 | 3 | All contributions by Facebook: 4 | Copyright (c) 2016 Facebook Inc. 5 | 6 | All contributions by Google: 7 | Copyright (c) 2015 Google Inc. 8 | All rights reserved. 9 | 10 | All contributions by Yangqing Jia: 11 | Copyright (c) 2015 Yangqing Jia 12 | All rights reserved. 13 | 14 | All contributions from Caffe: 15 | Copyright(c) 2013, 2014, 2015, the respective contributors 16 | All rights reserved. 17 | 18 | All other contributions: 19 | Copyright(c) 2015, 2016 the respective contributors 20 | All rights reserved. 21 | 22 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 23 | copyright over their contributions to Caffe2. The project versioning records 24 | all such contribution and copyright details. If a contributor wants to further 25 | mark their specific copyright on a particular contribution, they should 26 | indicate their copyright solely in the commit message of the change when it is 27 | committed. 28 | 29 | ======================================================================= 30 | Software under third_party 31 | ======================================================================= 32 | Software libraries under third_party are provided as github submodule 33 | links, and their content is not part of the Caffe2 codebase. Their 34 | licences can be found under the respective software repositories. 35 | 36 | ======================================================================= 37 | Earlier BSD License 38 | ======================================================================= 39 | Early development of Caffe2 in 2015 and early 2016 is licensed under the 40 | BSD license. The license is attached below: 41 | 42 | All contributions by Facebook: 43 | Copyright (c) 2016 Facebook Inc. 44 | 45 | All contributions by Google: 46 | Copyright (c) 2015 Google Inc. 47 | All rights reserved. 48 | 49 | All contributions by Yangqing Jia: 50 | Copyright (c) 2015 Yangqing Jia 51 | All rights reserved. 
52 | 53 | All other contributions: 54 | Copyright(c) 2015, 2016 the respective contributors 55 | All rights reserved. 56 | 57 | Redistribution and use in source and binary forms, with or without 58 | modification, are permitted provided that the following conditions are met: 59 | 60 | 1. Redistributions of source code must retain the above copyright notice, this 61 | list of conditions and the following disclaimer. 62 | 2. Redistributions in binary form must reproduce the above copyright notice, 63 | this list of conditions and the following disclaimer in the documentation 64 | and/or other materials provided with the distribution. 65 | 66 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 67 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 68 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 69 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 70 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 71 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 72 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 73 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 74 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 75 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 76 | 77 | 78 | ======================================================================= 79 | Caffe's BSD License 80 | ======================================================================= 81 | Some parts of the caffe2 code is derived from the original Caffe code, which is 82 | created by Yangqing Jia and is now a BSD-licensed open-source project. The Caffe 83 | license is as follows: 84 | 85 | COPYRIGHT 86 | 87 | All contributions by the University of California: 88 | Copyright (c) 2014, The Regents of the University of California (Regents) 89 | All rights reserved. 90 | 91 | All other contributions: 92 | Copyright (c) 2014, the respective contributors 93 | All rights reserved. 94 | 95 | Caffe uses a shared copyright model: each contributor holds copyright over 96 | their contributions to Caffe. The project versioning records all such 97 | contribution and copyright details. If a contributor wants to further mark 98 | their specific copyright on a particular contribution, they should indicate 99 | their copyright solely in the commit message of the change when it is 100 | committed. 101 | 102 | LICENSE 103 | 104 | Redistribution and use in source and binary forms, with or without 105 | modification, are permitted provided that the following conditions are met: 106 | 107 | 1. Redistributions of source code must retain the above copyright notice, this 108 | list of conditions and the following disclaimer. 109 | 2. Redistributions in binary form must reproduce the above copyright notice, 110 | this list of conditions and the following disclaimer in the documentation 111 | and/or other materials provided with the distribution. 112 | 113 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 114 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 115 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 116 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 117 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 118 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 119 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 120 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 121 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 122 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 123 | 124 | CONTRIBUTION AGREEMENT 125 | 126 | By contributing to the BVLC/caffe repository through pull-request, comment, 127 | or otherwise, the contributor releases their content to the 128 | license and copyright terms herein. 129 | 130 | -------------------------------------------------------------------------------- /py_gen/Getting_Caffe1_Models_for_Translation.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Getting Caffe1 Models and Datasets 12 | # 13 | # This tutorial will help you acquire a variety of pre-trained models from the original Caffe repo, and translate these models to a format that Caffe2 expects. If you don't already have the Caffe repo, then clone it like so: 14 | # 15 | # ``` 16 | # git clone https://github.com/BVLC/caffe.git 17 | # ``` 18 | # 19 | # Start by importing the required modules. 20 | 21 | # In[ ]: 22 | 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | from __future__ import unicode_literals 28 | 29 | import os 30 | print("Required modules imported.") 31 | 32 | 33 | # Now you can setup your root folder for Caffe below if you put it somewhere else. You should only be changing the path that's being set for `CAFFE_ROOT`. 34 | 35 | # In[ ]: 36 | 37 | 38 | # You should have checked out original Caffe 39 | # git clone https://github.com/BVLC/caffe.git 40 | # change the CAFFE_ROOT directory below accordingly 41 | CAFFE_ROOT = os.path.expanduser('~/caffe') 42 | 43 | # Make sure Caffe exists where you specified 44 | if not os.path.exists(CAFFE_ROOT): 45 | print("Houston, you may have a problem.") 46 | print("Did you change CAFFE_ROOT to point to your local Caffe repo?") 47 | print("Try running: git clone https://github.com/BVLC/caffe.git") 48 | 49 | 50 | # Here's where you pick your model. There are several listed below such as AlexNet, GoogleNet, and Flickr Style. Uncomment the model you want to download. 
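# As a quick sanity check, you can list the model folders available in your
# local Caffe checkout before picking one. This is a minimal sketch; it only
# assumes the `os` import and the `CAFFE_ROOT` variable defined above.

# In[ ]:


for model_dir in sorted(os.listdir(os.path.join(CAFFE_ROOT, 'models'))):
    print(model_dir)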
51 | 52 | # In[ ]: 53 | 54 | 55 | # Pick a model, and if you don't have it, it will be downloaded 56 | # format below is: the model's folder, and the model's weights file inside that folder 57 | 58 | #MODEL = 'bvlc_alexnet', 'bvlc_alexnet.caffemodel' 59 | #MODEL = 'bvlc_googlenet', 'bvlc_googlenet.caffemodel' 60 | #MODEL = 'finetune_flickr_style', 'finetune_flickr_style.caffemodel' 61 | #MODEL = 'bvlc_reference_caffenet', 'bvlc_reference_caffenet.caffemodel' 62 | MODEL = 'bvlc_reference_rcnn_ilsvrc13', 'bvlc_reference_rcnn_ilsvrc13.caffemodel' 63 | 64 | 65 | # As a reminder, in Caffe, the deploy model is saved in two parts: 66 | # 67 | # 1) deploy.prototxt: contains the network architecture in human-readable protobuf format 68 | # 2) .caffemodel file: contains the model weights and parameters 69 | # 70 | # Therefore, to translate the model to Caffe2, we need both of these files. We already have the `deploy.prototxt` files for all of the models in `~/caffe/models`, so we only need the learned weights. 71 | # 72 | # Below, we'll check to see if the `.caffemodel` file from the model that we uncommented above already exists. If it does not already exist in the location that we specify, we will download it using the `download_model_binary.py` script in the Caffe repo. **Note that .caffemodel files are typically fairly large, so downloading one will take a few moments.** We will be sure to print a message so we know when we can continue. 73 | 74 | # In[ ]: 75 | 76 | 77 | # Scripts to download the models reside here (~/caffe/models) 78 | # After downloading, the data will exist alongside the script 79 | CAFFE_MODELS = os.path.join(CAFFE_ROOT, 'models') 80 | 81 | # this is like: ~/caffe/models/bvlc_alexnet/deploy.prototxt 82 | CAFFE_MODEL_FILE = os.path.join(CAFFE_MODELS, MODEL[0], 'deploy.prototxt') 83 | # this is like: ~/caffe/models/bvlc_alexnet/bvlc_alexnet.caffemodel 84 | CAFFE_PRETRAINED = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[1]) 85 | 86 | # If the model folder doesn't have the goods, then download it 87 | # This is usually a pretty big file with the .caffemodel extension 88 | if not os.path.exists(CAFFE_PRETRAINED): 89 | print(CAFFE_PRETRAINED + " not found. Attempting download. Be patient...\n") 90 | os.system( 91 | os.path.join(CAFFE_ROOT, 'scripts/download_model_binary.py') + 92 | ' ' + 93 | os.path.join(CAFFE_ROOT, 'models', MODEL[0])) 94 | else: 95 | print("You already have " + CAFFE_PRETRAINED + ", skipping download...\n") 96 | 97 | # If the .prototxt file was missing then you're in trouble; cannot continue 98 | if not os.path.exists(CAFFE_MODEL_FILE): 99 | print("Caffe model file, " + CAFFE_MODEL_FILE + " was not found!") 100 | else: 101 | print("Both the deploy.prototxt and .caffemodel files were found, ready to continue!") 102 | # Now we can generate the init net and predict net .pb files 103 | 104 | 105 | # Now that we have both the `deploy.prototxt` and `.caffemodel` files, we can translate the model to the Caffe2 saved model format, which consists of two serialized protobuf files: 106 | # 107 | # 1) init_net.pb 108 | # 2) predict_net.pb 109 | # 110 | # To do this, we will use Caffe2's translator script at `~/caffe2/caffe2/python/caffe_translator.py`. 
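# For reference, the translation step below boils down to this direct shell
# invocation (a sketch; the paths assume the default locations used in this
# tutorial and the model picked above):
#
#     python ~/caffe2/caffe2/python/caffe_translator.py \
#         ~/caffe/models/bvlc_reference_rcnn_ilsvrc13/deploy.prototxt \
#         ~/caffe/models/bvlc_reference_rcnn_ilsvrc13/bvlc_reference_rcnn_ilsvrc13.caffemodel \
#         --init_net init_net.pb --predict_net predict_net.pb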
111 | # 112 | # **Again, depending on the size of the model, this may take a minute or two** 113 | 114 | # In[ ]: 115 | 116 | 117 | # Set the CAFFE2_ROOT 118 | CAFFE2_ROOT = os.path.expanduser('~/caffe2') 119 | init_net_out = os.path.join(CAFFE_MODELS, MODEL[0], 'init_net.pb') 120 | predict_net_out = os.path.join(CAFFE_MODELS, MODEL[0], 'predict_net.pb') 121 | 122 | # Run the caffe_translator.py script to translate to Caffe2 if files do not already exist 123 | if (not os.path.exists(init_net_out)) or (not os.path.exists(predict_net_out)): 124 | print("Protobuf files not found. Running translation. Be patient...\n") 125 | os.system( 126 | 'python' + ' ' + os.path.join(CAFFE2_ROOT, 'caffe2/python/caffe_translator.py') + 127 | ' ' + CAFFE_MODEL_FILE + ' ' + CAFFE_PRETRAINED + ' ' + 128 | '--init_net' + ' ' + init_net_out + ' ' + 129 | '--predict_net' + ' ' + predict_net_out 130 | ) 131 | else: 132 | print("You already have both .pb files, skipping translation...\n") 133 | 134 | # Print if files are where they are expected to be 135 | if (not os.path.exists(init_net_out)) or (not os.path.exists(predict_net_out)): 136 | print(init_net_out + " and/or " + predict_net_out + " was NOT FOUND!") 137 | else: 138 | print("Protobuf files can be found at: \n", 139 | os.path.join(CAFFE_MODELS, MODEL[0]), "!") 140 | 141 | 142 | # At this point, we have translated the model from Caffe to a format that Caffe2 can use. Have a look at our other tutorials, such as *Loading Pretrained Models*, to see an example of how to use these .pb files for inference. 143 | 144 | -------------------------------------------------------------------------------- /py_gen/create_your_own_dataset.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # How do I create my own dataset? 12 | # 13 | # Caffe2 uses a binary DB format to store the data that we would like to train models on. A Caffe2 DB is just a glorified name for a key-value store, where the keys are usually randomized so that the batches are approximately i.i.d. The values are the real payload: they contain the serialized strings of the specific data formats that you would like your training algorithm to ingest. So, the stored DB would look (semantically) like this: 14 | # 15 | # key1 value1 16 | # key2 value2 17 | # key3 value3 18 | # ... 19 | # 20 | # The DB itself treats keys and values as opaque strings, but you probably want structured contents. One way to do this is to use a TensorProtos protocol buffer: it essentially wraps Tensors, aka multi-dimensional arrays, together with the tensor data type and shape information. Then, one can use the TensorProtosDBInput operator to load the data for SGD-style training. 21 | # 22 | # Here, we will show you one example of how to create your own dataset. To this end, we will use the UCI Iris dataset - a classic dataset for classifying Iris flowers. It contains 4 real-valued features representing the dimensions of the flower, and the task is to classify each flower into one of 3 types of Iris. The dataset can be downloaded [here](https://archive.ics.uci.edu/ml/datasets/Iris). 
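# Before diving in, here is a minimal sketch of the round trip described above:
# a record's "value" is a TensorProtos message serialized to a string, and a
# reader recovers it with the standard protobuf ParseFromString. (This sketch is
# self-contained; it only uses the caffe2 packages imported in the next cell.)

# In[ ]:


from caffe2.proto import caffe2_pb2
from caffe2.python import utils
import numpy as np

proto = caffe2_pb2.TensorProtos()
proto.protos.extend([utils.NumpyArrayToCaffe2Tensor(np.zeros(4, dtype=np.float32))])
value = proto.SerializeToString()   # this string is what gets stored in the DB
restored = caffe2_pb2.TensorProtos()
restored.ParseFromString(value)     # and this is how a reader recovers the tensors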
23 | 24 | # In[1]: 25 | 26 | 27 | # First let's import some necessities 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | from __future__ import unicode_literals 32 | 33 | import urllib2 # for downloading the dataset from the web. 34 | import numpy as np 35 | from matplotlib import pyplot 36 | from StringIO import StringIO 37 | from caffe2.python import core, utils, workspace 38 | from caffe2.proto import caffe2_pb2 39 | 40 | 41 | # In[2]: 42 | 43 | 44 | f = urllib2.urlopen('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data') 45 | raw_data = f.read() 46 | print('Raw data looks like this:') 47 | print(raw_data[:100] + '...') 48 | 49 | 50 | # In[3]: 51 | 52 | 53 | # load the features into a feature matrix. 54 | features = np.loadtxt(StringIO(raw_data), dtype=np.float32, delimiter=',', usecols=(0, 1, 2, 3)) 55 | # load the labels into a label vector 56 | label_converter = lambda s : {'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2}[s] 57 | labels = np.loadtxt(StringIO(raw_data), dtype=np.int, delimiter=',', usecols=(4,), converters={4: label_converter}) 58 | 59 | 60 | # Before we do training, one thing that is often beneficial is to separate the dataset into training and testing. In this case, let's randomly shuffle the data, use the first 100 data points for training, and the remaining 50 for testing. For more sophisticated approaches, you can use e.g. cross validation to separate your dataset into multiple training and testing splits. Read more about cross validation [here](http://scikit-learn.org/stable/modules/cross_validation.html). 61 | 62 | # In[4]: 63 | 64 | 65 | random_index = np.random.permutation(150) 66 | features = features[random_index] 67 | labels = labels[random_index] 68 | 69 | train_features = features[:100] 70 | train_labels = labels[:100] 71 | test_features = features[100:] 72 | test_labels = labels[100:] 73 | 74 | 75 | # In[5]: 76 | 77 | 78 | # Let's plot the first two features together with the label. 79 | # Remember, while we are plotting the testing feature distribution 80 | # here too, you should not do so in real research, 81 | # because one should not peek at the testing data. 82 | legend = ['rx', 'b+', 'go'] 83 | pyplot.title("Training data distribution, feature 0 and 1") 84 | for i in range(3): 85 | pyplot.plot(train_features[train_labels==i, 0], train_features[train_labels==i, 1], legend[i]) 86 | pyplot.figure() 87 | pyplot.title("Testing data distribution, feature 0 and 1") 88 | for i in range(3): 89 | pyplot.plot(test_features[test_labels==i, 0], test_features[test_labels==i, 1], legend[i]) 90 | 91 | 92 | # Now, as promised, let's put things into a Caffe2 DB. In this DB, we will use "train_xxx" as the key, and use a TensorProtos object to store two tensors for each data point: one for the feature and one for the label. We will use Caffe2's Python DB interface to do so. 93 | 94 | # In[6]: 95 | 96 | 97 | # First, let's see how one can construct a TensorProtos protocol buffer from numpy arrays. 
98 | feature_and_label = caffe2_pb2.TensorProtos() 99 | feature_and_label.protos.extend([ 100 | utils.NumpyArrayToCaffe2Tensor(features[0]), 101 | utils.NumpyArrayToCaffe2Tensor(labels[0])]) 102 | print('This is what the tensor proto looks like for a feature and its label:') 103 | print(str(feature_and_label)) 104 | print('This is the compact string that gets written into the db:') 105 | print(feature_and_label.SerializeToString()) 106 | 107 | 108 | # In[7]: 109 | 110 | 111 | # Now, actually write the db. 112 | 113 | def write_db(db_type, db_name, features, labels): 114 | db = core.C.create_db(db_type, db_name, core.C.Mode.write) 115 | transaction = db.new_transaction() 116 | for i in range(features.shape[0]): 117 | feature_and_label = caffe2_pb2.TensorProtos() 118 | feature_and_label.protos.extend([ 119 | utils.NumpyArrayToCaffe2Tensor(features[i]), 120 | utils.NumpyArrayToCaffe2Tensor(labels[i])]) 121 | transaction.put( 122 | 'train_%03d' % i, # %-style formatting gives each record a unique key 123 | feature_and_label.SerializeToString()) 124 | # Close the transaction, and then close the db. 125 | del transaction 126 | del db 127 | 128 | write_db("minidb", "iris_train.minidb", train_features, train_labels) 129 | write_db("minidb", "iris_test.minidb", test_features, test_labels) 130 | 131 | 132 | # Now, let's create a very simple network that only consists of one single TensorProtosDBInput operator, to showcase how we load data from the DB that we created. For training, you might want to do something more complex: create a network, train it, get the model, and run the prediction service. To this end, you can look at the MNIST tutorial for details. 133 | 134 | # In[8]: 135 | 136 | 137 | net_proto = core.Net("example_reader") 138 | dbreader = net_proto.CreateDB([], "dbreader", db="iris_train.minidb", db_type="minidb") 139 | net_proto.TensorProtosDBInput([dbreader], ["X", "Y"], batch_size=16) 140 | 141 | print("The net looks like this:") 142 | print(str(net_proto.Proto())) 143 | 144 | 145 | # In[9]: 146 | 147 | 148 | workspace.CreateNet(net_proto) 149 | 150 | 151 | # In[10]: 152 | 153 | 154 | # Let's run it to get batches of features. 155 | workspace.RunNet(net_proto.Proto().name) 156 | print("The first batch of features is:") 157 | print(workspace.FetchBlob("X")) 158 | print("The first batch of labels is:") 159 | print(workspace.FetchBlob("Y")) 160 | 161 | # Let's run again. 162 | workspace.RunNet(net_proto.Proto().name) 163 | print("The second batch of features is:") 164 | print(workspace.FetchBlob("X")) 165 | print("The second batch of labels is:") 166 | print(workspace.FetchBlob("Y")) 167 | 168 | 169 | -------------------------------------------------------------------------------- /py_gen/Python_Op.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Python Op Tutorial 12 | # In this tutorial we cover the `Python` operator that allows writing Caffe2 operators using Python. We'll also discuss some of the underlying implementation details of the operator. 13 | 14 | # ### Forward Python Operator 15 | # 16 | # Caffe2 provides a high-level interface that helps with creating Python ops. 
Let's consider the following example, in which we create a basic operator `f`, which outputs the input * 2: 17 | 18 | # In[1]: 19 | 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | from __future__ import unicode_literals 25 | 26 | from caffe2.python import core, workspace 27 | import numpy as np 28 | 29 | def f(inputs, outputs): 30 | outputs[0].feed(2 * inputs[0].data) # use 'feed' to set the output tensor to 2*input 31 | 32 | workspace.ResetWorkspace() 33 | net = core.Net("tutorial") 34 | net.Python(f)(["x"], ["y"]) 35 | workspace.FeedBlob("x", np.array([3.])) 36 | workspace.RunNetOnce(net) 37 | print(workspace.FetchBlob("y")) 38 | 39 | 40 | # As seen in the example, the `net.Python()` function returns a callable that can be used just like any other operator. In this example, we add a new Python operator to the net with input "x" and output "y". Note that you can save the output of `net.Python()` and call it multiple times to combine multiple Python operators with different inputs and outputs. 41 | # 42 | # Let's take a closer look at the `net.Python()` function and the corresponding body of a new Python operator `f`. Every time `net.Python(f)` is called, it serializes the given function `f` and saves it in a global registry under a known key (a token, passed to the PythonOp as an argument). After this, `net.Python()` returns a lambda that accepts positional and keyword arguments (typically inputs, outputs and extra arguments) and attaches a new Python operator to the net that calls function `f` on the given list of inputs and outputs. 43 | # 44 | # --- 45 | 46 | # A Python operator's function `f` expects two positional arguments: a list of inputs and a list of outputs. When an operator is executed it transparently converts Caffe2 blobs into the elements of these lists. CPU tensor blobs are converted into `TensorCPU` objects that act as wrappers around Numpy arrays. Let's take a closer look at the relationship between a Caffe2 CPU tensor, Python's `TensorCPU` object, and a Numpy array: 47 | # 48 | # 1) Conversion between C++ tensor objects and Numpy objects happens automatically and is handled by the PyBind library. 49 | # 50 | # 2) When generating a `TensorCPU` wrapper, a new Numpy array object is created which **shares** the same memory storage as the corresponding Caffe2 CPU tensor. This Numpy array is accessible in Python as the `.data` property of a `TensorCPU` object. 51 | # 52 | # 3) Although a Numpy array and a Caffe2 tensor might share the same storage, other tensor data (e.g. shape) of the Caffe2 tensor is stored **separately** from the Numpy array. Furthermore, Numpy may copy and reallocate its array to a different location in memory (e.g. when we try to resize an array) during the operator function's execution. It's important to keep that in mind when writing a Python operator's code to ensure that the Caffe2 and Numpy output tensors stay in sync. 53 | # 54 | # 4) `TensorCPU`'s `feed` method accepts a Numpy tensor, resizes the underlying Caffe2 tensor and copies the Numpy tensor's data into the Caffe2 tensor. 55 | # 56 | # 5) Another way to ensure that Caffe2's output tensor is properly set is to call the `reshape` function on the corresponding `TensorCPU` output, and copy the data in Python to the output's `.data` tensor. 
57 | # 58 | # Below is an example of ensuring that Caffe2's output tensor is properly configured using `reshape`: 59 | 60 | # In[2]: 61 | 62 | 63 | def f_reshape(inputs, outputs): 64 | outputs[0].reshape(inputs[0].shape) # set the output tensor to be the same shape as the input tensor 65 | outputs[0].data[...] = 2 * inputs[0].data # assign output tensor (Numpy) to 2 * input tensor (Numpy) 66 | 67 | workspace.ResetWorkspace() 68 | net = core.Net("tutorial") 69 | net.Python(f_reshape)(["x"], ["z"]) 70 | workspace.FeedBlob("x", np.array([3.])) 71 | workspace.RunNetOnce(net) 72 | print(workspace.FetchBlob("z")) 73 | 74 | 75 | # This example works correctly because the `reshape` method updates the underlying Caffe2 tensor, and a subsequent call to the `.data` property returns a Numpy array that shares memory with the Caffe2 tensor. The last line in `f_reshape` copies data into the shared memory location. 76 | 77 | # There are several additional arguments that `net.Python()` accepts. When `pass_workspace=True` is passed, a workspace is passed to the operator's `Python` function: 78 | 79 | # In[3]: 80 | 81 | 82 | def f_workspace(inputs, outputs, workspace): 83 | # use 'feed' to set the output tensor to 2 * (a blob in the workspace called "x") 84 | outputs[0].feed(2 * workspace.blobs["x"].fetch()) 85 | 86 | workspace.ResetWorkspace() 87 | net = core.Net("tutorial") 88 | net.Python(f_workspace, pass_workspace=True)([], ["y"]) # add Python operator to net without specifying input blob 89 | workspace.FeedBlob("x", np.array([3.])) # manually feed the "x" blob that the f_workspace operator expects into the workspace 90 | workspace.RunNetOnce(net) 91 | print(workspace.FetchBlob("y")) 92 | 93 | 94 | # ### Gradient Python Operator 95 | # 96 | # Another important case to discuss is the gradient operator that corresponds to a custom `net.Python()` operator. In the example below, `grad_f` is the corresponding gradient operator for `f`. 97 | # 98 | # Note that we are using the same `f` as before ( $y = 2x$ ), so we expect the gradient with respect to x to be $\frac{dy}{dx} = 2$. 99 | # 100 | 101 | # In[4]: 102 | 103 | 104 | def f(inputs, outputs): 105 | outputs[0].reshape(inputs[0].shape) 106 | outputs[0].data[...] = inputs[0].data * 2 107 | 108 | def grad_f(inputs, outputs): 109 | # Ordering of inputs is [fwd inputs, outputs, grad_outputs] 110 | grad_output = inputs[2] 111 | grad_input = outputs[0] 112 | grad_input.reshape(grad_output.shape) 113 | grad_input.data[...] = grad_output.data * 2 114 | 115 | workspace.ResetWorkspace() 116 | net = core.Net("tutorial") 117 | net.Python(f, grad_f)(["x"], ["y"]) 118 | workspace.FeedBlob("x", np.array([3.])) 119 | net.AddGradientOperators(["y"]) 120 | workspace.RunNetOnce(net) 121 | print(workspace.FetchBlob("x_grad")) 122 | 123 | 124 | # When `net.Python()` is called with a gradient function specified, it also registers a serialized gradient function that is used by a corresponding gradient Python operator (**PythonGradient**). This operator executes a gradient function that expects two arguments - input and output lists. 125 | # 126 | # - The input list argument contains all forward function inputs, followed by all of its outputs, followed by the gradients of the forward function outputs. 127 | # - The output list contains the gradients of the forward function inputs. 128 | # 129 | # Note: `net.Python()`'s **grad_output_indices**/**grad_input_indices** arguments allow specifying the indices of the gradient output/input blobs that the gradient function reads/writes to. 
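# To make this ordering concrete, below is a minimal sketch (using only the
# conventions shown above; the multiply op itself is a hypothetical example)
# of a two-input op y = x * w. Its gradient function receives [x, w, y, dy]
# as inputs and writes [dx, dw] as outputs:

# In[ ]:


def mul(inputs, outputs):
    outputs[0].reshape(inputs[0].shape)
    outputs[0].data[...] = inputs[0].data * inputs[1].data

def grad_mul(inputs, outputs):
    # inputs is [x, w, y, dy]; outputs is [dx, dw]
    x, w, dy = inputs[0], inputs[1], inputs[3]
    outputs[0].reshape(dy.shape)
    outputs[0].data[...] = dy.data * w.data  # dx = dy * w
    outputs[1].reshape(dy.shape)
    outputs[1].data[...] = dy.data * x.data  # dw = dy * x

workspace.ResetWorkspace()
net = core.Net("tutorial")
net.Python(mul, grad_mul)(["x", "w"], ["y"])
workspace.FeedBlob("x", np.array([3.]))
workspace.FeedBlob("w", np.array([4.]))
net.AddGradientOperators(["y"])
workspace.RunNetOnce(net)
print(workspace.FetchBlob("x_grad"))  # expect [4.] (= w)
print(workspace.FetchBlob("w_grad"))  # expect [3.] (= x)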
130 | 131 | # #### Note on GPU tensors: 132 | # 133 | # The PythonOp implementation is CPU specific: it uses Numpy arrays, which require CPU memory storage. In order to be able to use a Python operator with GPU tensors, we define a CUDA version of PythonOp using GPUFallbackOp. This operator wraps a CPU operator and adds GPU-to-CPU (and opposite direction) copy operations. Thus, when using a PythonOp with CUDA device options, all input CUDA tensors are automatically copied to CPU memory and all CPU output tensors are copied back to GPU. 134 | 135 | -------------------------------------------------------------------------------- /py_gen/MNIST_Dataset_and_Databases.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # MNIST Dataset & Database 12 | # 13 | # In the [MNIST tutorial](https://github.com/caffe2/caffe2/blob/master/caffe2/python/tutorials/MNIST.ipynb) we use an lmdb database. You can also use leveldb or even minidb by changing the type reference when you get ready to read from the dbs. In this tutorial, we will go over how to download, extract, and generate lmdb and leveldb variants of the MNIST dataset. 14 | 15 | # ## Dataset: 16 | # 17 | # You can download the raw [MNIST dataset](https://download.caffe2.ai/datasets/mnist/mnist.zip), g/unzip the dataset and labels, and make the database yourself. 18 | # 19 | # 20 | # ## Databases: 21 | # 22 | # We provide a few database formats for you to try with the MNIST tutorial. The default is lmdb. 23 | # 24 | # * [MNIST-nchw-lmdb](https://download.caffe2.ai/databases/mnist-lmdb.zip) - contains both the train and test lmdb MNIST databases in NCHW format 25 | # * [MNIST-nchw-leveldb](https://download.caffe2.ai/databases/mnist-leveldb.zip) - contains both the train and test leveldb MNIST databases in NCHW format 26 | # * [MNIST-nchw-minidb](https://download.caffe2.ai/databases/mnist-minidb.zip) - contains both the train and test minidb MNIST databases in NCHW format 27 | # 28 | # 29 | # ## Tools: 30 | # 31 | # ### make_mnist_db 32 | # 33 | # If you like LevelDB you can use Caffe2's `make_mnist_db` binary to generate leveldb databases. This binary is found in `/caffe2/build/caffe2/binaries/` or, depending on your OS and installation, in `/usr/local/bin/`. 34 | # 35 | # Here is an example call to `make_mnist_db`: 36 | # 37 | # ``` 38 | # ./make_mnist_db --channel_first --db leveldb --image_file ~/Downloads/train-images-idx3-ubyte --label_file ~/Downloads/train-labels-idx1-ubyte --output_file ~/caffe2/caffe2/python/tutorials/tutorial_data/mnist/mnist-train-nchw-leveldb 39 | # 40 | # ./make_mnist_db --channel_first --db leveldb --image_file ~/Downloads/t10k-images-idx3-ubyte --label_file ~/Downloads/t10k-labels-idx1-ubyte --output_file ~/caffe2/caffe2/python/tutorials/tutorial_data/mnist/mnist-test-nchw-leveldb 41 | # ``` 42 | # Note that leveldb can get deadlocked if more than one user attempts to open the same leveldb at the same time. This is why there is logic in the Python below to delete LOCK files if they're found. 
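# For reference, clearing a stale lock by hand amounts to removing the LOCK
# file inside the database folder, e.g. (a sketch; the path assumes the
# leveldb generated later in this script):
#
#     rm ~/caffe2_notebooks/tutorial_data/mnist/mnist-train-nchw-leveldb/LOCK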
43 | # 44 | # 45 | # ### Python script 46 | # 47 | # You can use the Python in the code blocks below to download and extract the dataset with `DownloadResource`, call the `make_mnist_db` binary, and generate your database with `GenerateDB`. 48 | # 49 | # First, we will define our functions. 50 | 51 | # In[1]: 52 | 53 | 54 | from __future__ import absolute_import 55 | from __future__ import division 56 | from __future__ import print_function 57 | from __future__ import unicode_literals 58 | 59 | import os 60 | 61 | def DownloadResource(url, path): 62 | '''Downloads resources from s3 by url and unzips them to the provided path''' 63 | import requests, zipfile, StringIO 64 | print("Downloading... {} to {}".format(url, path)) 65 | r = requests.get(url, stream=True) 66 | z = zipfile.ZipFile(StringIO.StringIO(r.content)) 67 | z.extractall(path) 68 | print("Completed download and extraction.") 69 | 70 | 71 | def GenerateDB(image, label, name): 72 | '''Calls the make_mnist_db binary to generate a leveldb from a mnist dataset''' 73 | name = os.path.join(data_folder, name) 74 | print('DB: ', name) 75 | if not os.path.exists(name): 76 | syscall = "/usr/local/bin/make_mnist_db --channel_first --db leveldb --image_file " + image + " --label_file " + label + " --output_file " + name 77 | # print "Creating database with: ", syscall 78 | os.system(syscall) 79 | else: 80 | print("Database exists already. Delete the folder if you have issues/corrupted DB, then rerun this.") 81 | if os.path.exists(os.path.join(name, "LOCK")): 82 | # print "Deleting the pre-existing lock file" 83 | os.remove(os.path.join(name, "LOCK")) 84 | 85 | 86 | # Now that we have our functions for loading, extracting, and generating our dbs, we will put these functions to use and generate the MNIST data in both lmdb and leveldb formats (if they do not already exist). 87 | # 88 | # First, we **download and extract the MNIST dataset (train and test) in lmdb format** using: 89 | # 90 | # ```python 91 | # DownloadResource("http://download.caffe2.ai/databases/mnist-lmdb.zip", data_folder) 92 | # ``` 93 | # 94 | # 95 | # Next, we focus on **downloading, extracting, and generating MNIST train and test leveldbs**. We start by downloading and extracting the raw MNIST dataset (in ubyte format). This will ultimately extract four files, consisting of training images and labels, and testing images and labels. 96 | # 97 | # ```python 98 | # DownloadResource("http://download.caffe2.ai/datasets/mnist/mnist.zip", data_folder) 99 | # ``` 100 | # 101 | # 102 | # Finally, we **generate the leveldb train and test databases** (or regenerate; it can get locked with multi-user setups or abandoned threads). We do this by passing our `GenerateDB` function the names of the corresponding ubyte files along with an output file name. 
103 | # 104 | # ```python 105 | # GenerateDB(image_file_train, label_file_train, "mnist-train-nchw-leveldb") 106 | # GenerateDB(image_file_test, label_file_test, "mnist-test-nchw-leveldb") 107 | # ``` 108 | 109 | # In[ ]: 110 | 111 | 112 | current_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks') 113 | data_folder = os.path.join(current_folder, 'tutorial_data', 'mnist') 114 | 115 | # If the data_folder does not already exist, create it 116 | if not os.path.exists(data_folder): 117 | os.makedirs(data_folder) 118 | 119 | # Downloads and extracts the lmdb databases of MNIST images - both test and train 120 | if not os.path.exists(os.path.join(data_folder,"mnist-train-nchw-lmdb")): 121 | DownloadResource("http://download.caffe2.ai/databases/mnist-lmdb.zip", data_folder) 122 | else: 123 | print("mnist-lmdb already downloaded and extracted") 124 | 125 | # Downloads and extracts the MNIST data set 126 | if not os.path.exists(os.path.join(data_folder, "train-images-idx3-ubyte")): 127 | DownloadResource("http://download.caffe2.ai/datasets/mnist/mnist.zip", data_folder) 128 | else: 129 | print("Raw mnist ubyte data already downloaded and extracted") 130 | 131 | # (Re)generate the leveldb database (it can get locked with multi-user setups or abandoned threads) 132 | # Requires the download of the dataset (mnist.zip) - see DownloadResource above. 133 | # You also need to change references in the MNIST tutorial code where you train or test from lmdb to leveldb 134 | image_file_train = os.path.join(data_folder, "train-images-idx3-ubyte") 135 | label_file_train = os.path.join(data_folder, "train-labels-idx1-ubyte") 136 | image_file_test = os.path.join(data_folder, "t10k-images-idx3-ubyte") 137 | label_file_test = os.path.join(data_folder, "t10k-labels-idx1-ubyte") 138 | GenerateDB(image_file_train, label_file_train, "mnist-train-nchw-leveldb") 139 | GenerateDB(image_file_test, label_file_test, "mnist-test-nchw-leveldb") 140 | 141 | 142 | # ## Code Changes for Other DBs 143 | # 144 | # If you chose to use a format other than lmdb you will need to change a couple lines of code. When you use `ModelHelper` to instantiate the CNN, you pass in the `db` parameter with a path and the `db_type` with the type of db. You would need to update both of these values. Since you create two networks, one for training and one for testing, you would need to update the code for both of these. 
145 | # 146 | # **Default code using lmdb** 147 | # ```python 148 | # train_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope) 149 | # data, label = AddInput( 150 | # train_model, batch_size=64, 151 | # db=os.path.join(data_folder, 'mnist-train-nchw-lmdb'), 152 | # db_type='lmdb') 153 | # ``` 154 | # 155 | # **Updated code using leveldb** 156 | # ```python 157 | # train_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope) 158 | # data, label = AddInput( 159 | # train_model, batch_size=64, 160 | # db=os.path.join(data_folder, 'mnist-train-nchw-leveldb'), 161 | # db_type='leveldb') 162 | # ``` 163 | 164 | -------------------------------------------------------------------------------- /Getting_Caffe1_Models_for_Translation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Caffe1 Models and Datasets\n", 8 | "\n", 9 | "This tutorial will help you acquire a variety of pre-trained models from the original Caffe repo, and translate these models to a format that Caffe2 expects. If you don't already have the Caffe repo, then clone it like so:\n", 10 | "\n", 11 | "```\n", 12 | "git clone https://github.com/BVLC/caffe.git\n", 13 | "```\n", 14 | "\n", 15 | "Start by importing the required modules." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from __future__ import absolute_import\n", 25 | "from __future__ import division\n", 26 | "from __future__ import print_function\n", 27 | "from __future__ import unicode_literals\n", 28 | "\n", 29 | "import os\n", 30 | "print(\"Required modules imported.\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Now you can setup your root folder for Caffe below if you put it somewhere else. You should only be changing the path that's being set for `CAFFE_ROOT`." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# You should have checked out original Caffe\n", 47 | "# git clone https://github.com/BVLC/caffe.git\n", 48 | "# change the CAFFE_ROOT directory below accordingly\n", 49 | "CAFFE_ROOT = os.path.expanduser('~/caffe')\n", 50 | "\n", 51 | "# Make sure Caffe exists where you specified\n", 52 | "if not os.path.exists(CAFFE_ROOT):\n", 53 | " print(\"Houston, you may have a problem.\") \n", 54 | " print(\"Did you change CAFFE_ROOT to point to your local Caffe repo?\")\n", 55 | " print(\"Try running: git clone https://github.com/BVLC/caffe.git\")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Here's where you pick your model. There are several listed below such as AlexNet, GoogleNet, and Flickr Style. Uncomment the model you want to download." 
63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Pick a model, and if you don't have it, it will be downloaded\n", 72 | "# format below is: the model's folder, and the model's weights file inside that folder\n", 73 | "\n", 74 | "#MODEL = 'bvlc_alexnet', 'bvlc_alexnet.caffemodel' \n", 75 | "#MODEL = 'bvlc_googlenet', 'bvlc_googlenet.caffemodel'\n", 76 | "#MODEL = 'finetune_flickr_style', 'finetune_flickr_style.caffemodel'\n", 77 | "#MODEL = 'bvlc_reference_caffenet', 'bvlc_reference_caffenet.caffemodel'\n", 78 | "MODEL = 'bvlc_reference_rcnn_ilsvrc13', 'bvlc_reference_rcnn_ilsvrc13.caffemodel'" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "As a reminder, in Caffe, the deploy model is saved in two parts:\n", 86 | "\n", 87 | "    1) deploy.prototxt: contains the network architecture in human-readable protobuf format\n", 88 | "    2) .caffemodel file: contains the model weights and parameters\n", 89 | "\n", 90 | "Therefore, to translate the model to Caffe2, we need both of these files. We already have the `deploy.prototxt` files for all of the models in `~/caffe/models`, so we only need the learned weights.\n", 91 | "\n", 92 | "Below, we'll check to see if the `.caffemodel` file from the model that we uncommented above already exists. If it does not already exist in the location that we specify, we will download it using the `download_model_binary.py` script in the Caffe repo. **Note that .caffemodel files are typically fairly large, so downloading one will take a few moments.** We will be sure to print a message so we know when we can continue." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# Scripts to download the models reside here (~/caffe/models)\n", 102 | "# After downloading, the data will exist alongside the script\n", 103 | "CAFFE_MODELS = os.path.join(CAFFE_ROOT, 'models')\n", 104 | "\n", 105 | "# this is like: ~/caffe/models/bvlc_alexnet/deploy.prototxt\n", 106 | "CAFFE_MODEL_FILE = os.path.join(CAFFE_MODELS, MODEL[0], 'deploy.prototxt')\n", 107 | "# this is like: ~/caffe/models/bvlc_alexnet/bvlc_alexnet.caffemodel\n", 108 | "CAFFE_PRETRAINED = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[1])\n", 109 | " \n", 110 | "# If the model folder doesn't have the goods, then download it\n", 111 | "# This is usually a pretty big file with the .caffemodel extension\n", 112 | "if not os.path.exists(CAFFE_PRETRAINED):\n", 113 | "    print(CAFFE_PRETRAINED + \" not found. Attempting download. Be patient...\\n\")\n", 114 | "    os.system(\n", 115 | "        os.path.join(CAFFE_ROOT, 'scripts/download_model_binary.py') +\n", 116 | "        ' ' +\n", 117 | "        os.path.join(CAFFE_ROOT, 'models', MODEL[0]))\n", 118 | "else:\n", 119 | "    print(\"You already have \" + CAFFE_PRETRAINED + \", skipping download...\\n\")\n", 120 | "\n", 121 | "# If the .prototxt file was missing then you're in trouble; cannot continue\n", 122 | "if not os.path.exists(CAFFE_MODEL_FILE):\n", 123 | "    print(\"Caffe model file, \" + CAFFE_MODEL_FILE + \" was not found!\")\n", 124 | "else:\n", 125 | "    print(\"Both the deploy.prototxt and .caffemodel files were found, ready to continue!\")\n", 126 | "    # Now we can generate the init net and predict net .pb files" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Now that we have both the `deploy.prototxt` and `.caffemodel` files, we can translate the model to the Caffe2 saved model format, which consists of two serialized protobuf files:\n", 134 | "\n", 135 | "    1) init_net.pb\n", 136 | "    2) predict_net.pb\n", 137 | "    \n", 138 | "To do this, we will use Caffe2's translator script at `~/caffe2/caffe2/python/caffe_translator.py`.\n", 139 | "\n", 140 | "**Again, depending on the size of the model, this may take a minute or two**" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# Set the CAFFE2_ROOT\n", 150 | "CAFFE2_ROOT = os.path.expanduser('~/caffe2')\n", 151 | "init_net_out = os.path.join(CAFFE_MODELS, MODEL[0], 'init_net.pb')\n", 152 | "predict_net_out = os.path.join(CAFFE_MODELS, MODEL[0], 'predict_net.pb')\n", 153 | "\n", 154 | "# Run the caffe_translator.py script to translate to Caffe2 if files do not already exist\n", 155 | "if (not os.path.exists(init_net_out)) or (not os.path.exists(predict_net_out)):\n", 156 | "    print(\"Protobuf files not found. Running translation. Be patient...\\n\")\n", 157 | "    os.system(\n", 158 | "        'python' + ' ' + os.path.join(CAFFE2_ROOT, 'caffe2/python/caffe_translator.py') +\n", 159 | "        ' ' + CAFFE_MODEL_FILE + ' ' + CAFFE_PRETRAINED + ' ' + \n", 160 | "        '--init_net' + ' ' + init_net_out + ' ' +\n", 161 | "        '--predict_net' + ' ' + predict_net_out\n", 162 | "    )\n", 163 | "else:\n", 164 | "    print(\"You already have both .pb files, skipping translation...\\n\") \n", 165 | "\n", 166 | "# Print if files are where they are expected to be\n", 167 | "if (not os.path.exists(init_net_out)) or (not os.path.exists(predict_net_out)):\n", 168 | "    print(init_net_out + \" and/or \" + predict_net_out + \" was NOT FOUND!\")\n", 169 | "else:\n", 170 | "    print(\"Protobuf files can be found at: \\n\",\n", 171 | "          os.path.join(CAFFE_MODELS, MODEL[0]), \"!\")" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "At this point, we have translated the model from Caffe to a format that Caffe2 can use. Have a look at our other tutorials, such as *Loading Pretrained Models*, to see an example of how to use these .pb files for inference."
179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 2", 185 | "language": "python", 186 | "name": "python2" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.14" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /MNIST_Dataset_and_Databases.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MNIST Dataset & Database\n", 8 | "\n", 9 | "In the [MNIST tutorial](https://github.com/caffe2/caffe2/blob/master/caffe2/python/tutorials/MNIST.ipynb) we use an lmdb database. You can also use leveldb or even minidb by changing the type reference when you get ready to read from the dbs. In this tutorial, we will go over how to download, extract, and generate lmdb and leveldb variants of the MNIST dataset." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Dataset:\n", 17 | "\n", 18 | "You can download the raw [MNIST dataset](https://download.caffe2.ai/datasets/mnist/mnist.zip), g/unzip the dataset and labels, and make the database yourself. \n", 19 | "\n", 20 | "\n", 21 | "## Databases:\n", 22 | "\n", 23 | "We provide a few database formats for you to try with the MNIST tutorial. The default is lmdb. \n", 24 | "\n", 25 | "* [MNIST-nchw-lmdb](https://download.caffe2.ai/databases/mnist-lmdb.zip) - contains both the train and test lmdb MNIST databases in NCHW format\n", 26 | "* [MNIST-nchw-leveldb](https://download.caffe2.ai/databases/mnist-leveldb.zip) - contains both the train and test leveldb MNIST databases in NCHW format\n", 27 | "* [MNIST-nchw-minidb](https://download.caffe2.ai/databases/mnist-minidb.zip) - contains both the train and test minidb MNIST databases in NCHW format\n", 28 | "\n", 29 | "\n", 30 | "## Tools:\n", 31 | "\n", 32 | "### make_mnist_db\n", 33 | "\n", 34 | "If you like LevelDB you can use Caffe2's `make_mnist_db` binary to generate leveldb databases. This binary is found in `/caffe2/build/caffe2/binaries/` or depending on your OS and installation, in `/usr/local/bin/`.\n", 35 | "\n", 36 | "Here is an example call to `make_mnist_db`:\n", 37 | "\n", 38 | "```\n", 39 | "./make_mnist_db --channel_first --db leveldb --image_file ~/Downloads/train-images-idx3-ubyte --label_file ~/Downloads/train-labels-idx1-ubyte --output_file ~/caffe2/caffe2/python/tutorials/tutorial_data/mnist/mnist-train-nchw-leveldb\n", 40 | "\n", 41 | "./make_mnist_db --channel_first --db leveldb --image_file ~/Downloads/t10k-images-idx3-ubyte --label_file ~/Downloads/t10k-labels-idx1-ubyte --output_file ~/caffe2/caffe2/python/tutorials/tutorial_data/mnist/mnist-test-nchw-leveldb\n", 42 | "```\n", 43 | "Note leveldb can get deadlocked if more than one user attempts to open the leveldb at the same time. 
This is why there is logic in the Python below to delete LOCK files if they're found.\n", 44 | "\n", 45 | "\n", 46 | "### Python script\n", 47 | "\n", 48 | "You can use the Python in the code blocks below to download and extract the dataset with `DownloadResource`, call the `make_mnist_db` binary, and generate your database with `GenerateDB`. \n", 49 | "\n", 50 | "First, we will define our functions." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from __future__ import absolute_import\n", 60 | "from __future__ import division\n", 61 | "from __future__ import print_function\n", 62 | "from __future__ import unicode_literals\n", 63 | "\n", 64 | "import os\n", 65 | "\n", 66 | "def DownloadResource(url, path):\n", 67 | " '''Downloads resources from s3 by url and unzips them to the provided path'''\n", 68 | " import requests, zipfile, StringIO\n", 69 | " print(\"Downloading... {} to {}\".format(url, path))\n", 70 | " r = requests.get(url, stream=True)\n", 71 | " z = zipfile.ZipFile(StringIO.StringIO(r.content))\n", 72 | " z.extractall(path)\n", 73 | " print(\"Completed download and extraction.\")\n", 74 | "\n", 75 | " \n", 76 | "def GenerateDB(image, label, name):\n", 77 | " '''Calls the make_mnist_db binary to generate a leveldb from a mnist dataset'''\n", 78 | " name = os.path.join(data_folder, name)\n", 79 | " print('DB: ', name)\n", 80 | " if not os.path.exists(name):\n", 81 | " syscall = \"/usr/local/bin/make_mnist_db --channel_first --db leveldb --image_file \" + image + \" --label_file \" + label + \" --output_file \" + name\n", 82 | " # print \"Creating database with: \", syscall\n", 83 | " os.system(syscall)\n", 84 | " else:\n", 85 | " print(\"Database exists already. Delete the folder if you have issues/corrupted DB, then rerun this.\")\n", 86 | " if os.path.exists(os.path.join(name, \"LOCK\")):\n", 87 | " # print \"Deleting the pre-existing lock file\"\n", 88 | " os.remove(os.path.join(name, \"LOCK\"))" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "Now that we have our functions for loading, extracting, and generating our dbs, we will put these functions to use and generate the MNIST data in both lmdb and leveldb formats (if they do not already exist).\n", 96 | "\n", 97 | "First, we **download and extract the MNIST dataset (train and test) in lmdb format** using:\n", 98 | "\n", 99 | "```python\n", 100 | "DownloadResource(\"http://download.caffe2.ai/databases/mnist-lmdb.zip\", data_folder)\n", 101 | "```\n", 102 | "\n", 103 | "\n", 104 | "Next, we focus on **downloading, extracting, and generating MNIST train and test leveldbs**. We start by downloading and extracting the raw MNIST dataset (in ubyte format). This will ultimately extract four files, consisting of training images and labels, and testing images and labels.\n", 105 | "\n", 106 | "```python\n", 107 | "DownloadResource(\"http://download.caffe2.ai/datasets/mnist/mnist.zip\", data_folder)\n", 108 | "```\n", 109 | "\n", 110 | "\n", 111 | "Finally, we **generate the leveldb train and test databases** (or regenerate; it can get locked with multi-user setups or abandoned threads). 
We do this by passing our `GenerateDB` function the names of the corresponding ubyte files along with an output file name.\n", 112 | "\n", 113 | "```python\n", 114 | "GenerateDB(image_file_train, label_file_train, \"mnist-train-nchw-leveldb\")\n", 115 | "GenerateDB(image_file_test, label_file_test, \"mnist-test-nchw-leveldb\")\n", 116 | "```" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "current_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks')\n", 126 | "data_folder = os.path.join(current_folder, 'tutorial_data', 'mnist')\n", 127 | "\n", 128 | "# If the data_folder does not already exist, create it\n", 129 | "if not os.path.exists(data_folder):\n", 130 | " os.makedirs(data_folder) \n", 131 | "\n", 132 | "# Downloads and extracts the lmdb databases of MNIST images - both test and train\n", 133 | "if not os.path.exists(os.path.join(data_folder,\"mnist-train-nchw-lmdb\")):\n", 134 | " DownloadResource(\"http://download.caffe2.ai/databases/mnist-lmdb.zip\", data_folder)\n", 135 | "else:\n", 136 | " print(\"mnist-lmdb already downloaded and extracted\")\n", 137 | "\n", 138 | "# Downloads and extracts the MNIST data set\n", 139 | "if not os.path.exists(os.path.join(data_folder, \"train-images-idx3-ubyte\")):\n", 140 | " DownloadResource(\"http://download.caffe2.ai/datasets/mnist/mnist.zip\", data_folder)\n", 141 | "else:\n", 142 | " print(\"Raw mnist ubyte data already downloaded and extracted\")\n", 143 | "\n", 144 | "# (Re)generate the leveldb database (it can get locked with multi-user setups or abandoned threads)\n", 145 | "# Requires the download of the dataset (mnist.zip) - see DownloadResource above.\n", 146 | "# You also need to change references in the MNIST tutorial code where you train or test from lmdb to leveldb\n", 147 | "image_file_train = os.path.join(data_folder, \"train-images-idx3-ubyte\")\n", 148 | "label_file_train = os.path.join(data_folder, \"train-labels-idx1-ubyte\")\n", 149 | "image_file_test = os.path.join(data_folder, \"t10k-images-idx3-ubyte\")\n", 150 | "label_file_test = os.path.join(data_folder, \"t10k-labels-idx1-ubyte\")\n", 151 | "GenerateDB(image_file_train, label_file_train, \"mnist-train-nchw-leveldb\")\n", 152 | "GenerateDB(image_file_test, label_file_test, \"mnist-test-nchw-leveldb\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## Code Changes for Other DBs\n", 160 | "\n", 161 | "If you chose to use a format other than lmdb you will need to change a couple lines of code. When you use `ModelHelper` to instantiate the CNN, you pass in the `db` parameter with a path and the `db_type` with the type of db. You would need to update both of these values. 
Since you create two networks, one for training and one for testing, you would need to update the code for both of these.\n", 162 | "\n", 163 | "**Default code using lmdb**\n", 164 | "```python\n", 165 | "train_model = model_helper.ModelHelper(name=\"mnist_train\", arg_scope=arg_scope)\n", 166 | "data, label = AddInput(\n", 167 | "    train_model, batch_size=64,\n", 168 | "    db=os.path.join(data_folder, 'mnist-train-nchw-lmdb'),\n", 169 | "    db_type='lmdb')\n", 170 | "```\n", 171 | "\n", 172 | "**Updated code using leveldb**\n", 173 | "```python\n", 174 | "train_model = model_helper.ModelHelper(name=\"mnist_train\", arg_scope=arg_scope)\n", 175 | "data, label = AddInput(\n", 176 | "    train_model, batch_size=64,\n", 177 | "    db=os.path.join(data_folder, 'mnist-train-nchw-leveldb'),\n", 178 | "    db_type='leveldb')\n", 179 | "```" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 2", 186 | "language": "python", 187 | "name": "python2" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 2 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython2", 199 | "version": "2.7.14" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /experimental/Immediate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tutorial 4. Immediate mode\n", 8 | "\n", 9 | "In this tutorial we will talk about a cute feature of Caffe2: immediate mode.\n", 10 | "\n", 11 | "From the previous tutorials you have seen that Caffe2 *declares* a network, and during this declaration phase, nothing actually gets executed - it's like writing the source of a program, and \"compilation/execution\" only happens later.\n", 12 | "\n", 13 | "This sometimes gets a bit tricky if we are in a research mindset and want to inspect intermediate outputs as we go. This is when immediate mode comes to the rescue. At a high level, what immediate mode does is run the corresponding operators as you write them. The results live under a special workspace that can then be accessed via `FetchImmediate()` and `FeedImmediate()` calls.\n", 14 | "\n", 15 | "Let's show some examples." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "%matplotlib inline\n", 27 | "from caffe2.python import cnn, core, visualize, workspace, model_helper, brew\n", 28 | "import numpy as np\n", 29 | "import os\n", 30 | "core.GlobalInit(['caffe2', '--caffe2_log_level=-1'])" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "collapsed": true 37 | }, 38 | "source": [ 39 | "Now, as we have seen before, in the normal mode, when you create an operator, we are *declaring* it only, and nothing actually gets executed. Let's re-confirm that."
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "workspace.ResetWorkspace()\n", 51 | "# declaration\n", 52 | "op = core.CreateOperator(\"GaussianFill\", [], \"X\", shape=[3, 5])\n", 53 | "print('Before execution, workspace contains X: {}'\n", 54 | " .format(workspace.HasBlob(\"X\")))\n", 55 | "# execution\n", 56 | "workspace.RunOperatorOnce(op)\n", 57 | "print('After execution, workspace contains X: {}'\n", 58 | " .format(workspace.HasBlob(\"X\")))" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "source": [ 67 | "## Entering and exiting immediate mode.\n", 68 | "\n", 69 | "Entering immediate mode is easy: you basically invoke `workspace.StartImmediate()`. Since immediate mode has quite a lot of side effects, it would be good to read through the warning message to make sure you understand the implications.\n", 70 | "\n", 71 | "(If you don't want to see the messages, pass `i_know=True` to `StartImmediate` to suppress that.)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "workspace.StartImmediate()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Now that you have enabled immediate mode, any operators you run will simultaneously be executed in a separate immediate workspace. Note - the main workspace that you are working on is not affected. We designed the immediate workspace to be separate from the main workspace, so that nothing in the main workspace gets polluted." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "# declaration, and since we are in immediate mode, run it in the immediate workspace.\n", 101 | "op = core.CreateOperator(\"GaussianFill\", [], \"X\", shape=[3, 5])\n", 102 | "print('Before execution, does workspace contain X? {}'\n", 103 | " .format(workspace.HasBlob(\"X\")))\n", 104 | "print('But we can access it using the Immediate related functions.'\n", 105 | " 'Here is a list of immediate blobs:')\n", 106 | "print(workspace.ImmediateBlobs())\n", 107 | "print('The content is like this:')\n", 108 | "print(workspace.FetchImmediate('X'))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# After the immediate execution, you can invoke StopImmediate() to clean up.\n", 120 | "workspace.StopImmediate()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Manually feeding blobs\n", 128 | "\n", 129 | "But wait, you say - what if I want to create an operator that uses an input that is \"declared\" but not present yet? 
Since the immediate workspace does not have the input, we will encounter an exception:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "workspace.StartImmediate(i_know=True)\n", 141 | "op = core.CreateOperator(\"Relu\", \"X\", \"Y\")" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "source": [ 150 | "This is because immediate mode, being completely imperative, requires any input that is used to already exist in the immediate workspace. To make immediate mode aware of such external inputs, we can manually feed blobs to the immediate workspace." 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "X = np.random.randn(2, 3).astype(np.float32)\n", 162 | "workspace.FeedImmediate(\"X\", X)\n", 163 | "# Now, we can safely run CreateOperator since immediate mode knows what X looks like\n", 164 | "op = core.CreateOperator(\"Relu\", \"X\", \"Y\")\n", 165 | "print(\"Example input is:\\n{}\".format(workspace.FetchImmediate(\"X\")))\n", 166 | "print(\"Example output is:\\n{}\".format(workspace.FetchImmediate(\"Y\")))" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "workspace.StopImmediate()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## When is immediate mode useful?\n", 185 | "\n", 186 | "You might want to use immediate mode when you are not very sure about the shape of the intermediate results, such as in a CNN where there are multiple convolution and pooling layers. Let's say that you are creating an MNIST convnet model but don't want to calculate the number of dimensions for the final FC layer. Here is what you might want to do."
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "model = model_helper.ModelHelper(name=\"mnist\")\n", 198 | "# Start the immediate mode.\n", 199 | "workspace.StartImmediate(i_know=True)\n", 200 | "\n", 201 | "data_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks', 'tutorial_data')\n", 202 | "data_uint8, label = model.TensorProtosDBInput(\n", 203 | " [], [\"data_uint8\", \"label\"], batch_size=64,\n", 204 | " db=os.path.join(data_folder, 'mnist/mnist-train-nchw-leveldb'),\n", 205 | " db_type='leveldb')\n", 206 | "data = model.net.Cast(data_uint8, \"data\", to=core.DataType.FLOAT)\n", 207 | "data = model.net.Scale(data, data, scale=float(1./256))\n", 208 | "data = model.net.StopGradient(data, data)\n", 209 | "conv1 = brew.conv(model, data, 'conv1', 1, 20, 5)\n", 210 | "pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)\n", 211 | "conv2 = brew.conv(model, pool1, 'conv2', 20, 50, 5)\n", 212 | "pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)\n", 213 | "\n", 214 | "# What is the shape of pool2 again...?\n", 215 | "feature_dimensions = workspace.FetchImmediate(\"pool2\").shape[1:]\n", 216 | "print(\"Feature dimensions before FC layer: {}\".format(feature_dimensions))\n", 217 | "\n", 218 | "fc3 = brew.fc(model, pool2, 'fc3', int(np.prod(feature_dimensions)), 500)\n", 219 | "fc3 = brew.relu(model, fc3, fc3)\n", 220 | "pred = brew.fc(model, fc3, 'pred', 500, 10)\n", 221 | "softmax = brew.softmax(model, pred, 'softmax')\n", 222 | "\n", 223 | "# Let's see if the dimensions are all correct:\n", 224 | "for blob in [\"data\", \"conv1\", \"pool1\", \"conv2\", \"pool2\", \"fc3\", \"pred\"]:\n", 225 | " print(\"Blob {} has shape: {}\".format(\n", 226 | " blob, workspace.FetchImmediate(blob).shape))\n", 227 | "# Let's also visualize a sample input.\n", 228 | "print(\"Sample input:\")\n", 229 | "visualize.NCHW.ShowMultiple(workspace.FetchImmediate(\"data\"))\n", 230 | "workspace.StopImmediate()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Remember, immediate mode is intended only for debugging - for verifying things interactively. For example, in the use case above, what you eventually want to do is remove the feature_dimensions argument and replace it with code that does not depend on immediate mode, such as hard-coding it (here, pool2 comes out as 50 channels of 4x4 feature maps, so the FC input dimension would be 50 * 4 * 4 = 800)." 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "## Departing words\n", 245 | "\n", 246 | "Immediate mode can be a useful tool for quick iteration. But it can also easily go wrong. Make sure that you understand its purpose, and never abuse it in real production environments. The philosophy of Caffe2 is to make things very flexible, and this is one example of that, but that flexibility also makes it easy to shoot yourself in the foot.
Take care :)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": true 254 | }, 255 | "outputs": [], 256 | "source": [] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "Python 2", 262 | "language": "python", 263 | "name": "python2" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 2 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython2", 275 | "version": "2.7.6" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 0 280 | } 281 | -------------------------------------------------------------------------------- /Python_Op.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Op Tutorial\n", 8 | "In this tutorial we cover the `Python` operator that allows writing Caffe2 operators using Python. We'll also discuss some of the underlying implementation details of the operator." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "### Forward Python Operator\n", 16 | "\n", 17 | "Caffe2 provides a high-level interface that helps with creating Python ops. Let's consider the following example, in which we create a basic operator `f`, which outputs the input * 2:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stderr", 27 | "output_type": "stream", 28 | "text": [ 29 | "WARNING:root:This caffe2 python run does not have GPU support. Will run in CPU only mode.\n", 30 | "WARNING:root:Debug message: No module named caffe2_pybind11_state_gpu\n" 31 | ] 32 | }, 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "[6.]\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "from __future__ import absolute_import\n", 43 | "from __future__ import division\n", 44 | "from __future__ import print_function\n", 45 | "from __future__ import unicode_literals\n", 46 | "\n", 47 | "from caffe2.python import core, workspace\n", 48 | "import numpy as np\n", 49 | "\n", 50 | "def f(inputs, outputs):\n", 51 | " outputs[0].feed(2 * inputs[0].data) # use 'feed' to set the output tensor to 2*input\n", 52 | "\n", 53 | "workspace.ResetWorkspace()\n", 54 | "net = core.Net(\"tutorial\")\n", 55 | "net.Python(f)([\"x\"], [\"y\"])\n", 56 | "workspace.FeedBlob(\"x\", np.array([3.]))\n", 57 | "workspace.RunNetOnce(net)\n", 58 | "print(workspace.FetchBlob(\"y\"))" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "As seen in the example, the `net.Python()` function returns a callable that can be used just like any other operator. In this example, we add a new Python operator to the net with input \"x\" and output \"y\". Note that you can save the output of `net.Python()` and call it multiple times to combine multiple Python operators with different inputs and outputs.\n", 66 | "\n", 67 | "Let's take a closer look at `net.Python()` function and the corresponding body of a new Python operator `f`. Every time `net.Python(f)` is called, it serializes a given function `f` and saves it in a global registry under a known key (token; passed to a PythonOp as an argument). 
After this, `net.Python()` returns a lambda that accepts positional and keyword arguments (typically inputs, outputs and extra arguments) and attaches a new Python operator to the net that calls function `f` on a given list of inputs and outputs.\n", 68 | "\n", 69 | "---" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "A Python operator's function `f` expects two positional arguments: a list of inputs and a list of outputs. When an operator is executed, it transparently converts Caffe2 blobs into the elements of these lists. CPU tensor blobs are converted into `TensorCPU` objects that act as wrappers around Numpy arrays. Let's take a closer look at the relationship between a Caffe2 CPU tensor, Python's `TensorCPU` object, and a Numpy array:\n", 77 | "\n", 78 | "1) Conversion between C++ tensor objects and Numpy objects happens automatically and is handled by the PyBind library.\n", 79 | "\n", 80 | "2) When generating a `TensorCPU` wrapper, a new Numpy array object is created which **shares** the same memory storage as the corresponding Caffe2 CPU tensor. This Numpy array is accessible in Python as the `.data` property of a `TensorCPU` object.\n", 81 | "\n", 82 | "3) Although the Numpy array and the Caffe2 tensor might share the same storage, other tensor data (e.g. shape) of the Caffe2 tensor is stored **separately** from the Numpy array. Furthermore, Numpy may copy and reallocate its array to a different location in memory (e.g. when we try to resize an array) during the operator function's execution. It's important to keep that in mind when writing a Python operator's code to ensure that the Caffe2 and Numpy output tensors are in sync.\n", 83 | "\n", 84 | "4) `TensorCPU`'s `feed` method accepts a Numpy tensor, resizes the underlying Caffe2 tensor and copies the Numpy tensor's data into the Caffe2 tensor.\n", 85 | "\n", 86 | "5) Another way to ensure that Caffe2's output tensor is properly set is to call the `reshape` function on a corresponding `TensorCPU` output, and copy the data in Python to the output's `.data` tensor.\n", 87 | "\n", 88 | "Below is an example of ensuring that Caffe2's output tensor is properly configured using `reshape`:" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 2, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "[6.]\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "def f_reshape(inputs, outputs):\n", 106 | " outputs[0].reshape(inputs[0].shape) # set the output tensor to be the same shape as the input tensor\n", 107 | " outputs[0].data[...] = 2 * inputs[0].data # assign output tensor (Numpy) to 2 * input tensor (Numpy)\n", 108 | "\n", 109 | "workspace.ResetWorkspace()\n", 110 | "net = core.Net(\"tutorial\")\n", 111 | "net.Python(f_reshape)([\"x\"], [\"z\"])\n", 112 | "workspace.FeedBlob(\"x\", np.array([3.]))\n", 113 | "workspace.RunNetOnce(net)\n", 114 | "print(workspace.FetchBlob(\"z\"))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "This example works correctly because the `reshape` method updates the underlying Caffe2 tensor, and a subsequent call to the `.data` property returns a Numpy array that shares memory with the Caffe2 tensor. The last line in `f_reshape` copies data into the shared memory location." 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "There are several additional arguments that `net.Python()` accepts.
When `pass_workspace=True` is passed, a workspace is passed to an operator's `Python` function:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "[6.]\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "def f_workspace(inputs, outputs, workspace):\n", 146 | " # use 'feed' to set the output tensor to 2 * (a blob in the workspace called \"x\")\n", 147 | " outputs[0].feed(2 * workspace.blobs[\"x\"].fetch())\n", 148 | "\n", 149 | "workspace.ResetWorkspace()\n", 150 | "net = core.Net(\"tutorial\")\n", 151 | "net.Python(f_workspace, pass_workspace=True)([], [\"y\"]) # add Python operator to net without specifying input blob\n", 152 | "workspace.FeedBlob(\"x\", np.array([3.])) # manually feed the \"x\" blob that the f_workspace operator expects into the workspace\n", 153 | "workspace.RunNetOnce(net)\n", 154 | "print(workspace.FetchBlob(\"y\"))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### Gradient Python Operator\n", 162 | "\n", 163 | "Another important `net.Python()` feature to discuss is the gradient operator, which computes the gradient of a custom `net.Python()` operator. In the example below, `grad_f` is the corresponding gradient operator for `f`.\n", 164 | "\n", 165 | "Note that we are using the same `f` as before ( $y = 2x$ ), so we expect the gradient with respect to $x$ to be ( $\frac{dy}{dx} = 2$ ).\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 4, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "[2.]\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "def f(inputs, outputs):\n", 183 | " outputs[0].reshape(inputs[0].shape)\n", 184 | " outputs[0].data[...] = inputs[0].data * 2\n", 185 | "\n", 186 | "def grad_f(inputs, outputs):\n", 187 | " # Ordering of inputs is [fwd inputs, outputs, grad_outputs]\n", 188 | " grad_output = inputs[2]\n", 189 | " grad_input = outputs[0]\n", 190 | " grad_input.reshape(grad_output.shape)\n", 191 | " grad_input.data[...] = grad_output.data * 2\n", 192 | "\n", 193 | "workspace.ResetWorkspace()\n", 194 | "net = core.Net(\"tutorial\")\n", 195 | "net.Python(f, grad_f)([\"x\"], [\"y\"])\n", 196 | "workspace.FeedBlob(\"x\", np.array([3.]))\n", 197 | "net.AddGradientOperators([\"y\"])\n", 198 | "workspace.RunNetOnce(net)\n", 199 | "print(workspace.FetchBlob(\"x_grad\"))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "When `net.Python()` is called with a gradient function specified, it also registers a serialized gradient function that is used by a corresponding gradient Python operator (**PythonGradient**). This operator executes a gradient function that expects two arguments - input and output lists. \n", 207 | "\n", 208 | "- The input list argument contains all forward function inputs, followed by all of its outputs, followed by the gradients of forward function outputs. \n", 209 | "- The output list contains the gradients of forward function inputs.\n", 210 | "\n", 211 | "Note: `net.Python()`'s **grad_output_indices**/**grad_input_indices** allow specifying indices of gradient output/input blobs that the gradient function reads/writes to (see the sketch just below)."
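The snippet below is a hypothetical sketch (not code from this tutorial) of how those two arguments might be used: a forward function with two inputs where only the first input, `x`, should receive a gradient. The exact argument ordering and semantics are an assumption extrapolated from the description above, so verify against the Caffe2 source before relying on it.

```python
from caffe2.python import core, workspace
import numpy as np

def f_scale(inputs, outputs):
    # forward: y = x * s, where s is a constant blob we never differentiate
    outputs[0].reshape(inputs[0].shape)
    outputs[0].data[...] = inputs[0].data * inputs[1].data

def grad_f_scale(inputs, outputs):
    # Assumed ordering: [fwd inputs, fwd outputs, selected grad outputs],
    # i.e. inputs = [x, s, y, y_grad]; outputs holds only x_grad, because
    # grad_input_indices=[0] below selects input 0 as the only one that
    # receives a gradient. (Hypothetical -- check the Caffe2 source.)
    x, s, y, y_grad = inputs
    x_grad = outputs[0]
    x_grad.reshape(y_grad.shape)
    x_grad.data[...] = y_grad.data * s.data  # dy/dx = s

workspace.ResetWorkspace()
net = core.Net("tutorial")
net.Python(f_scale, grad_f_scale,
           grad_output_indices=[0],  # read the gradient of output 0 only
           grad_input_indices=[0]    # write a gradient for input 0 only
           )(["x", "s"], ["y"])
workspace.FeedBlob("x", np.array([3.]))
workspace.FeedBlob("s", np.array([2.]))
net.AddGradientOperators(["y"])
workspace.RunNetOnce(net)
print(workspace.FetchBlob("x_grad"))  # expect [2.], the value of s
```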
212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "#### Note on GPU tensors:\n", 219 | "\n", 220 | "The PythonOp implementation is CPU-specific: it uses Numpy arrays, which require CPU memory storage. In order to be able to use a Python operator with GPU tensors, we define a CUDA version of PythonOp using GPUFallbackOp. This operator wraps a CPU operator and adds GPU-to-CPU (and CPU-to-GPU) copy operations. Thus, when using a PythonOp with a CUDA device option, all input CUDA tensors are automatically copied to CPU memory and all CPU output tensors are copied back to the GPU." 221 | ] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "Python 2", 227 | "language": "python", 228 | "name": "python2" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 2 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython2", 240 | "version": "2.7.14" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 2 245 | } 246 | -------------------------------------------------------------------------------- /py_gen/Toy_Regression.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Tutorial 2. A Simple Toy Regression 12 | # 13 | # This is a quick example showing how one can use the concepts introduced in [Tutorial 1: Basics](https://caffe2.ai/docs/tutorial-basics-of-caffe2.html) to do regression. This tutorial is split up into two parts. **Part I** is a more verbose example of creating and training a polynomial regression model and **Part II** is a concise linear regression example. 14 | # 15 | # ## Part I: Polynomial Regression 16 | # 17 | # The problem we are dealing with is a relatively simple one and involves a one-dimensional input $x$ and one-dimensional output $y.$ Because we seek a second order polynomial as the regression model, the weight vector will contain two weights ($\beta_2$ and $\beta_1$) and there will be a single bias ($\beta_0$) or intercept. The desired solution is of the form: 18 | # 19 | # $$y = \beta_2x^2 + \beta_1x + \beta_0$$ 20 | # 21 | # For this tutorial, we will generate and format an arbitrary set of input data that possesses a strong second order polynomial relationship. We will then construct the model, specify the training algorithm, perform the training, and finally look at the results.
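As a quick aside before the Caffe2 version (this helper is a hypothetical addition, not part of the original tutorial): because this is ordinary least-squares regression, you can cross-check the weights the trained model should converge to with a closed-form numpy fit.

```python
import numpy as np

def closed_form_poly_fit(X, y):
    """Least-squares sanity check for y ~ b1*x + b2*x^2 + b0.

    X is the (n, 2) feature matrix [x, x**2] constructed below and
    y the (n, 1) target vector; returns [b1, b2, b0].
    """
    A = np.hstack((X, np.ones((X.shape[0], 1))))  # append intercept column
    return np.linalg.lstsq(A, y)[0].ravel()       # minimizes ||A w - y||^2

# Usage, once X and Y_gt have been built by the code below:
#   print(closed_form_poly_fit(X, Y_gt))
```

The SGD-trained Caffe2 weights should land close to these values.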
22 | 23 | # In[1]: 24 | 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from __future__ import unicode_literals 30 | from caffe2.python import workspace, brew, optimizer 31 | from caffe2.python.model_helper import ModelHelper 32 | import numpy as np 33 | import matplotlib.pyplot as plt 34 | import sklearn.datasets 35 | from sklearn.preprocessing import PolynomialFeatures 36 | 37 | 38 | # ### Inputs 39 | # 40 | # Specify the input parameters of the regression model here including: number of samples in the input data, number of training iterations, learning rate of SGD algorithm, and the initial weights of the model 41 | 42 | # In[2]: 43 | 44 | 45 | # Number of training sample to generate 46 | num_samples = 200 47 | # Learning Rate of SGD algorithm 48 | learning_rate = .05 49 | # Number of iterations to train 50 | training_iters = 100 51 | # Initial model weights 52 | initial_weights = [0.,0.] 53 | 54 | 55 | # ### Create and Prepare the Dataset 56 | # 57 | # Now, we will create and prepare the dataset for use with the model. Note, we are just constructing numpy arrays here. Any other data can be used as long as it is shaped properly before being input into the model. 58 | 59 | # In[3]: 60 | 61 | 62 | # Create the original observations 63 | orig_X,_ = sklearn.datasets.make_regression(n_samples=num_samples,n_features=1,noise=5) 64 | poly = PolynomialFeatures(degree=2, include_bias=False) 65 | # Transform the features into second order polynomial features 66 | xx_ = poly.fit_transform(orig_X) 67 | 68 | # Extract the predictors and the values from the manufactured data 69 | X = [i[0] for i in xx_] 70 | Y_gt = [i[1] for i in xx_] 71 | noise = np.random.uniform(size=(len(Y_gt))) 72 | # Add some noise to the ground truth values 73 | Y_gt += noise 74 | 75 | # Shape the ground truth values for use with the model 76 | Y_gt = np.reshape(Y_gt,(-1,1)) 77 | # Format the input features. Recall, we accomplish polynomial regression by 78 | # including the original and the polynomial version of the predictors 79 | # as features of the model 80 | X = np.hstack((np.array(X).reshape(-1,1),np.array(X).reshape(-1,1)**2)) 81 | 82 | # Print a sample of the input data. X is the list of 2-feature input observations 83 | # and Y is the list of ground truth values associated with each observation 84 | print("X Sample:\n{}".format(X[:5])) 85 | print("Y Sample:\n{}".format(Y_gt[:5])) 86 | 87 | # Plot the input data 88 | plt.scatter([i[0] for i in X],Y_gt,label="original data",color='b') 89 | plt.xlabel("x") 90 | plt.ylabel("y") 91 | plt.title("Input Training Data") 92 | 93 | 94 | # ### Create the Model 95 | # 96 | # #### Define the model architecture 97 | # With our training data created and our second order polynomial assumption stated, we can now create a model to learn the regression line. We will use a 'FC' layer as the main component of the model. Since we desire two weights ($\beta_2$ and $\beta_1$), we set our input dimension to 2, and since we only expect a single quantitative result, our output dimension is 1. Note, when using an 'FC' layer it is implied that there is a bias, which we will use as our $\beta_0.$ 98 | # 99 | # Also, before continuing take a look at the protobuf created in this step. The first print out is of the 'net,' and contains the architecture of the model. 
At a glance, we see that as expected, there is a single op in the network that expects an input $X,$ a weight and bias, and outputs $y_{pred}.$ In the print out of the 'param_init_net,' we see that this is where the initializations for the weights and biases exist. This is an important observation that gives insight into how a model in Caffe2 is constructed and maintained. 100 | 101 | # In[4]: 102 | 103 | 104 | # Create the model helper object we will use to create the regression model 105 | regression_model = ModelHelper(name="regression_model") 106 | 107 | # Add the FC layer, which is the main component of a linear regression model 108 | y_pred = brew.fc(regression_model,'X','y_pred', dim_in=2, dim_out=1) 109 | 110 | # Print the predict and init net to see what protobuf was created for this model 111 | print("************* Predict Net *************") 112 | print(regression_model.net.Proto()) 113 | print("\n************* Init Net *************") 114 | print(regression_model.param_init_net.Proto()) 115 | 116 | 117 | # #### Add the training operators and prime the workspace 118 | # 119 | # In this **very important** step, we specify the loss function, setup the SGD training algorithm, prime and initialize the workspace, and initialize our model's weights and biases. 120 | 121 | # In[5]: 122 | 123 | 124 | # The loss function is computed by a squared L2 distance, 125 | # and then averaged over all items. 126 | dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist") 127 | loss = regression_model.AveragedLoss(dist, "loss") 128 | 129 | # Add the gradient operators and setup the SGD algorithm 130 | regression_model.AddGradientOperators([loss]) 131 | optimizer.build_sgd(regression_model, base_learning_rate=learning_rate) 132 | 133 | # Prime the workspace with some data 134 | workspace.FeedBlob("Y_gt",Y_gt.astype(np.float32)) 135 | workspace.FeedBlob("X",X.astype(np.float32)) 136 | 137 | # Run the init net to prepare the workspace then create the net 138 | workspace.RunNetOnce(regression_model.param_init_net) 139 | workspace.CreateNet(regression_model.net) 140 | 141 | # Inject our desired initial weights and bias 142 | workspace.FeedBlob("y_pred_w",np.array([initial_weights]).astype(np.float32)) 143 | workspace.FeedBlob("y_pred_b",np.array([0.]).astype(np.float32)) 144 | 145 | 146 | # #### Run the training 147 | 148 | # In[6]: 149 | 150 | 151 | # Run the training for training_iters 152 | for i in range(training_iters): 153 | workspace.RunNet(regression_model.net) 154 | 155 | print("Training Complete") 156 | 157 | 158 | # ### Extract Results 159 | # 160 | # Now that our model is trained, we can pull out the learned weights and biases which exist as blobs in the workspace named 'y_pred_w' and 'y_pred_b.' 
161 | 162 | # In[7]: 163 | 164 | 165 | # Extract the learned coes and intercept from the workspace 166 | coes = workspace.FetchBlob("y_pred_w")[0] 167 | intercept = workspace.FetchBlob("y_pred_b") 168 | 169 | # Calculate the regression line for plotting 170 | x_vals = np.linspace(orig_X.min(), orig_X.max(),100) 171 | regression_result = intercept[0] + coes[0]*x_vals + coes[1]*(x_vals**2) 172 | print("Best Fit Line: {}*x^2 + {}*x + {}".format(round(coes[1],5), round(coes[0],5), round(intercept[0],5))) 173 | 174 | # Plot the results of the regression 175 | plt.scatter([i[0] for i in X],Y_gt,label="original data",color='b') 176 | plt.plot(x_vals,regression_result,label="regression result",color='r') 177 | plt.legend() 178 | plt.xlabel("x") 179 | plt.ylabel("y") 180 | plt.title("Polynomial Regression Fit: ${{{}}}x^2 + {{{}}}x + {{{}}}$".format(round(coes[1],5), round(coes[0],5), round(intercept[0],5))) 181 | plt.show() 182 | 183 | 184 | # ## Part II: Express Linear Regression Example 185 | # 186 | # The above example shows you how to create a polynomial regression model that is easily adapted to handle higher order polynomials. Now, we will consider the baseline case where we desire a simple first order model, with 1-D input $x,$ 1-D output $y,$ and a solution of the form: 187 | # 188 | # $$y = \beta_1x + \beta_0$$ 189 | # 190 | # The structure of Part II will be similar to Part I. First, we will generate the dataset, then we will construct the model and specify the training routine, and finally we will train and extract our results. 191 | 192 | # In[8]: 193 | 194 | 195 | ##################################################################### 196 | # Initialize data 197 | ##################################################################### 198 | X,Y_gt = sklearn.datasets.make_regression(n_samples=100,n_features=1,noise=10) 199 | Y_gt = np.reshape(Y_gt,(-1,1)) 200 | Y_gt /= 100. 201 | 202 | ##################################################################### 203 | # Create and train model 204 | ##################################################################### 205 | # Construct model with single FC layer 206 | regression_model = ModelHelper(name="regression_model") 207 | y_pred = brew.fc(regression_model,'X','y_pred', dim_in=1, dim_out=1) 208 | 209 | # Specify Loss function 210 | dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist") 211 | loss = regression_model.AveragedLoss(dist, "loss") 212 | 213 | # Get gradients for all the computations above. 
214 | regression_model.AddGradientOperators([loss]) 215 | optimizer.build_sgd(regression_model, base_learning_rate=0.05) 216 | 217 | # Prime and prepare workspace for training 218 | workspace.FeedBlob("Y_gt",Y_gt.astype(np.float32)) 219 | workspace.FeedBlob("X",X.astype(np.float32)) 220 | workspace.RunNetOnce(regression_model.param_init_net) 221 | workspace.CreateNet(regression_model.net) 222 | 223 | # Set the initial weight and bias to 0 224 | workspace.FeedBlob("y_pred_w",np.array([[0.]]).astype(np.float32)) 225 | workspace.FeedBlob("y_pred_b",np.array([0.]).astype(np.float32)) 226 | 227 | # Train the model 228 | for i in range(100): 229 | workspace.RunNet(regression_model.net) 230 | 231 | ##################################################################### 232 | # Collect and format results 233 | ##################################################################### 234 | # Grab the learned weight and bias from workspace 235 | coe = workspace.FetchBlob("y_pred_w")[0] 236 | intercept = workspace.FetchBlob("y_pred_b") 237 | 238 | # Calculate the regression line for plotting 239 | x_vals = range(-3,4) 240 | regression_result = x_vals*coe + intercept 241 | 242 | # Plot the results 243 | plt.scatter(X,Y_gt,label="original data",color='b') 244 | plt.plot(x_vals,regression_result,label="regression result",color='r') 245 | plt.legend() 246 | plt.xlabel("x") 247 | plt.ylabel("y") 248 | plt.title("Regression Line: ${{{}}}x + {{{}}}$".format(round(coe,5), round(intercept[0],5))) 249 | plt.show() 250 | 251 | 252 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /py_gen/Basics.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. 
# 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Caffe2 Basic Concepts - Operators & Nets 12 | # 13 | # In this tutorial we will go through a set of Caffe2 basics: the basic concepts including how operators and nets are written. 14 | # 15 | # First, let's import Caffe2. `core` and `workspace` are usually the two that you need most. If you want to manipulate protocol buffers generated by Caffe2, you probably also want to import `caffe2_pb2` from `caffe2.proto`. 16 | 17 | # In[1]: 18 | 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | 25 | # We'll also import a few standard python libraries 26 | from matplotlib import pyplot 27 | import numpy as np 28 | import time 29 | 30 | # These are the droids you are looking for. 31 | from caffe2.python import core, workspace 32 | from caffe2.proto import caffe2_pb2 33 | 34 | # Let's show all plots inline. 35 | 36 | 37 | # You might see a warning saying that caffe2 does not have GPU support. That means you are running a CPU-only build. Don't be alarmed - anything CPU is still runnable without a problem. 38 | 39 | # ## Workspaces 40 | # 41 | # Let's cover workspaces first, where all the data resides. 42 | # 43 | # Similar to Matlab, the Caffe2 workspace consists of blobs you create and store in memory. For now, consider a blob to be an N-dimensional Tensor similar to numpy's ndarray, but contiguous. Down the road, we will show you that a blob is actually a typed pointer that can store any type of C++ object, but Tensor is the most common type stored in a blob. Let's show what the interface looks like. 44 | # 45 | # `Blobs()` prints out all existing blobs in the workspace. 46 | # `HasBlob()` queries if a blob exists in the workspace. As of now, we don't have any. 47 | 48 | # In[2]: 49 | 50 | 51 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 52 | print("Workspace has blob 'X'? {}".format(workspace.HasBlob("X"))) 53 | 54 | 55 | # We can feed blobs into the workspace using `FeedBlob()`. 56 | 57 | # In[3]: 58 | 59 | 60 | X = np.random.randn(2, 3).astype(np.float32) 61 | print("Generated X from numpy:\n{}".format(X)) 62 | workspace.FeedBlob("X", X) 63 | 64 | 65 | # Now, let's take a look at what blobs are in the workspace. 66 | 67 | # In[4]: 68 | 69 | 70 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 71 | print("Workspace has blob 'X'? {}".format(workspace.HasBlob("X"))) 72 | print("Fetched X:\n{}".format(workspace.FetchBlob("X"))) 73 | 74 | 75 | # Let's verify that the arrays are equal. 76 | 77 | # In[5]: 78 | 79 | 80 | np.testing.assert_array_equal(X, workspace.FetchBlob("X")) 81 | 82 | 83 | # Note that if you try to access a blob that does not exist, an error will be thrown: 84 | 85 | # In[6]: 86 | 87 | 88 | try: 89 | workspace.FetchBlob("invincible_pink_unicorn") 90 | except RuntimeError as err: 91 | print(err) 92 | 93 | 94 | # One thing that you might not use immediately: you can have multiple workspaces in Python using different names, and switch between them. Blobs in different workspaces are separate from each other. You can query the current workspace using `CurrentWorkspace`. Let's try switching the workspace by name (gutentag) and creating a new one if it doesn't exist.
95 | 96 | # In[7]: 97 | 98 | 99 | print("Current workspace: {}".format(workspace.CurrentWorkspace())) 100 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 101 | 102 | # Switch the workspace. The second argument "True" means creating 103 | # the workspace if it is missing. 104 | workspace.SwitchWorkspace("gutentag", True) 105 | 106 | # Let's print the current workspace. Note that there is nothing in the 107 | # workspace yet. 108 | print("Current workspace: {}".format(workspace.CurrentWorkspace())) 109 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 110 | 111 | 112 | # Let's switch back to the default workspace. 113 | 114 | # In[8]: 115 | 116 | 117 | workspace.SwitchWorkspace("default") 118 | print("Current workspace: {}".format(workspace.CurrentWorkspace())) 119 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 120 | 121 | 122 | # Finally, `ResetWorkspace()` clears anything that is in the current workspace. 123 | 124 | # In[9]: 125 | 126 | 127 | workspace.ResetWorkspace() 128 | print("Current blobs in the workspace after reset: {}".format(workspace.Blobs())) 129 | 130 | 131 | # ## Operators 132 | # 133 | # Operators in Caffe2 are kind of like functions. From the C++ side, they all derive from a common interface, and are registered by type, so that we can call different operators during runtime. The interface of operators is defined in `caffe2/proto/caffe2.proto`. Basically, it takes in a bunch of inputs, and produces a bunch of outputs. 134 | # 135 | # Remember, when we say "create an operator" in Caffe2 Python, nothing gets run yet. All it does is create the protocol buffer that specifies what the operator should be. At a later time it will be sent to the C++ backend for execution. If you are not familiar with protobuf, it is a JSON-like serialization tool for structured data. Find more about protocol buffers [here](https://developers.google.com/protocol-buffers/). 136 | # 137 | # Let's see an actual example. 138 | 139 | # In[10]: 140 | 141 | 142 | # Create an operator. 143 | op = core.CreateOperator( 144 | "Relu", # The type of operator that we want to run 145 | ["X"], # A list of input blobs by their names 146 | ["Y"], # A list of output blobs by their names 147 | ) 148 | # and we are done! 149 | 150 | 151 | # As we mentioned, the created op is actually a protobuf object. Let's show the content. 152 | 153 | # In[11]: 154 | 155 | 156 | print("Type of the created op is: {}".format(type(op))) 157 | print("Content:\n") 158 | print(str(op)) 159 | 160 | 161 | # Ok, let's run the operator. We first feed the input X to the workspace. 162 | # Then the simplest way to run an operator is to do `workspace.RunOperatorOnce(operator)` 163 | 164 | # In[12]: 165 | 166 | 167 | workspace.FeedBlob("X", np.random.randn(2, 3).astype(np.float32)) 168 | workspace.RunOperatorOnce(op) 169 | 170 | 171 | # After execution, let's see if the operator is doing the right thing. 172 | # 173 | # In this case, the operator is a common activation function used in neural networks, called [ReLU](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) (Rectified Linear Unit) activation.
ReLU activation helps to add necessary non-linear characteristics to the neural network classifier, and is defined as: 174 | # 175 | # $$ReLU(x) = max(0, x)$$ 176 | 177 | # In[13]: 178 | 179 | 180 | print("Current blobs in the workspace: {}\n".format(workspace.Blobs())) 181 | print("X:\n{}\n".format(workspace.FetchBlob("X"))) 182 | print("Y:\n{}\n".format(workspace.FetchBlob("Y"))) 183 | print("Expected:\n{}\n".format(np.maximum(workspace.FetchBlob("X"), 0))) 184 | 185 | 186 | # This is working if your Expected output matches your Y output in this example. 187 | # 188 | # Operators also take optional arguments if needed. They are specified as key-value pairs. Let's take a look at one simple example, which takes a tensor and fills it with Gaussian random variables. 189 | 190 | # In[14]: 191 | 192 | 193 | op = core.CreateOperator( 194 | "GaussianFill", 195 | [], # GaussianFill does not need any parameters. 196 | ["Z"], 197 | shape=[100, 100], # shape argument as a list of ints. 198 | mean=1.0, # mean as a single float 199 | std=1.0, # std as a single float 200 | ) 201 | print("Content of op:\n") 202 | print(str(op)) 203 | 204 | 205 | # Let's run it and see if things are as intended. 206 | 207 | # In[15]: 208 | 209 | 210 | workspace.RunOperatorOnce(op) 211 | temp = workspace.FetchBlob("Z") 212 | pyplot.hist(temp.flatten(), bins=50) 213 | pyplot.title("Distribution of Z") 214 | 215 | 216 | # If you see a bell shaped curve then it worked! 217 | 218 | # ## Nets 219 | # 220 | # Nets are essentially computation graphs. We keep the name `Net` for backward consistency (and also to pay tribute to neural nets). A Net is composed of multiple operators just like a program written as a sequence of commands. Let's take a look. 221 | # 222 | # When we talk about nets, we will also talk about BlobReference, which is an object that wraps around a string so we can do easy chaining of operators. 223 | # 224 | # Let's create a network that is essentially the equivalent of the following python math: 225 | # ``` 226 | # X = np.random.randn(2, 3) 227 | # W = np.random.randn(5, 3) 228 | # b = np.ones(5) 229 | # Y = X * W^T + b 230 | # ``` 231 | # We'll show the progress step by step. Caffe2's `core.Net` is a wrapper class around a NetDef protocol buffer. 232 | 233 | # When creating a network, its underlying protocol buffer is essentially empty other than the network name. Let's create the net and then show the proto content. 234 | 235 | # In[16]: 236 | 237 | 238 | net = core.Net("my_first_net") 239 | print("Current network proto:\n\n{}".format(net.Proto())) 240 | 241 | 242 | # Let's create a blob called X, and use GaussianFill to fill it with some random data. 243 | 244 | # In[17]: 245 | 246 | 247 | X = net.GaussianFill([], ["X"], mean=0.0, std=1.0, shape=[2, 3], run_once=0) 248 | print("New network proto:\n\n{}".format(net.Proto())) 249 | 250 | 251 | # You might have observed a few differences from the earlier `core.CreateOperator` call. Basically, when using a net, you can directly create an operator *and* add it to the net at the same time by calling `net.SomeOp` where SomeOp is a registered type string of an operator. This gets translated to 252 | # ``` 253 | # op = core.CreateOperator("SomeOp", ...) 254 | # net.Proto().op.append(op) 255 | # ``` 256 | # 257 | # Also, you might be wondering what X is. 
X is a `BlobReference` which records two things: 258 | # 259 | # - The blob's name, which is accessed with `str(X)` 260 | # 261 | # - The net it got created from, which is recorded by the internal variable `_from_net` 262 | # 263 | # Let's verify it. Also, remember, we are not actually running anything yet, so X contains nothing but a symbol. Don't expect to get any numerical values out of it right now :) 264 | 265 | # In[18]: 266 | 267 | 268 | print("Type of X is: {}".format(type(X))) 269 | print("The blob name is: {}".format(str(X))) 270 | 271 | 272 | # Let's continue to create W and b. 273 | 274 | # In[19]: 275 | 276 | 277 | W = net.GaussianFill([], ["W"], mean=0.0, std=1.0, shape=[5, 3], run_once=0) 278 | b = net.ConstantFill([], ["b"], shape=[5,], value=1.0, run_once=0) 279 | 280 | 281 | # Now, one simple piece of syntactic sugar: since a BlobReference object knows which net it was generated from, in addition to creating operators from a net, you can also create operators from BlobReferences. Let's create the FC operator in this way. 282 | 283 | # In[20]: 284 | 285 | 286 | Y = X.FC([W, b], ["Y"]) 287 | 288 | 289 | # Under the hood, `X.FC(...)` simply delegates to `net.FC` by inserting `X` as the first input of the corresponding operator, so what we did above is equivalent to 290 | # ``` 291 | # Y = net.FC([X, W, b], ["Y"]) 292 | # ``` 293 | # 294 | # Let's take a look at the current network. 295 | 296 | # In[21]: 297 | 298 | 299 | print("Current network proto:\n\n{}".format(net.Proto())) 300 | 301 | 302 | # Too verbose huh? Let's try to visualize it as a graph. Caffe2 ships with a very minimal graph visualization tool for this purpose. 303 | 304 | # In[22]: 305 | 306 | 307 | from caffe2.python import net_drawer 308 | from IPython import display 309 | graph = net_drawer.GetPydotGraph(net, rankdir="LR") 310 | display.Image(graph.create_png(), width=800) 311 | 312 | 313 | # So we have defined a Net, but nothing has been executed yet. Remember that the net above is essentially a protobuf that holds the definition of the network. When we actually run the network, what happens under the hood is: 314 | # - A C++ net object is instantiated from the protobuf 315 | # - The instantiated net's Run() function is called 316 | # 317 | # Before we do anything, we should clear any earlier workspace variables with `ResetWorkspace()`. 318 | # 319 | # Then there are two ways to run a net from Python. We will do the first option in the example below. 320 | # 321 | # 1. Call `workspace.RunNetOnce()`, which instantiates, runs and immediately destructs the network 322 | # 2. Call `workspace.CreateNet()` to create the C++ net object owned by the workspace, then call `workspace.RunNet()`, passing the name of the network to it 323 | # 324 | # 325 | 326 | # In[23]: 327 | 328 | 329 | workspace.ResetWorkspace() 330 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 331 | workspace.RunNetOnce(net) 332 | print("Blobs in the workspace after execution: {}".format(workspace.Blobs())) 333 | # Let's dump the contents of the blobs 334 | for name in workspace.Blobs(): 335 | print("{}:\n{}".format(name, workspace.FetchBlob(name))) 336 | 337 | 338 | # Now let's try the second way to create the net, and run it. First, clear the variables with `ResetWorkspace()`. Then create the net with the workspace's `net` object that we created earlier using `CreateNet(net_object)`. Finally, run the net with `RunNet(net_name)`.
339 | 340 | # In[24]: 341 | 342 | 343 | workspace.ResetWorkspace() 344 | print("Current blobs in the workspace: {}".format(workspace.Blobs())) 345 | workspace.CreateNet(net) 346 | workspace.RunNet(net.Proto().name) 347 | print("Blobs in the workspace after execution: {}".format(workspace.Blobs())) 348 | for name in workspace.Blobs(): 349 | print("{}:\n{}".format(name, workspace.FetchBlob(name))) 350 | 351 | 352 | # There are a few differences between `RunNetOnce` and `RunNet`, but the main one is computational overhead. Since `RunNetOnce` involves serializing the protobuf to pass between Python and C++ and instantiating the network, it may take longer to run. Let's run a test and see what the time overhead is. 353 | 354 | # In[25]: 355 | 356 | 357 | # It seems that %timeit magic does not work well with 358 | # C++ extensions so we'll basically do for loops 359 | start = time.time() 360 | for i in range(1000): 361 | workspace.RunNetOnce(net) 362 | end = time.time() 363 | print('Run time per RunNetOnce: {}'.format((end - start) / 1000)) 364 | 365 | start = time.time() 366 | for i in range(1000): 367 | workspace.RunNet(net.Proto().name) 368 | end = time.time() 369 | print('Run time per RunNet: {}'.format((end - start) / 1000)) 370 | 371 | 372 | # Congratulations, you now know many of the key components of the Caffe2 Python API! Ready for more Caffe2? Check out the rest of the tutorials for a variety of interesting use-cases! 373 | 374 | -------------------------------------------------------------------------------- /py_gen/Loading_Pretrained_Models.py: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | # 3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. # 4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION. # 5 | # 6 | ######################################################### 7 | 8 | 9 | # coding: utf-8 10 | 11 | # # Loading Pre-Trained Models 12 | # 13 | # ## Description 14 | # 15 | # In this tutorial, we will use the pre-trained `squeezenet` model from the [ModelZoo](https://github.com/caffe2/caffe2/wiki/Model-Zoo) to classify our own images. As input, we will provide the path (or URL) to an image we want to classify. It will also be helpful to know the [ImageNet object code](https://gist.githubusercontent.com/aaronmarkham/cd3a6b6ac071eca6f7b4a6e40e6038aa/raw/9edb4038a37da6b5a44c3b5bc52e448ff09bfe5b/alexnet_codes) for the image so we can verify our results. The 'object code' is nothing more than the integer label for the class used during training; for example, "985" is the code for the class "daisy". Note, although we are using squeezenet here, this tutorial serves as a somewhat universal method for running inference on pretrained models. 16 | # 17 | # If you came from the [Image Pre-Processing Tutorial](https://caffe2.ai/docs/tutorial-image-pre-processing.html), you will see that we are using rescale and crop functions to prep the image, as well as reformatting the image to be CHW, BGR, and finally NCHW. We also correct for the image mean by either using the calculated mean from a provided npy file, or statically removing 128 as a placeholder average. 18 | # 19 | # Hopefully, you will find that loading pre-trained models is simple and syntactically concise. From a high level, these are the three required steps for running inference on a pretrained model: 20 | # 21 | # 1.
21 | # 1. Read the init and predict protobuf (.pb) files of the pretrained model
22 | # 
23 | #     with open("init_net.pb", "rb") as f:
24 | #         init_net = f.read()
25 | #     with open("predict_net.pb", "rb") as f:
26 | #         predict_net = f.read()
27 | # 
28 | # 2. Initialize a Predictor in your workspace with the blobs from the protobufs
29 | # 
30 | #     p = workspace.Predictor(init_net, predict_net)
31 | # 
32 | # 3. Run the net on some data and get the (softmax) results!
33 | # 
34 | #     results = p.run({'data': img})
35 | # 
36 | # Note that, assuming the last layer of the network is a softmax layer, the results come back as a multidimensional array of probabilities with length equal to the number of classes that the model was trained on. The probabilities can be indexed by the object code (an integer), so if you know the object code, you can index the results array at that position to view the network's confidence that the input image belongs to that class.
37 | # 
38 | # **Model Download Options**
39 | # 
40 | # Although we will use `squeezenet` here, you can check out the [Model Zoo for pre-trained models](https://github.com/caffe2/caffe2/wiki/Model-Zoo) to browse/download a variety of pretrained models, or you can use Caffe2's `caffe2.python.models.download` module to easily acquire pre-trained models from [Github caffe2/models](http://github.com/caffe2/models).
41 | # 
42 | # For our purposes, we will use the `models.download` module to download `squeezenet` into the `/caffe2/python/models` folder of our local Caffe2 installation with the following command:
43 | # 
44 | # ```
45 | # python -m caffe2.python.models.download -i squeezenet
46 | # ```
47 | # 
48 | # If the above download worked, then you should have a directory named squeezenet in your `/caffe2/python/models` folder that contains `init_net.pb` and `predict_net.pb`. Note that if you do not use the `-i` flag, the model will be downloaded into your CWD; however, it will still be a directory named squeezenet containing two protobuf files. Alternatively, if you wish to download all of the models, you can clone the entire repo using:
49 | # 
50 | # ```
51 | # git clone https://github.com/caffe2/models
52 | # ```
53 | # 
54 | # ## Code
55 | # 
56 | # Before we start, let's take care of the required imports.
57 | 
58 | # In[1]:
59 | 
60 | 
61 | from __future__ import absolute_import
62 | from __future__ import division
63 | from __future__ import print_function
64 | from __future__ import unicode_literals
65 | from caffe2.proto import caffe2_pb2
66 | import numpy as np
67 | import skimage.io
68 | import skimage.transform
69 | from matplotlib import pyplot
70 | import os
71 | from caffe2.python import core, workspace, models
72 | import urllib2
73 | import operator
74 | print("Required modules imported.")
75 | 
76 | 
77 | # ### Inputs
78 | # 
79 | # Here, we will specify the inputs to be used for this run, including the input image, the model location, the mean file (optional), the required size of the image, and the location of the label mapping file.
80 | 
81 | # In[2]:
82 | 
83 | 
84 | # Configuration --- Change to your setup and preferences!
85 | # This directory should contain the models downloaded from the model zoo. To run this
86 | # tutorial, make sure there is a 'squeezenet' directory at this location that
87 | # contains both the 'init_net.pb' and 'predict_net.pb'
88 | CAFFE_MODELS = "~/caffe2/caffe2/python/models"
89 | 
90 | # Some sample images you can try, or use any URL to a regular image.
91 | # IMAGE_LOCATION = "https://upload.wikimedia.org/wikipedia/commons/thumb/f/f8/Whole-Lemon.jpg/1235px-Whole-Lemon.jpg"
92 | # IMAGE_LOCATION = "https://upload.wikimedia.org/wikipedia/commons/7/7b/Orange-Whole-%26-Split.jpg"
93 | # IMAGE_LOCATION = "https://upload.wikimedia.org/wikipedia/commons/a/ac/Pretzel.jpg"
94 | # IMAGE_LOCATION = "https://cdn.pixabay.com/photo/2015/02/10/21/28/flower-631765_1280.jpg"
95 | IMAGE_LOCATION = "images/flower.jpg"
96 | 
97 | # What model are we using?
98 | # Format below is the model's: folder name, init net, predict net, mean file, input image size
99 | # You can switch 'squeezenet' out with 'bvlc_alexnet', 'bvlc_googlenet' or others that you have downloaded
100 | MODEL = 'squeezenet', 'init_net.pb', 'predict_net.pb', 'ilsvrc_2012_mean.npy', 227
101 | 
102 | # codes - these help decipher the output, sourced from a list of ImageNet's object codes,
103 | # to provide a result like "tabby cat" or "lemon" depending on what's in the picture
104 | # you submit to the CNN.
105 | codes = "https://gist.githubusercontent.com/aaronmarkham/cd3a6b6ac071eca6f7b4a6e40e6038aa/raw/9edb4038a37da6b5a44c3b5bc52e448ff09bfe5b/alexnet_codes"
106 | print("Config set!")
107 | 
108 | 
109 | # ### Setup paths
110 | # 
111 | # With the configs set, we can now load the mean file (if it exists), as well as the predict net and the init net.
112 | 
113 | # In[3]:
114 | 
115 | 
116 | # set paths and variables from model choice and prep image
117 | CAFFE_MODELS = os.path.expanduser(CAFFE_MODELS)
118 | 
119 | # The mean can be 128, or a custom value based on the model; subtracting the mean
120 | # color found across the training images gives better results
121 | MEAN_FILE = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[3])
122 | if not os.path.exists(MEAN_FILE):
123 |     print("No mean file found!")
124 |     mean = 128
125 | else:
126 |     print("Mean file found!")
127 |     mean = np.load(MEAN_FILE).mean(1).mean(1)
128 |     mean = mean[:, np.newaxis, np.newaxis]
129 | print("mean was set to: ", mean)
130 | 
131 | # some models were trained with different image sizes, this helps you calibrate your image
132 | INPUT_IMAGE_SIZE = MODEL[4]
133 | 
134 | # make sure all of the files are around...
135 | INIT_NET = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[1])
136 | PREDICT_NET = os.path.join(CAFFE_MODELS, MODEL[0], MODEL[2])
137 | 
138 | # Check to see if the files exist
139 | if not os.path.exists(INIT_NET):
140 |     print("WARNING: " + INIT_NET + " not found!")
141 | else:
142 |     if not os.path.exists(PREDICT_NET):
143 |         print("WARNING: " + PREDICT_NET + " not found!")
144 |     else:
145 |         print("All needed files found!")
146 | 
147 | 
148 | 
149 | # ### Image Preprocessing
150 | # 
151 | # Now that we have specified our inputs and verified the existence of the input network, we can load the image and pre-process it for ingestion into a Caffe2 convolutional neural network! This is a very important step, as the trained CNN requires a specifically sized input image whose values are from a particular distribution.
152 | 
153 | # In[4]:
154 | 
155 | 
156 | # Function to crop the center cropX x cropY pixels from the input image
157 | def crop_center(img,cropx,cropy):
158 |     y,x,c = img.shape
159 |     startx = x//2-(cropx//2)
160 |     starty = y//2-(cropy//2)
161 |     return img[starty:starty+cropy,startx:startx+cropx]
162 | 
163 | # Function to rescale the input image to the desired height and/or width. This function will preserve
164 | # the aspect ratio of the original image while making the image the correct scale so we can retrieve
165 | # a good center crop. This function is best used with center crop to resize input images of any size into
166 | # the specifically sized images that our model can use.
167 | def rescale(img, input_height, input_width):
168 |     # Get original aspect ratio
169 |     aspect = img.shape[1]/float(img.shape[0])
170 |     if aspect > 1:
171 |         # landscape orientation - wide image: scaled width is aspect * target height
172 |         res = int(aspect * input_height)
173 |         imgScaled = skimage.transform.resize(img, (input_height, res))
174 |     elif aspect < 1:
175 |         # portrait orientation - tall image: scaled height is target width / aspect
176 |         res = int(input_width/aspect)
177 |         imgScaled = skimage.transform.resize(img, (res, input_width))
178 |     else:
179 |         imgScaled = skimage.transform.resize(img, (input_height, input_width))
180 |     return imgScaled
181 | 
182 | # Load the image as a 32-bit float
183 | # Note: skimage.io.imread returns a HWC ordered RGB image of some size
184 | img = skimage.img_as_float(skimage.io.imread(IMAGE_LOCATION)).astype(np.float32)
185 | print("Original Image Shape: " , img.shape)
186 | 
187 | # Rescale the image to comply with our desired input size. This will not make the image 227x227
188 | # but it will make either the height or width 227 so we can get the ideal center crop.
189 | img = rescale(img, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
190 | print("Image Shape after rescaling: " , img.shape)
191 | pyplot.figure()
192 | pyplot.imshow(img)
193 | pyplot.title('Rescaled image')
194 | 
195 | # Crop the center 227x227 pixels of the image so we can feed it to our model
196 | img = crop_center(img, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE)
197 | print("Image Shape after cropping: " , img.shape)
198 | pyplot.figure()
199 | pyplot.imshow(img)
200 | pyplot.title('Center Cropped')
201 | 
202 | # switch to CHW (HWC --> CHW)
203 | img = img.swapaxes(1, 2).swapaxes(0, 1)
204 | print("CHW Image Shape: " , img.shape)
205 | 
206 | pyplot.figure()
207 | for i in range(3):
208 |     # For some reason, pyplot subplot follows Matlab's indexing
209 |     # convention (starting with 1). Well, we'll just follow it...
210 |     pyplot.subplot(1, 3, i+1)
211 |     pyplot.imshow(img[i])
212 |     pyplot.axis('off')
213 |     pyplot.title('RGB channel %d' % (i+1))
214 | 
215 | # switch to BGR (RGB --> BGR)
216 | img = img[(2, 1, 0), :, :]
217 | 
218 | # remove mean for better results
219 | img = img * 255 - mean
220 | 
221 | # add batch size axis which completes the formation of the NCHW shaped input that we want
222 | img = img[np.newaxis, :, :, :].astype(np.float32)
223 | 
224 | print("NCHW image (ready to be used as input): ", img.shape)
225 | 
226 | 
227 | # ### Prepare the CNN and run the net!
228 | # 
229 | # Now that the image is ready to be ingested by the CNN, let's open the protobufs, load them into the workspace, and run the net.
230 | # 
231 | 
232 | # In[5]:
233 | 
234 | 
235 | # Read the contents of the input protobufs into local variables
236 | with open(INIT_NET, "rb") as f:
237 |     init_net = f.read()
238 | with open(PREDICT_NET, "rb") as f:
239 |     predict_net = f.read()
240 | 
241 | # Initialize the predictor from the input protobufs
242 | p = workspace.Predictor(init_net, predict_net)
243 | 
244 | # Run the net and return prediction
245 | results = p.run({'data': img})
246 | 
247 | # Convert the results into a NumPy array so we can examine them
248 | results = np.asarray(results)
249 | print("results shape: ", results.shape)
250 | 
251 | # Quick way to get the top-1 prediction result
252 | # Squeeze out the unnecessary axes. This returns a 1-D array of length 1000
253 | preds = np.squeeze(results)
254 | # Get the prediction and the confidence by finding the maximum value and index of maximum value in preds array
255 | curr_pred, curr_conf = max(enumerate(preds), key=operator.itemgetter(1))
256 | print("Prediction: ", curr_pred)
257 | print("Confidence: ", curr_conf)
258 | 
259 | 
260 | # ### Process Results
261 | # 
262 | # Recall that ImageNet is a 1000-class dataset, and observe that it is no coincidence that the third axis of results is length 1000. This axis holds the probability for each category in the pre-trained model. So when you look at the results array at a specific index, the number can be interpreted as the probability that the input belongs to the class corresponding to that index. Now that we have run the predictor and collected the results, we can interpret them by matching them to their corresponding English labels.
263 | # 
264 | 
265 | # In[6]:
266 | 
267 | 
268 | # the rest of this is digging through the results
269 | # flatten the (1, 1, 1000) results into a 1-D array of class probabilities
270 | results = results.flatten()
271 | index = 0
272 | highest = 0
273 | arr = np.empty((0,2), dtype=object)
274 | for i, r in enumerate(results):
275 |     # pair each class index with its probability
276 |     arr = np.append(arr, np.array([[i,r]]), axis=0)
277 |     if (r > highest):
278 |         highest = r
279 |         index = i
280 | 
281 | 
282 | 
283 | # top N results
284 | N = 5
285 | topN = sorted(arr, key=lambda x: x[1], reverse=True)[:N]
286 | print("Raw top {} results: {}".format(N,topN))
287 | 
288 | # Isolate the indexes of the top-N most likely classes
289 | topN_inds = [int(x[0]) for x in topN]
290 | print("Top {} classes in order: {}".format(N,topN_inds))
291 | 
292 | # Now we can grab the code list and create a class Look Up Table
293 | response = urllib2.urlopen(codes)
294 | class_LUT = []
295 | for line in response:
296 |     code, result = line.partition(":")[::2]
297 |     code = code.strip()
298 |     result = result.replace("'", "")
299 |     if code.isdigit():
300 |         class_LUT.append(result.split(",")[0][1:])
301 | 
302 | # For each of the top-N results, associate the integer result with an actual class
303 | for n in topN:
304 |     print("Model predicts '{}' with {}% confidence".format(class_LUT[int(n[0])],float("{0:.2f}".format(n[1]*100))))
305 | 
306 | 
307 | # ### Feeding Larger Batches
308 | # 
309 | # Above is an example of how to feed one image at a time. We can achieve higher throughput if we feed multiple images at a time in a single batch. Recall that the data fed into the classifier is in 'NCHW' order, so to feed multiple images, we will expand the 'N' axis.
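# One practical note before we build the batch: the formatting pipeline above assumes each image read by skimage is a 3-channel RGB array in HWC order, which is true for the sample images used below. If you substitute your own images, a grayscale or RGBA file would break the CHW/BGR steps. A minimal guard, applied right after `skimage.io.imread`, might look like the sketch below; `ensure_rgb` is a hypothetical helper added here for illustration and is not part of the original tutorial.
# 
#     import skimage.color
# 
#     def ensure_rgb(img):
#         # Grayscale (H, W) -> replicate the single channel into RGB
#         if img.ndim == 2:
#             img = skimage.color.gray2rgb(img)
#         # RGBA (H, W, 4) -> drop the alpha channel
#         elif img.shape[2] == 4:
#             img = img[:, :, :3]
#         return img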
310 | 
311 | # In[7]:
312 | 
313 | 
314 | # List of input images to be fed
315 | images = ["images/cowboy-hat.jpg",
316 |           "images/cell-tower.jpg",
317 |           "images/Ducreux.jpg",
318 |           "images/pretzel.jpg",
319 |           "images/orangutan.jpg",
320 |           "images/aircraft-carrier.jpg",
321 |           "images/cat.jpg"]
322 | 
323 | # Allocate space for the batch of formatted images
324 | NCHW_batch = np.zeros((len(images),3,227,227))
325 | print("Batch Shape: ",NCHW_batch.shape)
326 | 
327 | # For each of the images in the list, format it and place it in the batch
328 | for i,curr_img in enumerate(images):
329 |     img = skimage.img_as_float(skimage.io.imread(curr_img)).astype(np.float32)
330 |     img = rescale(img, 227, 227)
331 |     img = crop_center(img, 227, 227)
332 |     img = img.swapaxes(1, 2).swapaxes(0, 1)
333 |     img = img[(2, 1, 0), :, :]
334 |     img = img * 255 - mean
335 |     NCHW_batch[i] = img
336 | 
337 | print("NCHW image (ready to be used as input): ", NCHW_batch.shape)
338 | 
339 | # Run the net on the batch
340 | results = p.run([NCHW_batch.astype(np.float32)])
341 | 
342 | # Convert the results into a NumPy array so we can examine them
343 | results = np.asarray(results)
344 | 
345 | # Squeeze out the unnecessary axis
346 | preds = np.squeeze(results)
347 | print("Squeezed Predictions Shape, with batch size {}: {}".format(len(images),preds.shape))
348 | 
349 | # Describe the results
350 | for i,pred in enumerate(preds):
351 |     print("Results for: '{}'".format(images[i]))
352 |     # Get the prediction and the confidence by finding the maximum value
353 |     # and index of maximum value in preds array
354 |     curr_pred, curr_conf = max(enumerate(pred), key=operator.itemgetter(1))
355 |     print("\tPrediction: ", curr_pred)
356 |     print("\tClass Name: ", class_LUT[int(curr_pred)])
357 |     print("\tConfidence: ", curr_conf)
358 | 
359 | 
360 | 
--------------------------------------------------------------------------------
/py_gen/CIFAR10_Part2.py:
--------------------------------------------------------------------------------
1 | #########################################################
2 | #
3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. #
4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION.     #
5 | #
6 | #########################################################
7 | 
8 | 
9 | # coding: utf-8
10 | 
11 | # # CIFAR-10: Part 2
12 | # 
13 | # Welcome back! If you have not completed [Part 1](*), please do so before running the code in this notebook.
14 | # 
15 | # In Part 2, we will assume you have the training and testing lmdbs, as well as the trained model .pb files from Part 1. As you may recall, in Part 1 we created the dataset in the form of lmdbs, then trained a model and saved it as a *predict_net.pb* and an *init_net.pb*. In this notebook, we will show how to test that saved model with the test lmdb, and how to continue training to increase our test accuracy.
16 | # 
17 | # Recall the objectives of the two-part CIFAR-10 tutorial:
18 | # 
19 | # **Part 1:**
20 | # - Download dataset
21 | # - Write images to lmdbs
22 | # - Define and train a model with checkpoints
23 | # - Save the trained model
24 | # 
25 | # **Part 2:**
26 | # - Load pre-trained model from Part 1
27 | # - Run inference on testing lmdb
28 | # - Continue training to improve test accuracy
29 | # - Test the retrained model
30 | # 
31 | # As before, let's start with some necessary imports.
32 | 
33 | # In[ ]:
34 | 
35 | 
36 | from __future__ import absolute_import
37 | from __future__ import division
38 | from __future__ import print_function
39 | from __future__ import unicode_literals
40 | import numpy as np
41 | import os
42 | import shutil
43 | import operator
44 | import glob
45 | from caffe2.python import core,model_helper,optimizer,workspace,brew,utils
46 | from caffe2.proto import caffe2_pb2
47 | import matplotlib.pyplot as plt
48 | from caffe2.python.modeling import initializers
49 | from caffe2.python.modeling.parameter_info import ParameterTags
50 | 
51 | 
52 | # ## Check Inputs
53 | # 
54 | # Before we get started, let's make sure you have the necessary Part 1 files. We will use the saved model from the most recent run of Part 1.
55 | 
56 | # In[2]:
57 | 
58 | 
59 | # Train lmdb
60 | TRAIN_LMDB = os.path.join(os.path.expanduser('~'),"caffe2_notebooks/tutorial_data/cifar10/training_lmdb")
61 | # Test lmdb
62 | TEST_LMDB = os.path.join(os.path.expanduser('~'),"caffe2_notebooks/tutorial_data/cifar10/testing_lmdb")
63 | 
64 | 
65 | # Extract protobuf files from most recent Part 1 run
66 | part1_runs_path = os.path.join(os.path.expanduser('~'), "caffe2_notebooks", "tutorial_files", "tutorial_cifar10")
67 | runs = sorted(glob.glob(part1_runs_path + "/*"))
68 | 
69 | # Init net
70 | INIT_NET = os.path.join(runs[-1], "cifar10_init_net.pb")
71 | # Predict net
72 | PREDICT_NET = os.path.join(runs[-1], "cifar10_predict_net.pb")
73 | 
74 | 
75 | # Make sure they all exist
76 | if (not os.path.exists(TRAIN_LMDB)) or (not os.path.exists(TEST_LMDB)) or (not os.path.exists(INIT_NET)) or (not os.path.exists(PREDICT_NET)):
77 |     print("ERROR: input not found!")
78 | else:
79 |     print("Success, you may continue!")
80 | 
81 | 
82 | # ### Repeat Helper Functions
83 | # 
84 | # If these functions look familiar, you are correct; they have been copied-and-pasted from Part 1. To summarize, we will need the *AddInputLayer* function to connect our models to the lmdbs, and the *Add_Original_CIFAR10_Model* function to provide the architecture of the network.
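# As a quick sanity check on the `update_dims` helper defined in the next cell, we can trace the spatial dimensions of a 32x32 CIFAR-10 image through the network, using new_dim = ((dim - kernel + 2*pad)//stride) + 1 at each conv/pool op:
# 
#     conv1 (kernel=5, stride=1, pad=2): 32x32 -> 32x32
#     pool1 (kernel=3, stride=2, pad=0): 32x32 -> 15x15
#     conv2 (kernel=5, stride=1, pad=2): 15x15 -> 15x15
#     pool2 (kernel=3, stride=2, pad=0): 15x15 -> 7x7
#     conv3 (kernel=5, stride=1, pad=2): 7x7 -> 7x7
#     pool3 (kernel=3, stride=2, pad=0): 7x7 -> 3x3
# 
# This is why `fc1` below uses dim_in = 64*h*w, which works out to 64*3*3 = 576.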
85 | 
86 | # In[3]:
87 | 
88 | 
89 | def AddInputLayer(model, batch_size, db, db_type):
90 |     # load the data
91 |     #data_uint8, label = brew.db_input(
92 |     #    model,
93 |     #    blobs_out=["data_uint8", "label"],
94 |     #    batch_size=batch_size,
95 |     #    db=db,
96 |     #    db_type=db_type,
97 |     #)
98 |     data_uint8, label = model.TensorProtosDBInput([], ["data_uint8", "label"], batch_size=batch_size, db=db, db_type=db_type)
99 |     # cast the data to float
100 |     data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)
101 |     # scale data from [0,255] down to [0,1]
102 |     data = model.Scale(data, data, scale=float(1./256))
103 |     # don't need the gradient for the backward pass
104 |     data = model.StopGradient(data, data)
105 |     return data, label
106 | 
107 | def update_dims(height, width, kernel, stride, pad):
108 |     new_height = ((height - kernel + 2*pad)//stride) + 1
109 |     new_width = ((width - kernel + 2*pad)//stride) + 1
110 |     return new_height, new_width
111 | 
112 | def Add_Original_CIFAR10_Model(model, data, num_classes, image_height, image_width, image_channels):
113 |     # Convolutional layer 1
114 |     conv1 = brew.conv(model, data, 'conv1', dim_in=image_channels, dim_out=32, kernel=5, stride=1, pad=2)
115 |     h,w = update_dims(height=image_height, width=image_width, kernel=5, stride=1, pad=2)
116 |     # Pooling layer 1
117 |     pool1 = brew.max_pool(model, conv1, 'pool1', kernel=3, stride=2)
118 |     h,w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)
119 |     # ReLU layer 1
120 |     relu1 = brew.relu(model, pool1, 'relu1')
121 | 
122 |     # Convolutional layer 2
123 |     conv2 = brew.conv(model, relu1, 'conv2', dim_in=32, dim_out=32, kernel=5, stride=1, pad=2)
124 |     h,w = update_dims(height=h, width=w, kernel=5, stride=1, pad=2)
125 |     # ReLU layer 2
126 |     relu2 = brew.relu(model, conv2, 'relu2')
127 |     # Pooling layer 2
128 |     pool2 = brew.average_pool(model, relu2, 'pool2', kernel=3, stride=2)
129 |     h,w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)
130 | 
131 |     # Convolutional layer 3
132 |     conv3 = brew.conv(model, pool2, 'conv3', dim_in=32, dim_out=64, kernel=5, stride=1, pad=2)
133 |     h,w = update_dims(height=h, width=w, kernel=5, stride=1, pad=2)
134 |     # ReLU layer 3
135 |     relu3 = brew.relu(model, conv3, 'relu3')
136 |     # Pooling layer 3
137 |     pool3 = brew.average_pool(model, relu3, 'pool3', kernel=3, stride=2)
138 |     h,w = update_dims(height=h, width=w, kernel=3, stride=2, pad=0)
139 | 
140 |     # Fully connected layers
141 |     fc1 = brew.fc(model, pool3, 'fc1', dim_in=64*h*w, dim_out=64)
142 |     fc2 = brew.fc(model, fc1, 'fc2', dim_in=64, dim_out=num_classes)
143 | 
144 |     # Softmax layer
145 |     softmax = brew.softmax(model, fc2, 'softmax')
146 |     return softmax
147 | 
148 | 
149 | # ## Test Saved Model From Part 1
150 | # 
151 | # ### Construct Model for Testing
152 | # 
153 | # The first thing we need is a model helper object that we can attach the lmdb reader to.
154 | 
155 | # In[4]:
156 | 
157 | 
158 | # Create a ModelHelper object with init_params=False
159 | arg_scope = {"order": "NCHW"}
160 | test_model = model_helper.ModelHelper(name="test_model", arg_scope=arg_scope, init_params=False)
161 | 
162 | # Add the data input layer to the model, pointing at the TEST_LMDB
163 | data,_ = AddInputLayer(test_model,1,TEST_LMDB,'lmdb')
164 | 
165 | 
166 | # ### Populate the Model Helper with Saved Model Params
167 | # 
168 | # To format a model for testing, we do not need to create params in the model helper, nor do we need to add gradient operators, as we will only be performing forward passes. All we really need to do is populate the *.net* and *.param_init_net* members of the model helper with the contents of the saved *predict_net.pb* and *init_net.pb*, respectively. To accomplish this, we construct *caffe2_pb* objects with the protobuf from the pb files, create *Net* objects with the *caffe2_pb* objects, then **append** the net objects to the *.net* and *.param_init_net* members of the model helper. Appending is very important here! If we did not append, we would wipe out the input data layer that we just added.
169 | # 
170 | # Recall from Part 1 that the saved model expects an input named *data* and produces an output called *softmax*. Conveniently (but not accidentally), the *AddInputLayer* function reads from the lmdb and puts the information into the workspace in a blob called *data*. It is also important to remember what each of the saved nets that we are appending to our model contains. The *predict_net* contains the structure of the model, including the ops involved in the forward pass. It has the definitions of the convolutional, pooling, and fc layers in the model. The *init_net* contains the weight initializations for the parameters that the ops in the *predict_net* expect. For example, if there is an op in the *predict_net* named 'fc1', the *init_net* will contain the trained weights (*fc1_w*) and biases (*fc1_b*) for that layer.
171 | # 
172 | # After we append the nets, we add an accuracy layer to the model which uses the *softmax* output from the saved model and the *label* input from the lmdb. Note that we could manually fetch the softmax blob from the workspace after every iteration and check whether or not the class with the highest softmax score is the true label, but instead we opt for the simpler accuracy layer.
173 | 
174 | # In[5]:
175 | 
176 | 
177 | # Populate the model helper obj with the init net stuff, which provides the
178 | # weight initializations for the model
179 | init_net_proto = caffe2_pb2.NetDef()
180 | with open(INIT_NET, "rb") as f:
181 |     init_net_proto.ParseFromString(f.read())
182 | test_model.param_init_net = test_model.param_init_net.AppendNet(core.Net(init_net_proto))
183 | 
184 | # Populate the model helper obj with the predict net stuff, which defines
185 | # the structure of the model
186 | predict_net_proto = caffe2_pb2.NetDef()
187 | with open(PREDICT_NET, "rb") as f:
188 |     predict_net_proto.ParseFromString(f.read())
189 | test_model.net = test_model.net.AppendNet(core.Net(predict_net_proto))
190 | 
191 | # Add an accuracy feature to the model for convenient reporting during testing
192 | accuracy = brew.accuracy(test_model, ['softmax', 'label' ], 'accuracy')
193 | 
194 | 
195 | # ### Run Testing
196 | # 
197 | # At this point, our model is initialized as the saved model from Part 1. We can now run the testing loop and check the accuracy.
198 | 
199 | # In[6]:
200 | 
201 | 
202 | # Run the param init net to put the trained model info into the workspace
203 | workspace.RunNetOnce(test_model.param_init_net)
204 | workspace.CreateNet(test_model.net, overwrite=True)
205 | 
206 | # Stat keeper
207 | avg_accuracy = 0.0
208 | 
209 | # Number of test iterations to run. Since the full test set is 10k images and the
210 | # batch size is 1, we run 10000 test batches to cover the entire test set
211 | test_iters = 10000
212 | 
213 | # Main testing loop
214 | for i in range(test_iters):
215 |     workspace.RunNet(test_model.net)
216 |     acc = workspace.FetchBlob('accuracy')
217 |     avg_accuracy += acc
218 |     if (i % 500 == 0) and (i > 0):
219 |         print("Iter: {}, Current Accuracy: {}".format(i, avg_accuracy/float(i)))
220 | 
221 | # Report final test accuracy score as the number of correct predictions divided by 10,000
222 | print("*********************************************")
223 | print("Final Test Accuracy: ",avg_accuracy/float(test_iters))
224 | 
225 | 
226 | # ## Continue Training
227 | # 
228 | # Our model is performing significantly better than random guessing, but I think we can do a little better with more training. To do this we will:
229 | # - create a new model helper
230 | # - specify that the train data will come from the training lmdb
231 | # - re-define the model architecture with the Add_Original_CIFAR10_Model function
232 | # - grab the trained weights and biases from the saved init_net.pb
233 | # - resume training
234 | # 
235 | # ### Construct Model for Re-Training
236 | # 
237 | # Here we create a new model helper object for training. Nothing here should look new, but take notice that we set **init_params=False**. This is important, as we do not want brew (in the *Add_Original_CIFAR10_Model* function) to automatically initialize the params; rather, we want to set them ourselves. Once we construct the model helper, we add the input layer and point it to the training lmdb, brew in the model architecture, and finally initialize the parameters by appending the contents of the saved *init_net.pb* to the *.param_init_net* member of the train model.
238 | 
239 | # In[7]:
240 | 
241 | 
242 | # Number of iterations to train for here
243 | training_iters = 3000
244 | 
245 | # Reset workspace to clear all of the information from the testing stage
246 | workspace.ResetWorkspace()
247 | 
248 | # Create new model
249 | arg_scope = {"order": "NCHW"}
250 | train_model = model_helper.ModelHelper(name="cifar10_train", arg_scope=arg_scope, init_params=False)
251 | 
252 | # Add the data layer to the model
253 | data,_ = AddInputLayer(train_model,100,TRAIN_LMDB,'lmdb')
254 | softmax = Add_Original_CIFAR10_Model(train_model, data, 10, 32, 32, 3)
255 | 
256 | # Populate the param_init_net of the model obj with the contents of the init net
257 | init_net_proto = caffe2_pb2.NetDef()
258 | with open(INIT_NET, "rb") as f:
259 |     init_net_proto.ParseFromString(f.read())
260 | tmp_init_net = core.Net(init_net_proto)
261 | train_model.param_init_net = train_model.param_init_net.AppendNet(tmp_init_net)
262 | 
263 | 
264 | # ### Specify Loss Function and Optimizer
265 | # 
266 | # We can now proceed as normal by specifying the loss function, adding the gradient operators, and building the optimizer. Here, we opt for the same loss function and optimizer that we used in Part 1.
267 | 
268 | # In[8]:
269 | 
270 | 
271 | # Add the "training operators" to the model
272 | xent = train_model.LabelCrossEntropy([softmax, 'label'], 'xent')
273 | # compute the expected loss
274 | loss = train_model.AveragedLoss(xent, "loss")
275 | # track the accuracy of the model
276 | accuracy = brew.accuracy(train_model, [softmax, 'label'], "accuracy")
277 | # use the average loss we just computed to add gradient operators to the model
278 | train_model.AddGradientOperators([loss])
279 | # Specify Optimization Algorithm
280 | optimizer.build_sgd(
281 |     train_model,
282 |     base_learning_rate=0.01,
283 |     policy="fixed",
284 |     momentum=0.9,
285 |     weight_decay=0.004
286 | )
287 | 
288 | 
289 | # **Important Note**
290 | # 
291 | # Check out the results of the *GetOptimizationParamInfo* function. The *params* that this function returns are the parameters that will be optimized by the optimization function. If you are attempting to retrain a model in a different way, and your model doesn't seem to be learning, check the return value of this function. If it returns nothing, look no further for your problem! This is exactly the reason that we brew'ed in the layers of the train model with the *Add_Original_CIFAR10_Model* function: it creates the params in the model automatically. If we had appended the *.net* member of the Model Helper as we did for the test model, this function would return nothing, meaning no parameters would get optimized. A workaround if you appended the net would be to manually create the params with the *create_param* function, which feels like a bit of a hack, especially if you have the add-model code on hand.
292 | 
293 | # In[9]:
294 | 
295 | 
296 | for param in train_model.GetOptimizationParamInfo():
297 |     print("Param to be optimized: ",param)
298 | 
299 | 
300 | # ### Run Training
301 | # 
302 | # **This step will take a while!**
303 | # 
304 | # With our model helper set up, we can now run the training as normal. Note that the accuracy and loss reported here are measured on the *training* batches. Recall that the accuracy reported in Part 1 was the validation accuracy. Be careful how you interpret this number!
305 | 
306 | # In[10]:
307 | 
308 | 
309 | # Prime the workspace
310 | workspace.RunNetOnce(train_model.param_init_net)
311 | workspace.CreateNet(train_model.net, overwrite=True)
312 | 
313 | # Run the training loop
314 | for i in range(training_iters):
315 |     workspace.RunNet(train_model.net)
316 |     acc = workspace.FetchBlob('accuracy')
317 |     loss = workspace.FetchBlob('loss')
318 |     if i % 100 == 0:
319 |         print("Iter: {}, Loss: {}, Accuracy: {}".format(i,loss,acc))
320 | 
321 | 
322 | # ## Test the Retrained Model
323 | # 
324 | # We will test the retrained model just as we did in the first part of this notebook. However, since the params already exist in the workspace from the retraining step, we do not need to set the *.param_init_net*. Rather, we set **init_params=False** and brew in the model architecture with *Add_Original_CIFAR10_Model*. When we create the net, the model will find that the required blobs are already in the workspace. Then, we can run the main testing loop, which will report a final test accuracy score (which is hopefully higher).
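# As an aside before the final test: Part 1 showed how to save the trained nets as .pb files. If you want to snapshot the retrained parameters the same way, the following is a minimal sketch (not part of the original notebook) using Caffe2's `mobile_exporter`; it assumes `mobile_exporter` is available in your build and that `train_model.params` holds the trained parameters, which it should since we brewed the layers in.
# 
#     from caffe2.python.predictor import mobile_exporter
# 
#     # Export builds an init net from the current param values in the workspace
#     # and a predict net from the model's forward-pass ops
#     deploy_init_net, deploy_predict_net = mobile_exporter.Export(
#         workspace, train_model.net, train_model.params)
# 
#     with open("cifar10_retrained_init_net.pb", 'wb') as f:
#         f.write(deploy_init_net.SerializeToString())
#     with open("cifar10_retrained_predict_net.pb", 'wb') as f:
#         f.write(deploy_predict_net.SerializeToString())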
325 | 
326 | # In[11]:
327 | 
328 | 
329 | arg_scope = {"order": "NCHW"}
330 | # Construct the model
331 | test_model = model_helper.ModelHelper(name="test_model", arg_scope=arg_scope, init_params=False)
332 | # Set the input as the test lmdb
333 | data,_ = AddInputLayer(test_model,1,TEST_LMDB,'lmdb')
334 | # brew in the model architecture
335 | softmax = Add_Original_CIFAR10_Model(test_model, data, 10, 32, 32, 3)
336 | accuracy = brew.accuracy(test_model, ['softmax', 'label' ], 'accuracy')
337 | # Prime the net
338 | workspace.RunNetOnce(test_model.param_init_net)
339 | workspace.CreateNet(test_model.net, overwrite=True)
340 | 
341 | # Confusion Matrix for CIFAR-10
342 | cmat = np.zeros((10,10))
343 | 
344 | # Stat keepers
345 | avg_accuracy = 0.0
346 | test_iters = 10000
347 | 
348 | # Main testing loop
349 | for i in range(test_iters):
350 |     workspace.RunNet(test_model.net)
351 |     acc = workspace.FetchBlob('accuracy')
352 |     avg_accuracy += acc
353 |     if (i % 500 == 0) and (i > 0):
354 |         print("Iter: {}, Current Accuracy: {}".format(i, avg_accuracy/float(i)))
355 | 
356 |     # Get the top-1 prediction
357 |     results = workspace.FetchBlob('softmax')[0]
358 |     label = workspace.FetchBlob('label')[0]
359 |     max_index, max_value = max(enumerate(results), key=operator.itemgetter(1))
360 |     # Update confusion matrix
361 |     cmat[label,max_index] += 1
362 | 
363 | # Report final testing results
364 | print("*********************************************")
365 | print("Final Test Accuracy: ",avg_accuracy/float(test_iters))
366 | 
367 | 
368 | # ### Check Results
369 | # 
370 | # Notice that the result from testing the re-trained model is better than the original test accuracy. If you wish, you can save the new model as .pb files just as in Part 1, but we will leave that to you. The last thing we will do is attempt to visualize the performance of our classifier by plotting a confusion matrix and looking for a **strong diagonal** trend.
371 | 
372 | # In[12]:
373 | 
374 | 
375 | # Plot confusion matrix
376 | fig = plt.figure(figsize=(10,10))
377 | plt.tight_layout()
378 | ax = fig.add_subplot(111)
379 | res = ax.imshow(cmat, cmap=plt.cm.rainbow,interpolation='nearest')
380 | width, height = cmat.shape
381 | for x in range(width):
382 |     for y in range(height):
383 |         ax.annotate(str(cmat[x,y]), xy=(y, x),horizontalalignment='center',verticalalignment='center')
384 | 
385 | classes = ['Airplane','Automobile','Bird','Cat','Deer','Dog','Frog','Horse','Ship','Truck']
386 | plt.xticks(range(width), classes, rotation=0)
387 | plt.yticks(range(height), classes, rotation=0)
388 | ax.set_xlabel('Predicted Class')
389 | ax.set_ylabel('True Class')
390 | plt.title('CIFAR-10 Confusion Matrix')
391 | plt.show()
392 | 
393 | 
--------------------------------------------------------------------------------
/py_gen/Control_Ops.py:
--------------------------------------------------------------------------------
1 | #########################################################
2 | #
3 | # DO NOT EDIT THIS FILE. IT IS GENERATED AUTOMATICALLY. #
4 | # PLEASE LOOK INTO THE README FOR MORE INFORMATION.     #
5 | #
6 | #########################################################
7 | 
8 | 
9 | # coding: utf-8
10 | 
11 | # # Control Ops Tutorial
12 | # 
13 | # In this tutorial we show how to use control flow operators in Caffe2 and give some details about their underlying implementations.
14 | 
15 | # ### Conditional Execution Using NetBuilder
16 | # 
17 | # Let's start with the conditional operator. We will demonstrate how to use it in two Caffe2 APIs used for building nets: `NetBuilder` and `brew`.
18 | 
19 | # In[1]:
20 | 
21 | 
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 | from __future__ import unicode_literals
26 | 
27 | from caffe2.python import workspace
28 | from caffe2.python.core import Plan, to_execution_step, Net
29 | from caffe2.python.net_builder import ops, NetBuilder
30 | 
31 | 
32 | # In the first example, we define several blobs and then use the 'If' operator to set the value of one of them conditionally, depending on the values of other blobs.
33 | # 
34 | # The pseudocode for the conditional examples we will implement is as follows:
35 | # 
36 | #     if (x > 0):
37 | #         y = 1
38 | #     else:
39 | #         y = 0
40 | 
41 | # In[2]:
42 | 
43 | 
44 | with NetBuilder() as nb:
45 |     # Define our constants
46 |     ops.Const(0.0, blob_out="zero")
47 |     ops.Const(1.0, blob_out="one")
48 |     ops.Const(0.5, blob_out="x")
49 |     ops.Const(0.0, blob_out="y")
50 |     # Define our conditional sequence
51 |     with ops.IfNet(ops.GT(["x", "zero"])):
52 |         ops.Copy("one", "y")
53 |     with ops.Else():
54 |         ops.Copy("zero", "y")
55 | 
56 | 
57 | # Note the usage of `NetBuilder`'s `ops.IfNet` and `ops.Else` calls: `ops.IfNet` accepts a blob reference or blob name as an input; it expects the input blob to have a scalar value convertible to bool. Note that the optional `ops.Else` is at the same level as `ops.IfNet` and immediately follows the corresponding `ops.IfNet`. Let's execute the resulting net (execution step) and check the values of the blobs.
58 | # 
59 | # Note that since x = 0.5, which is indeed greater than 0, we should expect y = 1 after execution.
60 | 
61 | # In[3]:
62 | 
63 | 
64 | # Initialize a Plan
65 | plan = Plan('if_net_test')
66 | # Add the NetBuilder definition above to the Plan
67 | plan.AddStep(to_execution_step(nb))
68 | # Initialize workspace for blobs
69 | ws = workspace.C.Workspace()
70 | # Run the Plan
71 | ws.run(plan)
72 | # Fetch some blobs and print
73 | print('x = ', ws.blobs["x"].fetch())
74 | print('y = ', ws.blobs["y"].fetch())
75 | 
76 | 
77 | # Before going further, it's important to understand the semantics of execution blocks ('then' and 'else' branches in the example above), i.e. the handling of reads and writes into global (defined outside of the block) and local (defined inside the block) blobs.
78 | # 
79 | # `NetBuilder` uses the following set of rules:
80 | # 
81 | # - In `NetBuilder`'s syntax, a blob's declaration and definition occur at the same time - we define an operator which writes its output into a blob with a given name.
82 | # 
83 | # - `NetBuilder` keeps track of all operators seen before the current execution point in the same block and up the stack in parent blocks.
84 | # 
85 | # - If an operator writes into a previously unseen blob, it creates a **local** blob that is visible only within the current block and the subsequent children blocks. Local blobs created in a given block are effectively deleted when we exit the block. Any write into a previously defined (in the same block or in the parent blocks) blob updates the originally created blob and does not result in the redefinition of a blob.
86 | # 
87 | # - An operator's input blobs have to be defined earlier in the same block or in the stack of parent blocks.
88 | # 
89 | # 
90 | # As a result, in order to see the values computed by a block after its execution, the blobs of interest have to be defined outside of the block. This rule effectively forces visible blobs to always be correctly initialized.
91 | # 
92 | # To illustrate the concepts of block semantics and provide a more sophisticated example, let's consider the following net:
93 | 
94 | # In[4]:
95 | 
96 | 
97 | with NetBuilder() as nb:
98 |     # Define our constants
99 |     ops.Const(0.0, blob_out="zero")
100 |     ops.Const(1.0, blob_out="one")
101 |     ops.Const(2.0, blob_out="two")
102 |     ops.Const(1.5, blob_out="x")
103 |     ops.Const(0.0, blob_out="y")
104 |     # Define our conditional sequence
105 |     with ops.IfNet(ops.GT(["x", "zero"])):
106 |         ops.Copy("x", "local_blob")  # create local_blob using Copy -- this is not visible outside of this block
107 |         with ops.IfNet(ops.LE(["local_blob", "one"])):
108 |             ops.Copy("one", "y")
109 |         with ops.Else():
110 |             ops.Copy("two", "y")
111 |     with ops.Else():
112 |         ops.Copy("zero", "y")
113 |         # Note that using local_blob would fail here because it is outside of the block in
114 |         # which it was created
115 | 
116 | 
117 | # When we execute this, we expect that y == 2.0, and that `local_blob` will not exist in the workspace.
118 | 
119 | # In[5]:
120 | 
121 | 
122 | # Initialize a Plan
123 | plan = Plan('if_net_test_2')
124 | # Add the NetBuilder definition above to the Plan
125 | plan.AddStep(to_execution_step(nb))
126 | # Initialize workspace for blobs
127 | ws = workspace.C.Workspace()
128 | # Run the Plan
129 | ws.run(plan)
130 | # Fetch some blobs and print
131 | print('x = ', ws.blobs["x"].fetch())
132 | print('y = ', ws.blobs["y"].fetch())
133 | # Assert that the local_blob does not exist in the workspace
134 | # It should have been destroyed because of its locality
135 | assert "local_blob" not in ws.blobs
136 | 
137 | 
138 | # ### Conditional Execution Using Brew Module
139 | # 
140 | # Brew is another Caffe2 interface used to construct nets. Unlike `NetBuilder`, `brew` does not track the hierarchy of blocks and, as a result, we need to specify which blobs are considered local and which blobs are considered global when passing 'then' and 'else' models to an API call.
141 | # 
142 | # Let's start by importing the necessary items for the `brew` API.
143 | 
144 | # In[6]:
145 | 
146 | 
147 | from caffe2.python import brew
148 | from caffe2.python.workspace import FeedBlob, RunNetOnce, FetchBlob
149 | from caffe2.python.model_helper import ModelHelper
150 | 
151 | 
152 | # We will use Caffe2's `ModelHelper` class to define and represent our models, as well as contain the parameter information about the models. Note that a `ModelHelper` object has two underlying nets:
153 | # 
154 | #     (1) param_init_net: Responsible for parameter initialization
155 | #     (2) net: Contains the main network definition, i.e. the graph of operators that the data flows through
156 | # 
157 | # Note that `ModelHelper` is similar to `NetBuilder` in that we define the operator graph first, and actually run it later. With that said, let's define some models to act as conditional elements, and use the `brew` module to form the conditional statement that we want to run. We will construct the same statement used in the first example above.
158 | 
159 | # In[7]:
160 | 
161 | 
162 | # Initialize model, which will represent our main conditional model for this test
163 | model = ModelHelper(name="test_if_model")
164 | 
165 | # Add variables and constants to our conditional model; notice how we add them to the param_init_net
166 | model.param_init_net.ConstantFill([], ["zero"], shape=[1], value=0.0)
167 | model.param_init_net.ConstantFill([], ["one"], shape=[1], value=1.0)
168 | model.param_init_net.ConstantFill([], ["x"], shape=[1], value=0.5)
169 | model.param_init_net.ConstantFill([], ["y"], shape=[1], value=0.0)
170 | 
171 | # Add Greater Than (GT) conditional operator to our model
172 | # which checks if "x" > "zero", and outputs the result in the "cond" blob
173 | model.param_init_net.GT(["x", "zero"], "cond")
174 | 
175 | # Initialize a then_model, and add an operator which we will set to be
176 | # executed if the conditional model returns True
177 | then_model = ModelHelper(name="then_test_model")
178 | then_model.net.Copy("one", "y")
179 | 
180 | # Initialize an else_model, and add an operator which we will set to be
181 | # executed if the conditional model returns False
182 | else_model = ModelHelper(name="else_test_model")
183 | else_model.net.Copy("zero", "y")
184 | 
185 | # Use the brew module's handy cond operator to facilitate the construction of the operator graph
186 | brew.cond(
187 |     model=model,                                # main conditional model
188 |     cond_blob="cond",                           # blob with condition value
189 |     external_blobs=["x", "y", "zero", "one"],   # data blobs used in execution of conditional
190 |     then_model=then_model,                      # pass then_model
191 |     else_model=else_model)                      # pass else_model
192 | 
193 | 
194 | # Before we run the model, let's use Caffe2's graph visualization tool `net_drawer` to check if the operator graph makes sense.
195 | 
196 | # In[8]:
197 | 
198 | 
199 | from caffe2.python import net_drawer
200 | from IPython import display
201 | graph = net_drawer.GetPydotGraph(model.net, rankdir="LR")
202 | display.Image(graph.create_png(), width=800)
203 | 
204 | 
205 | # Now let's run the net! When using `ModelHelper`, we must first run the `param_init_net` to initialize parameters, then we execute the main `net`.
206 | 
207 | # In[9]:
208 | 
209 | 
210 | # Run param_init_net once
211 | RunNetOnce(model.param_init_net)
212 | # Run main net (once in this case)
213 | RunNetOnce(model.net)
214 | # Fetch and examine some blobs
215 | print("x = ", FetchBlob("x"))
216 | print("y = ", FetchBlob("y"))
217 | 
218 | 
219 | # ### Loops Using NetBuilder
220 | # 
221 | # Another important control flow operator is 'While', which allows repeated execution of a fragment of a net. Let's consider `NetBuilder`'s version first.
222 | # 
223 | # The pseudocode for this example is:
224 | # 
225 | #     i = 0
226 | #     y = 0
227 | #     while (i <= 7):
228 | #         y = i + y
229 | #         i += 1
230 | 
231 | # In[10]:
232 | 
233 | 
234 | with NetBuilder() as nb:
235 |     # Define our variables
236 |     ops.Const(0, blob_out="i")
237 |     ops.Const(0, blob_out="y")
238 | 
239 |     # Define loop code and conditions
240 |     with ops.WhileNet():
241 |         with ops.Condition():
242 |             ops.Add(["i", ops.Const(1)], ["i"])
243 |             ops.LE(["i", ops.Const(7)])
244 |         ops.Add(["i", "y"], ["y"])
245 | 
246 | 
247 | # As with the 'If' operator, standard block semantic rules apply. Note the usage of the `ops.Condition` clause, which should immediately follow `ops.WhileNet` and contains code that is executed before each iteration. The last operator in the condition clause is expected to have a single boolean output that determines whether another iteration is executed.
248 | # 
249 | # In the example above, we increment the counter ("i") before each iteration and accumulate its values in the "y" blob. The loop's body is executed 7 times, giving the resulting blob values:
250 | 
251 | # In[11]:
252 | 
253 | 
254 | # Initialize a Plan
255 | plan = Plan('while_net_test')
256 | # Add the NetBuilder definition above to the Plan
257 | plan.AddStep(to_execution_step(nb))
258 | # Initialize workspace for blobs
259 | ws = workspace.C.Workspace()
260 | # Run the Plan
261 | ws.run(plan)
262 | # Fetch blobs and print
263 | print("i = ", ws.blobs["i"].fetch())
264 | print("y = ", ws.blobs["y"].fetch())
265 | 
266 | 
267 | # ### Loops Using Brew Module
268 | # 
269 | # Now let's take a look at how to replicate the loop above using the `ModelHelper`+`brew` interface.
270 | 
271 | # In[12]:
272 | 
273 | 
274 | # Initialize model, which will represent our main conditional model for this test
275 | model = ModelHelper(name="test_while_model")
276 | 
277 | # Add variables and constants to our model
278 | model.param_init_net.ConstantFill([], ["i"], shape=[1], value=0)
279 | model.param_init_net.ConstantFill([], ["one"], shape=[1], value=1)
280 | model.param_init_net.ConstantFill([], ["seven"], shape=[1], value=7)
281 | model.param_init_net.ConstantFill([], ["y"], shape=[1], value=0)
282 | 
283 | # Initialize a loop_model that represents the code to run inside of the loop
284 | loop_model = ModelHelper(name="loop_test_model")
285 | loop_model.net.Add(["i", "y"], ["y"])
286 | 
287 | # Initialize cond_model that represents the conditional test that the loop
288 | # abides by, as well as the increment step
289 | cond_model = ModelHelper(name="cond_test_model")
290 | cond_model.net.Add(["i", "one"], "i")
291 | cond_model.net.LE(["i", "seven"], "cond")
292 | 
293 | # Use brew's loop operator to facilitate the creation of the loop's operator graph
294 | brew.loop(
295 |     model=model,                                        # main model that contains data
296 |     cond_blob="cond",                                   # explicitly specifying condition blob
297 |     external_blobs=["cond", "i", "one", "seven", "y"],  # data blobs used in execution of the loop
298 |     loop_model=loop_model,                              # pass loop_model
299 |     cond_model=cond_model                               # pass condition model (optional)
300 | )
301 | 
302 | 
303 | # Once again, let's visualize the net using the `net_drawer`.
304 | 
305 | # In[13]:
306 | 
307 | 
308 | graph = net_drawer.GetPydotGraph(model.net, rankdir="LR")
309 | display.Image(graph.create_png(), width=800)
310 | 
311 | 
312 | # Finally, we'll run the `param_init_net` and `net` and print our final blob values.
313 | 
314 | # In[14]:
315 | 
316 | 
317 | RunNetOnce(model.param_init_net)
318 | RunNetOnce(model.net)
319 | print("i = ", FetchBlob("i"))
320 | print("y = ", FetchBlob("y"))
321 | 
322 | 
323 | # ### Backpropagation
324 | # 
325 | # Both 'If' and 'While' operators support backpropagation. To illustrate how backpropagation with control ops works, let's consider the following examples, in which we construct the operator graph using `NetBuilder` and calculate gradients using the `AddGradientOperators` function. The first example shows the following conditional statement:
326 | # 
327 | #     x = scalar numpy float array
328 | #     y = 4
329 | #     z = 0
330 | #     if (x > 0):
331 | #         z = y^2
332 | #     else:
333 | #         z = y^3
334 | 
335 | # In[15]:
336 | 
337 | 
338 | import numpy as np
339 | 
340 | # Feed a blob called x, which simply holds the scalar value 0.5
341 | FeedBlob("x", np.array(0.5, dtype='float32'))
342 | 
343 | # _use_control_ops=True forces NetBuilder to output a single net as a result
344 | # x is external for NetBuilder, so we let nb know about it through the initial_scope param
345 | with NetBuilder(_use_control_ops=True, initial_scope=["x"]) as nb:
346 |     ops.Const(0.0, blob_out="zero")
347 |     ops.Const(1.0, blob_out="one")
348 |     ops.Const(4.0, blob_out="y")
349 |     ops.Const(0.0, blob_out="z")
350 |     with ops.IfNet(ops.GT(["x", "zero"])):
351 |         ops.Pow("y", "z", exponent=2.0)
352 |     with ops.Else():
353 |         ops.Pow("y", "z", exponent=3.0)
354 | 
355 | # we should get a single net as output
356 | assert len(nb.get()) == 1, "Expected a single net produced"
357 | net = nb.get()[0]
358 | 
359 | # add gradient operators for 'z' blob
360 | grad_map = net.AddGradientOperators(["z"])
361 | 
362 | 
363 | # In this case
364 | # 
365 | # $$x = 0.5$$
366 | # 
367 | # $$z = y^2 = 4^2 = 16$$
368 | # 
369 | # We will fetch the blob `y_grad`, which was generated by the `AddGradientOperators` call above. This blob contains the gradient of blob z with respect to y. According to basic calculus:
370 | # 
371 | # $$y\_grad = \frac{\partial{z}}{\partial{y}} = \frac{\partial}{\partial{y}}\left(y^2\right) = 2y = 2(4) = 8$$
372 | 
373 | # In[16]:
374 | 
375 | 
376 | # Run the net
377 | RunNetOnce(net)
378 | # Fetch blobs and print
379 | print("x = ", FetchBlob("x"))
380 | print("y = ", FetchBlob("y"))
381 | print("z = ", FetchBlob("z"))
382 | print("y_grad = ", FetchBlob("y_grad"))
383 | 
384 | 
385 | # Now, let's change the value of blob "x" to -0.5 and rerun the net:
386 | 
387 | # In[17]:
388 | 
389 | 
390 | # To re-run the net with different input, simply feed a new blob
391 | FeedBlob("x", np.array(-0.5, dtype='float32'))
392 | RunNetOnce(net)
393 | print("x = ", FetchBlob("x"))
394 | print("y = ", FetchBlob("y"))
395 | print("z = ", FetchBlob("z"))
396 | print("y_grad = ", FetchBlob("y_grad"))
397 | 
398 | 
399 | # The next and final example illustrates backpropagation on the following loop:
400 | # 
401 | #     x = 2
402 | #     y = 3
403 | #     z = 2
404 | #     i = 0
405 | #     while (i <= 2):
406 | #         x = x^2
407 | #         if (i < 2):
408 | #             y = y^2
409 | #         else:
410 | #             z = z^3
411 | #         i += 1
412 | #     s = x + y + z
413 | # 
414 | # Note that this code essentially computes the sum of x^4 (by squaring x twice), y^2, and z^3.
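# Before running it, we can sanity-check the gradients we expect backpropagation to produce. Evaluated at the initial values x = 2, y = 3, z = 2, with s = x^4 + y^2 + z^3:
# 
# $$x\_grad = \frac{\partial{s}}{\partial{x}} = 4x^3 = 4(2)^3 = 32$$
# 
# $$y\_grad = \frac{\partial{s}}{\partial{y}} = 2y = 2(3) = 6$$
# 
# $$z\_grad = \frac{\partial{s}}{\partial{z}} = 3z^2 = 3(2)^2 = 12$$
# 
# Note that the gradients are taken with respect to the initial blob values, even though the "x", "y", and "z" blobs themselves are overwritten in place during the forward pass.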
415 | 
416 | # In[18]:
417 | 
418 | 
419 | with NetBuilder(_use_control_ops=True) as nb:
420 |     # Define variables and constants
421 |     ops.Copy(ops.Const(0), "i")
422 |     ops.Copy(ops.Const(1), "one")
423 |     ops.Copy(ops.Const(2), "two")
424 |     ops.Copy(ops.Const(2.0), "x")
425 |     ops.Copy(ops.Const(3.0), "y")
426 |     ops.Copy(ops.Const(2.0), "z")
427 | 
428 |     # Define loop statement
429 |     # Computes x^4, y^2, z^3
430 |     with ops.WhileNet():
431 |         with ops.Condition():
432 |             ops.Add(["i", "one"], "i")
433 |             ops.LE(["i", "two"])
434 |         ops.Pow("x", "x", exponent=2.0)
435 |         with ops.IfNet(ops.LT(["i", "two"])):
436 |             ops.Pow("y", "y", exponent=2.0)
437 |         with ops.Else():
438 |             ops.Pow("z", "z", exponent=3.0)
439 | 
440 |     # Sum s = x + y + z
441 |     ops.Add(["x", "y"], "x_plus_y")
442 |     ops.Add(["x_plus_y", "z"], "s")
443 | 
444 | assert len(nb.get()) == 1, "Expected a single net produced"
445 | net = nb.get()[0]
446 | 
447 | # Add gradient operators to output blob 's'
448 | grad_map = net.AddGradientOperators(["s"])
449 | 
450 | 
451 | # In[19]:
452 | 
453 | 
454 | workspace.RunNetOnce(net)
455 | print("x = ", FetchBlob("x"))
456 | print("x_grad = ", FetchBlob("x_grad"))  # derivative: 4x^3 (at the initial x = 2)
457 | print("y = ", FetchBlob("y"))
458 | print("y_grad = ", FetchBlob("y_grad"))  # derivative: 2y (at the initial y = 3)
459 | print("z = ", FetchBlob("z"))
460 | print("z_grad = ", FetchBlob("z_grad"))  # derivative: 3z^2 (at the initial z = 2)
461 | 
462 | 
463 | # ### Implementation Notes
464 | # 
465 | # At the low level, Caffe2 uses the following set of operators to implement forward and backward branching and loops:
466 | # - If - accepts *then_net* and *else_net* nets as arguments and executes one of them, depending on the value of the input condition blob; the nets are executed **in the same** workspace;
467 | # - While - repeats execution of the *loop_net* net passed as an argument; the net is executed in the same workspace;
468 | # - Do - a special operator that creates a separate inner workspace, sets up blob mappings between the outer and inner workspaces, and runs a net in the inner workspace;
469 | # - CreateScope/HasScope - special operators that create and keep track of the workspaces used by the Do operator.
470 | # 
471 | # Higher-level libraries that implement branching and looping (e.g. `NetBuilder`, `brew`) use these operators to build control flow; e.g., for 'If' they:
472 | # - do the necessary sanity checks (e.g. determine which blobs are initialized and check that the subnet does not read undefined blobs)
473 | # - wrap the 'then' and 'else' branches into Do
474 | # - set up correct blob mappings by specifying which local names are mapped to outer blobs
475 | # - prepare the scope structure used by the Do operator
476 | # 
477 | # While the 'If' and 'While' Caffe2 ops can be used directly without creating local block workspaces, we encourage users to use the higher-level Caffe2 interfaces that provide the necessary correctness guarantees.
478 | # 
479 | # Backpropagation for 'While' is in general expensive memory-wise - we have to save the local workspace for every iteration of the block, including the global blobs visible to the block. It is recommended that users use the `RecurrentNetwork` operator instead in production environments.
480 | 
481 | 
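# To make the low-level mechanics concrete, here is a minimal sketch of driving the raw 'If' operator directly, without `NetBuilder` or `brew`. It relies only on the behavior described above - 'If' takes the condition blob as input, accepts *then_net*/*else_net* NetDef arguments, and runs the chosen subnet in the caller's workspace - but the exact invocation below is an illustration under those assumptions, not a recommended pattern; prefer the higher-level interfaces in real code.
# 
#     from caffe2.python import core
# 
#     # Two tiny subnets; whichever one runs writes the "out" blob
#     raw_then_net = core.Net("raw_then_net")
#     raw_then_net.ConstantFill([], "out", shape=[1], value=1.0)
#     raw_else_net = core.Net("raw_else_net")
#     raw_else_net.ConstantFill([], "out", shape=[1], value=0.0)
# 
#     # The condition blob must hold a scalar convertible to bool
#     workspace.FeedBlob("raw_cond", np.array([True]))
# 
#     # Build a net whose single op is the raw 'If', wiring the subnets in as NetDef args
#     raw_if_net = core.Net("raw_if_net")
#     raw_if_net.Proto().op.extend([core.CreateOperator(
#         "If", ["raw_cond"], [],
#         then_net=raw_then_net.Proto(),
#         else_net=raw_else_net.Proto())])
# 
#     workspace.RunNetOnce(raw_if_net)
#     print("out = ", workspace.FetchBlob("out"))  # expect [1.]

--------------------------------------------------------------------------------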