├── .gitignore ├── Dockerfile ├── README.md ├── code ├── .clang-format ├── common │ ├── socket.cpp │ └── socket.h ├── cudnn │ ├── Makefile │ ├── README.md │ ├── conv.cu │ └── cppcon-logo.png ├── dlib │ ├── Makefile │ ├── README.md │ └── lenet.cpp ├── mkl │ ├── Makefile │ ├── README.md │ ├── conv.cpp │ └── cppcon-logo.png ├── mxnet │ ├── Makefile │ ├── README.md │ ├── demo │ │ ├── .qmake.stash │ │ ├── Makefile │ │ ├── backend.cpp │ │ ├── backend.h │ │ ├── main.cpp │ │ ├── main.pro │ │ ├── main.qml │ │ ├── main.qmlc │ │ └── qml.qrc │ ├── download_mnist.sh │ ├── lenet.cpp │ └── mnist_data │ │ ├── t10k-images-idx3-ubyte │ │ ├── t10k-labels-idx1-ubyte │ │ ├── train-images-idx3-ubyte │ │ └── train-labels-idx1-ubyte ├── requirements.txt ├── tf-graph │ ├── Makefile │ ├── README.md │ ├── demo │ │ ├── .qmake.stash │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── backend.cpp │ │ ├── backend.h │ │ ├── main.cpp │ │ ├── main.pro │ │ ├── main.qml │ │ ├── main.qmlc │ │ └── qml.qrc │ ├── infogan.py │ ├── load-graph-server.cpp │ └── load-graph.cpp └── tf-kernel │ ├── .bash_history │ ├── README.md │ ├── cpu+gpu │ ├── Makefile │ ├── cpp_con_sigmoid.py │ ├── kernel.cpp │ ├── kernel.cu │ ├── kernel.cuh │ └── test.py │ └── cpu │ ├── Makefile │ ├── cpp_con_sigmoid.py │ ├── kernel.cpp │ └── test.py └── presentation ├── .tags ├── .tags1 ├── figures ├── alexnet-kernels.png ├── alexnet-training-days.png ├── bedrooms.png ├── big-sur.jpg ├── caffe2.png ├── cntk.jpg ├── cntk.png ├── cppcon-logo-blurry.png ├── cppcon-logo-nn.jpg ├── cppcon-logo.png ├── cudnn.png ├── deepdream.jpg ├── dgx-1.jpg ├── dl-trend.png ├── face-interpolations.png ├── faces.png ├── fb-scaling.png ├── gpu-power.png ├── graphcore.jpg ├── imagenet-gpu.jpg ├── infogan-mnist.png ├── intel-xeon.jpg ├── intel.png ├── jetson-tx2.jpg ├── mnist1.png ├── mnist2.png ├── movidius.jpg ├── mxnet.png ├── mxnet2.png ├── nervana-engine.png ├── nvidia-stock.png ├── piranha.png ├── pytorch.png ├── sgemm.jpg ├── shark.jpg ├── shark.png ├── sigmoid.png ├── sky │ ├── sky.jpg │ ├── sky00.jpg │ ├── sky01.jpg │ ├── sky02.jpg │ ├── sky10.jpg │ ├── sky11.jpg │ ├── sky12.jpg │ ├── sky20.jpg │ ├── sky21.jpg │ └── sky22.jpeg ├── stickman.jpg ├── teapot.jpg ├── tensorflow.png ├── theano.png ├── tioga-pass.jpg ├── titan-x.jpg ├── torch.png ├── tpu.jpg ├── weird-fish.jpg └── xkcd.png ├── notes.md ├── preamble.tex ├── presentation.pdf ├── presentation.tex └── slides ├── down.tex ├── down ├── graph.tex ├── hardware.tex ├── kernel.tex ├── layer.tex ├── model.tex ├── op.tex └── task.tex ├── intro.tex ├── outro.tex ├── title.tex ├── up.tex └── up ├── graph.tex ├── hardware.tex ├── kernel.tex ├── layer.tex ├── model.tex ├── op.tex └── task.tex /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/latex,c++,python 3 | 4 | ### C++ ### 5 | # Prerequisites 6 | *.d 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | ### LaTeX ### 39 | ## Core latex/pdflatex auxiliary files: 40 | *.aux 41 | *.lof 42 | *.log 43 | *.lot 44 | *.fls 45 | *.toc 46 | *.fmt 47 | *.fot 48 | *.cb 49 | *.cb2 50 | 51 | ## Intermediate documents: 52 | *.dvi 53 | *-converted-to.* 54 | # these 
rules might exclude image files for figures etc. 55 | # *.ps 56 | # *.eps 57 | # *.pdf 58 | 59 | ## Generated if empty string is given at "Please type another file name for output:" 60 | .pdf 61 | 62 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 63 | *.bbl 64 | *.bcf 65 | *.blg 66 | *-blx.aux 67 | *-blx.bib 68 | *.brf 69 | *.run.xml 70 | 71 | ## Build tool auxiliary files: 72 | *.fdb_latexmk 73 | *.synctex 74 | *.synctex(busy) 75 | *.synctex.gz 76 | *.synctex.gz(busy) 77 | *.pdfsync 78 | 79 | ## Auxiliary and intermediate files from other packages: 80 | # algorithms 81 | *.alg 82 | *.loa 83 | 84 | # achemso 85 | acs-*.bib 86 | 87 | # amsthm 88 | *.thm 89 | 90 | # beamer 91 | *.nav 92 | *.pre 93 | *.snm 94 | *.vrb 95 | 96 | # changes 97 | *.soc 98 | 99 | # cprotect 100 | *.cpt 101 | 102 | # elsarticle (documentclass of Elsevier journals) 103 | *.spl 104 | 105 | # endnotes 106 | *.ent 107 | 108 | # fixme 109 | *.lox 110 | 111 | # feynmf/feynmp 112 | *.mf 113 | *.mp 114 | *.t[1-9] 115 | *.t[1-9][0-9] 116 | *.tfm 117 | *.[1-9] 118 | *.[1-9][0-9] 119 | 120 | #(r)(e)ledmac/(r)(e)ledpar 121 | *.end 122 | *.?end 123 | *.[1-9][0-9][0-9] 124 | *.[1-9]R 125 | *.[1-9][0-9]R 126 | *.[1-9][0-9][0-9]R 127 | *.eledsec[1-9] 128 | *.eledsec[1-9]R 129 | *.eledsec[1-9][0-9] 130 | *.eledsec[1-9][0-9]R 131 | *.eledsec[1-9][0-9][0-9] 132 | *.eledsec[1-9][0-9][0-9]R 133 | 134 | # glossaries 135 | *.acn 136 | *.acr 137 | *.glg 138 | *.glo 139 | *.gls 140 | *.glsdefs 141 | 142 | # gnuplottex 143 | *-gnuplottex-* 144 | 145 | # gregoriotex 146 | *.gaux 147 | *.gtex 148 | 149 | # hyperref 150 | 151 | # knitr 152 | *-concordance.tex 153 | # TODO Comment the next line if you want to keep your tikz graphics files 154 | *.tikz 155 | *-tikzDictionary 156 | 157 | # listings 158 | *.lol 159 | 160 | # makeidx 161 | *.idx 162 | *.ilg 163 | *.ind 164 | *.ist 165 | 166 | # minitoc 167 | *.maf 168 | *.mlf 169 | *.mlt 170 | *.mtc[0-9]* 171 | 172 | # minted 173 | _minted* 174 | *.pyg 175 | 176 | # morewrites 177 | *.mw 178 | 179 | # mylatexformat 180 | 181 | # nomencl 182 | *.nlo 183 | 184 | # pax 185 | *.pax 186 | 187 | # sagetex 188 | *.sagetex.sage 189 | *.sagetex.py 190 | *.sagetex.scmd 191 | 192 | # scrwfile 193 | *.wrt 194 | 195 | # sympy 196 | *.sout 197 | *.sympy 198 | sympy-plots-for-*.tex/ 199 | 200 | # pdfcomment 201 | *.upa 202 | *.upb 203 | 204 | # pythontex 205 | *.pytxcode 206 | pythontex-files-*/ 207 | 208 | # thmtools 209 | *.loe 210 | 211 | # TikZ & PGF 212 | *.dpth 213 | *.md5 214 | *.auxlock 215 | 216 | # todonotes 217 | *.tdo 218 | 219 | # easy-todo 220 | *.lod 221 | 222 | # xindy 223 | *.xdy 224 | 225 | # xypic precompiled matrices 226 | *.xyc 227 | 228 | # endfloat 229 | *.ttt 230 | *.fff 231 | 232 | # Latexian 233 | TSWLatexianTemp* 234 | 235 | ## Editors: 236 | # WinEdt 237 | *.bak 238 | *.sav 239 | 240 | # Texpad 241 | .texpadtmp 242 | 243 | # Kile 244 | *.backup 245 | 246 | # KBibTeX 247 | *~[0-9]* 248 | 249 | # auto folder when using emacs and auctex 250 | /auto/* 251 | 252 | # expex forward references with \gathertags 253 | *-tags.tex 254 | 255 | ### Python ### 256 | # Byte-compiled / optimized / DLL files 257 | __pycache__/ 258 | *.py[cod] 259 | *$py.class 260 | 261 | # C extensions 262 | 263 | # Distribution / packaging 264 | .Python 265 | env/ 266 | build/ 267 | develop-eggs/ 268 | dist/ 269 | downloads/ 270 | eggs/ 271 | .eggs/ 272 | lib/ 273 | lib64/ 274 | parts/ 275 | sdist/ 276 | var/ 277 | wheels/ 278 | *.egg-info/ 279 | .installed.cfg 280 | *.egg 281 | 282 | # PyInstaller 283 | # 
Usually these files are written by a python script from a template 284 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 285 | *.manifest 286 | *.spec 287 | 288 | # Installer logs 289 | pip-log.txt 290 | pip-delete-this-directory.txt 291 | 292 | # Unit test / coverage reports 293 | htmlcov/ 294 | .tox/ 295 | .coverage 296 | .coverage.* 297 | .cache 298 | nosetests.xml 299 | coverage.xml 300 | *,cover 301 | .hypothesis/ 302 | 303 | # Translations 304 | *.mo 305 | *.pot 306 | 307 | # Django stuff: 308 | local_settings.py 309 | 310 | # Flask stuff: 311 | instance/ 312 | .webassets-cache 313 | 314 | # Scrapy stuff: 315 | .scrapy 316 | 317 | # Sphinx documentation 318 | docs/_build/ 319 | 320 | # PyBuilder 321 | target/ 322 | 323 | # Jupyter Notebook 324 | .ipynb_checkpoints 325 | 326 | # pyenv 327 | .python-version 328 | 329 | # celery beat schedule file 330 | celerybeat-schedule 331 | 332 | # dotenv 333 | .env 334 | 335 | # virtualenv 336 | .venv 337 | venv/ 338 | ENV/ 339 | 340 | # Spyder project settings 341 | .spyderproject 342 | 343 | # Rope project settings 344 | .ropeproject 345 | 346 | # End of https://www.gitignore.io/api/latex,c++,python 347 | 348 | 349 | ## Other 350 | *.pb 351 | *.app 352 | code/*.png 353 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | MAINTAINER 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | software-properties-common apt-utils 7 | 8 | RUN apt-get update && apt-get install -y \ 9 | clang-3.8 git python3-numpy python3-dev python3-pip python3-wheel 10 | 11 | RUN pip3 install --upgrade pip && pip3 install tensorflow-gpu 12 | 13 | # Additional packages to do work. 14 | RUN apt-get install -y vim emacs 15 | 16 | ENV C clang-3.8 17 | ENV CXX clang++-3.8 18 | 19 | WORKDIR /root 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning with C++ 2 | 3 | Slides and code samples for my talk at CppCon 2017. 
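The `code/` directory holds the self-contained examples referenced in the talk (cuDNN, Intel MKL, dlib, MXNet, a TensorFlow graph loader and a custom TensorFlow kernel), each with its own README, while `presentation/` contains the LaTeX sources and the rendered `presentation.pdf`.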
4 | -------------------------------------------------------------------------------- /code/.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | Standard: Cpp11 3 | BasedOnStyle: Google 4 | 5 | AllowAllParametersOfDeclarationOnNextLine: true 6 | AllowShortBlocksOnASingleLine: false 7 | AllowShortCaseLabelsOnASingleLine: false 8 | AllowShortFunctionsOnASingleLine: true 9 | AllowShortIfStatementsOnASingleLine: true 10 | AllowShortLoopsOnASingleLine: true 11 | 12 | AlignOperands: true 13 | AlignConsecutiveAssignments: false 14 | 15 | BinPackArguments: false 16 | BinPackParameters: false 17 | BreakConstructorInitializersBeforeComma: true 18 | ColumnLimit: 80 19 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 20 | ConstructorInitializerIndentWidth: 0 21 | ContinuationIndentWidth: 4 22 | Cpp11BracedListStyle: true 23 | DerivePointerAlignment: false 24 | IndentCaseLabels: true 25 | IndentWidth: 2 26 | MaxEmptyLinesToKeep: 2 27 | NamespaceIndentation: None 28 | PointerAlignment: Left 29 | SpacesBeforeTrailingComments: 2 30 | TabWidth: 2 31 | UseTab: Never 32 | 33 | PenaltyExcessCharacter: 1000000 34 | PenaltyReturnTypeOnItsOwnLine: 10 35 | PenaltyBreakBeforeFirstCallParameter: 1000 36 | -------------------------------------------------------------------------------- /code/common/socket.cpp: -------------------------------------------------------------------------------- 1 | #include "socket.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace { 15 | void handle_blocking(int fd) { 16 | // Will be necessary when calling setsockopt to free busy sockets 17 | int yes = 1; 18 | 19 | // Reclaim blocked but unused sockets (from zombie processes) 20 | const int return_code = 21 | setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes); 22 | 23 | if (return_code == -1) { 24 | throw std::runtime_error("Error reclaiming socket"); 25 | } 26 | } 27 | 28 | int get_socket_for_first_valid_address(addrinfo* server_info) { 29 | int fd = -1; 30 | 31 | for (auto* address = server_info; address; address = address->ai_next) { 32 | fd = socket(address->ai_family, address->ai_socktype, address->ai_protocol); 33 | if (fd == -1) continue; 34 | 35 | handle_blocking(fd); 36 | 37 | if (bind(fd, address->ai_addr, address->ai_addrlen) == 1) { 38 | close(fd); 39 | } 40 | 41 | break; 42 | } 43 | 44 | if (fd == -1) { 45 | throw std::runtime_error("Error finding valid address"); 46 | } 47 | 48 | return fd; 49 | } 50 | 51 | addrinfo* get_server_information(int port) { 52 | struct addrinfo hints; 53 | memset(&hints, 0, sizeof hints); 54 | hints.ai_family = AF_UNSPEC; 55 | hints.ai_socktype = SOCK_STREAM; 56 | 57 | addrinfo* server_info; 58 | const auto port_string = std::to_string(port); 59 | const auto return_code = 60 | getaddrinfo("localhost", port_string.c_str(), &hints, &server_info); 61 | 62 | if (return_code != 0) { 63 | throw std::runtime_error(std::string("getaddrinfo failed: ") + 64 | gai_strerror(return_code)); 65 | } 66 | 67 | return server_info; 68 | } 69 | 70 | 71 | int get_server_socket(int port) { 72 | addrinfo* server_info = get_server_information(port); 73 | const int server_socket = get_socket_for_first_valid_address(server_info); 74 | assert(server_socket != -1); 75 | freeaddrinfo(server_info); 76 | 77 | return server_socket; 78 | } 79 | } // namespace 80 | 81 | Socket::Socket(int port) : server_socket(get_server_socket(port)) { 82 | if 
(listen(server_socket, /*queue=*/10) == 1) { 83 | throw std::runtime_error("Error listening on given socket!"); 84 | } 85 | } 86 | 87 | Socket::~Socket() { 88 | close(server_socket); 89 | close(connection_socket); 90 | } 91 | 92 | void Socket::accept() { 93 | struct sockaddr_storage other_address; 94 | socklen_t sin_size = sizeof other_address; 95 | const int fd = ::accept(server_socket, 96 | reinterpret_cast(&other_address), 97 | &sin_size); 98 | if (fd == -1) { 99 | throw std::runtime_error("Error accepting"); 100 | } else { 101 | connection_socket = fd; 102 | } 103 | } 104 | 105 | std::string Socket::read(int max_bytes) { 106 | std::vector buffer(max_bytes, '\0'); 107 | 108 | if (recv(connection_socket, buffer.data(), buffer.size(), 0) == 0) { 109 | throw std::runtime_error("Error receiving from client"); 110 | } 111 | 112 | return std::string(buffer.data()); 113 | } 114 | 115 | void Socket::write(const std::string& data) { 116 | if (send(connection_socket, data.data(), data.size(), 0) == 0) { 117 | throw std::runtime_error("Error sending to client"); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /code/common/socket.h: -------------------------------------------------------------------------------- 1 | #ifndef SOCKET_H 2 | #define SOCKET_H 3 | 4 | #include 5 | 6 | class Socket { 7 | public: 8 | explicit Socket(int port); 9 | 10 | ~Socket(); 11 | 12 | void accept(); 13 | 14 | std::string read(int max_bytes); 15 | 16 | void write(const std::string& data); 17 | 18 | private: 19 | const int server_socket{-1}; 20 | int connection_socket{-1}; 21 | }; 22 | 23 | #endif // SOCKET_H 24 | -------------------------------------------------------------------------------- /code/cudnn/Makefile: -------------------------------------------------------------------------------- 1 | CXX := nvcc 2 | TARGET := conv 3 | CUDNN_PATH := cudnn 4 | HEADERS := -I $(CUDNN_PATH)/include 5 | LIBS := -L $(CUDNN_PATH)/lib64 -L/usr/local/lib 6 | CXXFLAGS := -arch=sm_35 -std=c++11 -O2 7 | 8 | all: conv 9 | 10 | conv: $(TARGET).cu 11 | $(CXX) $(CXXFLAGS) $(HEADERS) $(LIBS) $(TARGET).cu -o $(TARGET) \ 12 | -lcudnn -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 13 | 14 | .phony: clean 15 | 16 | clean: 17 | rm $(TARGET) || echo -n "" 18 | -------------------------------------------------------------------------------- /code/cudnn/README.md: -------------------------------------------------------------------------------- 1 | # cudnn 2 | 3 | Example of doing an edge-detection convolution using NVIDIA cuDNN. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 0. A GPU and the whole CUDA stack, including the `nvcc` compiler, 10 | 1. Install NVIDIA cuDNN for your system: https://developer.nvidia.com/rdp/cudnn-download, 11 | 2. OpenCV2 (consult your package manager). 12 | 13 | Set the `CUDNN_PATH` environment variable and `make`, e.g.: 14 | 15 | ```shell 16 | $ CUDNN_PATH=/opt/cudnn make 17 | ``` 18 | 19 | ## Running 20 | 21 | The binary expects the path to an image, e.g. for the `cppcon-logo.png` image 22 | that's already there: 23 | 24 | ```sh 25 | $ ./conv cppcon-logo.png 26 | ``` 27 | 28 | It then generates an image called `cudnn-out.png`. 
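For reference, the edge detection comes from the Laplacian-style 3x3 kernel hard-coded in `conv.cu`, which gets replicated across every input/output channel pair of the filter:

```cpp
// Edge-detection kernel from conv.cu (copied into all channel pairs of h_kernel).
const float kernel_template[3][3] = {
    {1,  1, 1},
    {1, -8, 1},
    {1,  1, 1}
};
```

As the usage string in `conv.cu` shows, two optional arguments may follow the image path: a GPU index (default 0) and a flag that additionally runs a cuDNN sigmoid activation over the convolution output, e.g. `./conv cppcon-logo.png 0 1`.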
29 | -------------------------------------------------------------------------------- /code/cudnn/conv.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define checkCUDNN(expression) \ 8 | { \ 9 | cudnnStatus_t status = (expression); \ 10 | if (status != CUDNN_STATUS_SUCCESS) { \ 11 | std::cerr << "Error on line " << __LINE__ << ": " \ 12 | << cudnnGetErrorString(status) << std::endl; \ 13 | std::exit(EXIT_FAILURE); \ 14 | } \ 15 | } 16 | 17 | cv::Mat load_image(const char* image_path) { 18 | cv::Mat image = cv::imread(image_path, CV_LOAD_IMAGE_COLOR); 19 | image.convertTo(image, CV_32FC3); 20 | cv::normalize(image, image, 0, 1, cv::NORM_MINMAX); 21 | std::cerr << "Input Image: " << image.rows << " x " << image.cols << " x " 22 | << image.channels() << std::endl; 23 | return image; 24 | } 25 | 26 | void save_image(const char* output_filename, 27 | float* buffer, 28 | int height, 29 | int width) { 30 | cv::Mat output_image(height, width, CV_32FC3, buffer); 31 | // Make negative values zero. 32 | cv::threshold(output_image, 33 | output_image, 34 | /*threshold=*/0, 35 | /*maxval=*/0, 36 | cv::THRESH_TOZERO); 37 | cv::normalize(output_image, output_image, 0.0, 255.0, cv::NORM_MINMAX); 38 | output_image.convertTo(output_image, CV_8UC3); 39 | cv::imwrite(output_filename, output_image); 40 | std::cerr << "Wrote output to " << output_filename << std::endl; 41 | } 42 | 43 | int main(int argc, const char* argv[]) { 44 | if (argc < 2) { 45 | std::cerr << "usage: conv [gpu=0] [sigmoid=0]" << std::endl; 46 | std::exit(EXIT_FAILURE); 47 | } 48 | 49 | int gpu_id = (argc > 2) ? std::atoi(argv[2]) : 0; 50 | std::cerr << "GPU: " << gpu_id << std::endl; 51 | 52 | bool with_sigmoid = (argc > 3) ? 
std::atoi(argv[3]) : 0; 53 | std::cerr << "With sigmoid: " << std::boolalpha << with_sigmoid << std::endl; 54 | 55 | cv::Mat image = load_image(argv[1]); 56 | 57 | cudaSetDevice(gpu_id); 58 | 59 | cudnnHandle_t cudnn; 60 | cudnnCreate(&cudnn); 61 | 62 | cudnnTensorDescriptor_t input_descriptor; 63 | checkCUDNN(cudnnCreateTensorDescriptor(&input_descriptor)); 64 | checkCUDNN(cudnnSetTensor4dDescriptor(input_descriptor, 65 | /*format=*/CUDNN_TENSOR_NHWC, 66 | /*dataType=*/CUDNN_DATA_FLOAT, 67 | /*batch_size=*/1, 68 | /*channels=*/3, 69 | /*image_height=*/image.rows, 70 | /*image_width=*/image.cols)); 71 | 72 | cudnnFilterDescriptor_t kernel_descriptor; 73 | checkCUDNN(cudnnCreateFilterDescriptor(&kernel_descriptor)); 74 | checkCUDNN(cudnnSetFilter4dDescriptor(kernel_descriptor, 75 | /*dataType=*/CUDNN_DATA_FLOAT, 76 | /*format=*/CUDNN_TENSOR_NCHW, 77 | /*out_channels=*/3, 78 | /*in_channels=*/3, 79 | /*kernel_height=*/3, 80 | /*kernel_width=*/3)); 81 | 82 | cudnnConvolutionDescriptor_t convolution_descriptor; 83 | checkCUDNN(cudnnCreateConvolutionDescriptor(&convolution_descriptor)); 84 | checkCUDNN(cudnnSetConvolution2dDescriptor(convolution_descriptor, 85 | /*pad_height=*/1, 86 | /*pad_width=*/1, 87 | /*vertical_stride=*/1, 88 | /*horizontal_stride=*/1, 89 | /*dilation_height=*/1, 90 | /*dilation_width=*/1, 91 | /*mode=*/CUDNN_CROSS_CORRELATION, 92 | /*computeType=*/CUDNN_DATA_FLOAT)); 93 | 94 | int batch_size{0}, channels{0}, height{0}, width{0}; 95 | checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convolution_descriptor, 96 | input_descriptor, 97 | kernel_descriptor, 98 | &batch_size, 99 | &channels, 100 | &height, 101 | &width)); 102 | 103 | std::cerr << "Output Image: " << height << " x " << width << " x " << channels 104 | << std::endl; 105 | 106 | cudnnTensorDescriptor_t output_descriptor; 107 | checkCUDNN(cudnnCreateTensorDescriptor(&output_descriptor)); 108 | checkCUDNN(cudnnSetTensor4dDescriptor(output_descriptor, 109 | /*format=*/CUDNN_TENSOR_NHWC, 110 | /*dataType=*/CUDNN_DATA_FLOAT, 111 | /*batch_size=*/1, 112 | /*channels=*/3, 113 | /*image_height=*/image.rows, 114 | /*image_width=*/image.cols)); 115 | 116 | cudnnConvolutionFwdAlgo_t convolution_algorithm; 117 | checkCUDNN( 118 | cudnnGetConvolutionForwardAlgorithm(cudnn, 119 | input_descriptor, 120 | kernel_descriptor, 121 | convolution_descriptor, 122 | output_descriptor, 123 | CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 124 | /*memoryLimitInBytes=*/0, 125 | &convolution_algorithm)); 126 | 127 | size_t workspace_bytes{0}; 128 | checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnn, 129 | input_descriptor, 130 | kernel_descriptor, 131 | convolution_descriptor, 132 | output_descriptor, 133 | convolution_algorithm, 134 | &workspace_bytes)); 135 | std::cerr << "Workspace size: " << (workspace_bytes / 1048576.0) << "MB" 136 | << std::endl; 137 | assert(workspace_bytes > 0); 138 | 139 | void* d_workspace{nullptr}; 140 | cudaMalloc(&d_workspace, workspace_bytes); 141 | 142 | int image_bytes = batch_size * channels * height * width * sizeof(float); 143 | 144 | float* d_input{nullptr}; 145 | cudaMalloc(&d_input, image_bytes); 146 | cudaMemcpy(d_input, image.ptr(0), image_bytes, cudaMemcpyHostToDevice); 147 | 148 | float* d_output{nullptr}; 149 | cudaMalloc(&d_output, image_bytes); 150 | cudaMemset(d_output, 0, image_bytes); 151 | 152 | // clang-format off 153 | const float kernel_template[3][3] = { 154 | {1, 1, 1}, 155 | {1, -8, 1}, 156 | {1, 1, 1} 157 | }; 158 | // clang-format on 159 | 160 | float h_kernel[3][3][3][3]; 161 | for (int 
kernel = 0; kernel < 3; ++kernel) { 162 | for (int channel = 0; channel < 3; ++channel) { 163 | for (int row = 0; row < 3; ++row) { 164 | for (int column = 0; column < 3; ++column) { 165 | h_kernel[kernel][channel][row][column] = kernel_template[row][column]; 166 | } 167 | } 168 | } 169 | } 170 | 171 | float* d_kernel{nullptr}; 172 | cudaMalloc(&d_kernel, sizeof(h_kernel)); 173 | cudaMemcpy(d_kernel, h_kernel, sizeof(h_kernel), cudaMemcpyHostToDevice); 174 | 175 | const float alpha = 1.0f, beta = 0.0f; 176 | 177 | checkCUDNN(cudnnConvolutionForward(cudnn, 178 | &alpha, 179 | input_descriptor, 180 | d_input, 181 | kernel_descriptor, 182 | d_kernel, 183 | convolution_descriptor, 184 | convolution_algorithm, 185 | d_workspace, 186 | workspace_bytes, 187 | &beta, 188 | output_descriptor, 189 | d_output)); 190 | 191 | if (with_sigmoid) { 192 | cudnnActivationDescriptor_t activation_descriptor; 193 | checkCUDNN(cudnnCreateActivationDescriptor(&activation_descriptor)); 194 | checkCUDNN(cudnnSetActivationDescriptor(activation_descriptor, 195 | CUDNN_ACTIVATION_SIGMOID, 196 | CUDNN_PROPAGATE_NAN, 197 | /*relu_coef=*/0)); 198 | checkCUDNN(cudnnActivationForward(cudnn, 199 | activation_descriptor, 200 | &alpha, 201 | output_descriptor, 202 | d_output, 203 | &beta, 204 | output_descriptor, 205 | d_output)); 206 | cudnnDestroyActivationDescriptor(activation_descriptor); 207 | } 208 | 209 | float* h_output = new float[image_bytes]; 210 | cudaMemcpy(h_output, d_output, image_bytes, cudaMemcpyDeviceToHost); 211 | 212 | save_image("cudnn-out.png", h_output, height, width); 213 | 214 | delete[] h_output; 215 | cudaFree(d_kernel); 216 | cudaFree(d_input); 217 | cudaFree(d_output); 218 | cudaFree(d_workspace); 219 | 220 | cudnnDestroyTensorDescriptor(input_descriptor); 221 | cudnnDestroyTensorDescriptor(output_descriptor); 222 | cudnnDestroyFilterDescriptor(kernel_descriptor); 223 | cudnnDestroyConvolutionDescriptor(convolution_descriptor); 224 | 225 | cudnnDestroy(cudnn); 226 | } 227 | -------------------------------------------------------------------------------- /code/cudnn/cppcon-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/cudnn/cppcon-logo.png -------------------------------------------------------------------------------- /code/dlib/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := lenet 2 | WARNINGS := -Wall -Wextra -pedantic 3 | CXXFLAGS := -std=c++11 -O2 4 | 5 | all: conv 6 | 7 | conv: $(TARGET).cpp 8 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 9 | $(TARGET).cpp -o $(TARGET) -ldlib 10 | 11 | .phony: clean 12 | 13 | clean: 14 | rm $(TARGET) || echo -n "" 15 | -------------------------------------------------------------------------------- /code/dlib/README.md: -------------------------------------------------------------------------------- 1 | # dlib 2 | 3 | Example of training a neural network with `dlib`. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. DLib: http://dlib.net, 10 | 2. OpenCV2 (consult your package manager). 11 | 12 | Then: `make`. 13 | 14 | ## Running 15 | 16 | Use the script in the `mxnet` folder to fetch the MNIST dataset. Then pass that 17 | folder to the built binary to start the training process. 
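Assuming the MNIST files were fetched into `mnist_data/` with the mxnet `download_mnist.sh` script, the invocation is simply `./lenet mnist_data`; the program trains for (at most) two epochs and prints the test accuracy at the end.

If you want to keep the trained network around, dlib can serialize it to disk. This is not part of `lenet.cpp`, but a minimal sketch using dlib's standard serialization API would look like this:

```cpp
// Hypothetical addition after training in lenet.cpp (model.clean() is
// already called there): persist the network so it can be reloaded later
// without retraining. "lenet.dat" is an arbitrary filename.
dlib::serialize("lenet.dat") << model;

// In another program, or a later run:
// lenet::model model;
// dlib::deserialize("lenet.dat") >> model;
```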
18 | -------------------------------------------------------------------------------- /code/dlib/lenet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace lenet { 6 | using namespace dlib; 7 | // clang-format off 8 | using model = loss_multiclass_log< 9 | fc<10, 10 | relu> 14 | >>>>>>>>>>; 15 | // clang-format on 16 | } // namespace lenet 17 | 18 | int main(int argc, char const* argv[]) { 19 | if (argc < 2) { 20 | std::cerr << "usage: lenet " << std::endl; 21 | } 22 | 23 | std::vector> training_images; 24 | std::vector training_labels; 25 | 26 | std::vector> test_images; 27 | std::vector test_labels; 28 | 29 | dlib::load_mnist_dataset(argv[1], 30 | training_images, 31 | training_labels, 32 | test_images, 33 | test_labels); 34 | 35 | lenet::model model; 36 | 37 | dlib::dnn_trainer trainer(model); 38 | trainer.set_learning_rate(0.01); 39 | trainer.set_min_learning_rate(1e-5); 40 | trainer.set_mini_batch_size(128); 41 | trainer.set_max_num_epochs(2); 42 | trainer.be_verbose(); 43 | 44 | trainer.train(training_images, training_labels); 45 | 46 | model.clean(); 47 | 48 | std::vector predicted = model(test_images); 49 | double hits = 0; 50 | for (size_t i = 0; i < test_images.size(); i++) { 51 | if (predicted[i] == test_labels[i]) { 52 | hits += 1; 53 | } 54 | } 55 | 56 | std::cerr << "Test accuracy: " << hits / test_images.size() << std::endl; 57 | } 58 | -------------------------------------------------------------------------------- /code/mkl/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := conv 2 | WARNINGS := -Wall -Wextra -pedantic -Wno-vla-extension 3 | HEADERS := -isystem /opt/intel/mkl/include 4 | MKL_PATH := /opt/intel/mkl 5 | LIBS := -L $(MKL_PATH)/lib/ -L /usr/local/lib 6 | CXXFLAGS := -std=c++11 -O2 7 | 8 | all: conv 9 | 10 | conv: $(TARGET).cpp 11 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 12 | $(TARGET).cpp -o $(TARGET) \ 13 | -lmkl_intel -lmkl_sequential -lmkl_core \ 14 | -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 15 | 16 | .phony: clean 17 | 18 | clean: 19 | rm $(TARGET) mkl-out.png || echo -n "" 20 | -------------------------------------------------------------------------------- /code/mkl/README.md: -------------------------------------------------------------------------------- 1 | # mkl 2 | 3 | Example of doing an edge-detection convolution using Intel MKL. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. Install Intel MKL for your system: https://software.intel.com/en-us/mkl, 10 | 2. OpenCV2 (consult your package manager). 11 | 12 | Then just `make`. If your MKL did not end up under `/opt/intel/mkl`, change the 13 | Makefile or set the `MKL_PATH` environment variable before the `make` 14 | invocation. 15 | 16 | ## Running 17 | 18 | The binary expects the path to an image, e.g. for the `cppcon-logo.png` image 19 | that's already there: 20 | 21 | ```sh 22 | $ LD_LIBRARY_PATH=/path/to/mkl/libs ./conv cppcon-logo.png 23 | ``` 24 | 25 | It then generates an image called `mkl-out.png`. 
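Like the cuDNN example, `conv.cpp` performs edge detection, here with a 5x5 kernel (all -1 with 24 at the centre), and it accepts an optional second argument that converts the input to grayscale first, e.g. `LD_LIBRARY_PATH=/path/to/mkl/libs ./conv cppcon-logo.png 1`. The main MKL-specific wrinkle is data layout: the code describes its buffers as WHCN, asks the convolution primitive for the layout it prefers (`dnnLayoutCreateFromPrimitive_F32`), and only inserts a conversion (`dnnConversionCreate_F32`) when `dnnLayoutCompare_F32` reports a mismatch.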
26 | -------------------------------------------------------------------------------- /code/mkl/conv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define checkMKL(expression) \ 7 | { \ 8 | dnnError_t status = expression; \ 9 | if (status != E_SUCCESS) { \ 10 | std::cerr << "Error at line " << __LINE__ << ": " << status \ 11 | << std::endl; \ 12 | std::exit(EXIT_FAILURE); \ 13 | } \ 14 | } 15 | 16 | cv::Mat load_image(const char* image_path, bool is_gray) { 17 | cv::Mat image = cv::imread(image_path, CV_LOAD_IMAGE_COLOR); 18 | image.convertTo(image, CV_32FC3); 19 | if (is_gray) { 20 | cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); 21 | } 22 | cv::normalize(image, image, 0, 1, cv::NORM_MINMAX); 23 | std::cout << "Input Image: " << image.rows << " x " << image.cols << " x " 24 | << image.channels() << std::endl; 25 | return image; 26 | } 27 | 28 | std::vector 29 | flip_channels(float* input_buffer, size_t X, size_t Y, size_t Z) { 30 | assert(input_buffer != nullptr); 31 | std::vector flipped(X * Y * Z); 32 | 33 | for (size_t x = 0; x < X; ++x) { 34 | for (size_t y = 0; y < Y; ++y) { 35 | for (size_t z = 0; z < Z; ++z) { 36 | const size_t index = x * (Y * Z) + y * Z + z; 37 | const size_t flipped_index = z * (X * Y) + y * X + x; 38 | assert(flipped_index < X * Y * Z); 39 | flipped[flipped_index] = input_buffer[index]; 40 | } 41 | } 42 | } 43 | 44 | return flipped; 45 | } 46 | 47 | 48 | void save_image(const char* output_filename, 49 | float* buffer, 50 | int height, 51 | int width, 52 | bool is_gray) { 53 | const auto format = is_gray ? CV_32F : CV_32FC3; 54 | cv::Mat output_image(height, width, format, buffer); 55 | // Make negative values zero. 56 | cv::threshold(output_image, 57 | output_image, 58 | /*threshold=*/0, 59 | /*maxval=*/0, 60 | cv::THRESH_TOZERO); 61 | cv::normalize(output_image, output_image, 0.0, 255.0, cv::NORM_MINMAX); 62 | output_image.convertTo(output_image, CV_8UC3); 63 | cv::imwrite(output_filename, output_image); 64 | std::cerr << "Wrote output to " << output_filename << std::endl; 65 | } 66 | 67 | void setup_conversion(dnnPrimitive_t* conversion_primitive, 68 | dnnLayout_t source_layout, 69 | dnnLayout_t target_layout, 70 | float* source_buffer, 71 | float** target_buffer) { 72 | if (!dnnLayoutCompare_F32(target_layout, source_layout)) { 73 | checkMKL(dnnConversionCreate_F32(conversion_primitive, 74 | source_layout, 75 | target_layout)); 76 | checkMKL(dnnAllocateBuffer_F32(reinterpret_cast(target_buffer), 77 | target_layout)); 78 | } else { 79 | assert(source_buffer != nullptr); 80 | *target_buffer = source_buffer; 81 | } 82 | 83 | assert(target_buffer != nullptr); 84 | } 85 | 86 | int main(int argc, const char* argv[]) { 87 | if (argc < 2) { 88 | std::cerr << "usage: conv [is_gray]" << std::endl; 89 | std::exit(EXIT_FAILURE); 90 | } 91 | 92 | bool is_gray = false; 93 | if (argc == 3) { 94 | is_gray = std::atoi(argv[2]); 95 | } 96 | 97 | cv::Mat image = load_image(argv[1], is_gray); 98 | 99 | const size_t height = image.rows; 100 | const size_t width = image.cols; 101 | const size_t input_channels = is_gray ? 1 : 3; 102 | const size_t output_channels = is_gray ? 
1 : 3; 103 | const size_t batch_size = 1; 104 | const int kernel_size = 5; 105 | const size_t dimension = 4; 106 | 107 | // Format is: WHCN 108 | size_t input_shape[] = {width, height, input_channels, batch_size}; 109 | size_t input_strides[] = {1, 110 | width, 111 | width * height, 112 | width * height * input_channels}; 113 | 114 | size_t output_shape[] = {width, height, output_channels, batch_size}; 115 | size_t output_strides[] = {1, 116 | width, 117 | width * height, 118 | width * height * output_channels}; 119 | 120 | // HWIO 121 | size_t kernel_shape[] = {kernel_size, 122 | kernel_size, 123 | input_channels, 124 | output_channels}; 125 | size_t kernel_strides[] = {1, 126 | kernel_size, 127 | kernel_size * kernel_size, 128 | kernel_size * kernel_size * input_channels}; 129 | 130 | dnnLayout_t input_layout{nullptr}; 131 | checkMKL(dnnLayoutCreate_F32(&input_layout, 132 | dimension, 133 | input_shape, 134 | input_strides)); 135 | 136 | dnnLayout_t output_layout{nullptr}; 137 | checkMKL(dnnLayoutCreate_F32(&output_layout, 138 | dimension, 139 | output_shape, 140 | output_strides)); 141 | 142 | dnnLayout_t kernel_layout{nullptr}; 143 | checkMKL(dnnLayoutCreate_F32(&kernel_layout, 144 | dimension, 145 | kernel_shape, 146 | kernel_strides)); 147 | 148 | // assert(dnnLayoutCompare_F32(input_layout, output_layout)); 149 | 150 | dnnPrimitiveAttributes_t attributes{nullptr}; 151 | checkMKL(dnnPrimitiveAttributesCreate_F32(&attributes)); 152 | 153 | size_t convolution_strides[] = {/*width=*/1, /*height=*/1}; 154 | int convolution_offsets[] = {/*horizontal=*/(1 - kernel_size) / 2, 155 | /*vertical=*/(1 - kernel_size) / 2}; 156 | 157 | dnnPrimitive_t convolution_primitive{nullptr}; 158 | checkMKL(dnnConvolutionCreateForward_F32(&convolution_primitive, 159 | attributes, 160 | dnnAlgorithmConvolutionDirect, 161 | dimension, 162 | input_shape, 163 | output_shape, 164 | kernel_shape, 165 | convolution_strides, 166 | convolution_offsets, 167 | dnnBorderZeros)); 168 | 169 | dnnLayout_t conv_input_layout{nullptr}; 170 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_input_layout, 171 | convolution_primitive, 172 | dnnResourceSrc)); 173 | std::cerr << "Input size: " << dnnLayoutGetMemorySize_F32(conv_input_layout) 174 | << "B" << std::endl; 175 | 176 | 177 | dnnLayout_t conv_output_layout{nullptr}; 178 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_output_layout, 179 | convolution_primitive, 180 | dnnResourceDst)); 181 | std::cerr << "Output size: " << dnnLayoutGetMemorySize_F32(conv_output_layout) 182 | << "B" << std::endl; 183 | 184 | dnnLayout_t conv_kernel_layout{nullptr}; 185 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_kernel_layout, 186 | convolution_primitive, 187 | dnnResourceFilter)); 188 | std::cerr << "Kernel size: " << dnnLayoutGetMemorySize_F32(conv_kernel_layout) 189 | << "B" << std::endl; 190 | 191 | auto input_buffer = 192 | flip_channels(image.ptr(0), height, width, input_channels); 193 | float* output_buffer{nullptr}; 194 | float* conversion_buffer[dnnResourceNumber] = {nullptr}; 195 | 196 | // clang-format off 197 | float kernel_template[kernel_size][kernel_size] = { 198 | {-1, -1, -1, -1, -1}, 199 | {-1, -1, -1, -1, -1}, 200 | {-1, -1, 24, -1, -1}, 201 | {-1, -1, -1, -1, -1}, 202 | {-1, -1, -1, -1, -1}, 203 | }; 204 | // clang-format on 205 | 206 | float kernel_buffer[output_channels][input_channels][kernel_size] 207 | [kernel_size]; 208 | for (size_t output_channel = 0; output_channel < output_channels; 209 | ++output_channel) { 210 | for (size_t input_channel = 0; 
input_channel < input_channels; 211 | ++input_channel) { 212 | for (size_t column = 0; column < kernel_size; ++column) { 213 | for (size_t row = 0; row < kernel_size; ++row) { 214 | kernel_buffer[output_channel][input_channel][column][row] = 215 | kernel_template[column][row]; 216 | } 217 | } 218 | } 219 | } 220 | float* kernel_buffer_flat = &kernel_buffer[0][0][0][0]; 221 | 222 | dnnPrimitive_t input_conversion{nullptr}; 223 | setup_conversion(&input_conversion, 224 | input_layout, 225 | conv_input_layout, 226 | input_buffer.data(), 227 | &conversion_buffer[dnnResourceSrc]); 228 | 229 | dnnPrimitive_t kernel_conversion{nullptr}; 230 | setup_conversion(&kernel_conversion, 231 | kernel_layout, 232 | conv_kernel_layout, 233 | kernel_buffer_flat, 234 | &conversion_buffer[dnnResourceFilter]); 235 | 236 | checkMKL(dnnAllocateBuffer_F32(reinterpret_cast( 237 | &conversion_buffer[dnnResourceDst]), 238 | conv_output_layout)); 239 | 240 | dnnPrimitive_t output_conversion{nullptr}; 241 | setup_conversion(&output_conversion, 242 | conv_output_layout, 243 | output_layout, 244 | conversion_buffer[dnnResourceDst], 245 | &output_buffer); 246 | 247 | if (kernel_conversion) { 248 | std::cerr << "Performing kernel conversion" << std::endl; 249 | checkMKL(dnnConversionExecute_F32(kernel_conversion, 250 | kernel_buffer, 251 | conversion_buffer[dnnResourceFilter])); 252 | } else { 253 | std::cerr << "Skipping kernel conversion" << std::endl; 254 | } 255 | 256 | if (input_conversion) { 257 | std::cerr << "Performing input conversion" << std::endl; 258 | checkMKL(dnnConversionExecute_F32(input_conversion, 259 | input_buffer.data(), 260 | conversion_buffer[dnnResourceSrc])); 261 | } else { 262 | std::cerr << "Skipping input conversion" << std::endl; 263 | } 264 | 265 | std::cerr << "Executing convolution" << std::endl; 266 | checkMKL(dnnExecute_F32(convolution_primitive, 267 | reinterpret_cast(conversion_buffer))); 268 | 269 | 270 | if (output_conversion) { 271 | std::cerr << "Performing output conversion" << std::endl; 272 | checkMKL(dnnConversionExecute_F32(output_conversion, 273 | conversion_buffer[dnnResourceDst], 274 | output_buffer)); 275 | } else { 276 | std::cerr << "Skipping output conversion" << std::endl; 277 | } 278 | 279 | auto flipped_output = 280 | flip_channels(output_buffer, output_channels, height, width); 281 | save_image("mkl-out.png", flipped_output.data(), height, width, is_gray); 282 | 283 | // --------------------------------------------------------------------------- 284 | 285 | checkMKL(dnnPrimitiveAttributesDestroy_F32(attributes)); 286 | 287 | checkMKL(dnnLayoutDelete_F32(kernel_layout)); 288 | checkMKL(dnnLayoutDelete_F32(output_layout)); 289 | checkMKL(dnnLayoutDelete_F32(input_layout)); 290 | 291 | checkMKL(dnnLayoutDelete_F32(conv_kernel_layout)); 292 | checkMKL(dnnLayoutDelete_F32(conv_output_layout)); 293 | checkMKL(dnnLayoutDelete_F32(conv_input_layout)); 294 | 295 | if (conversion_buffer[dnnResourceSrc] != input_buffer.data()) { 296 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceSrc])); 297 | } 298 | if (conversion_buffer[dnnResourceFilter] != kernel_buffer_flat) { 299 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceFilter])); 300 | } 301 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceDst])); 302 | } 303 | -------------------------------------------------------------------------------- /code/mkl/cppcon-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mkl/cppcon-logo.png -------------------------------------------------------------------------------- /code/mxnet/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := lenet 2 | WARNINGS := -Wall -Wextra -pedantic 3 | HEADERS := -isystem $(MXNET_PATH)/cpp-package/include \ 4 | -isystem $(MXNET_PATH)/include \ 5 | -isystem $(MXNET_PATH)/nnvm/include \ 6 | -isystem $(MXNET_PATH)/dmlc-core/include \ 7 | -I ../ 8 | LIBS := -L $(MXNET_PATH)/lib -L /usr/local/lib 9 | CXXFLAGS := -std=c++11 10 | 11 | all: lenet 12 | 13 | lenet: $(TARGET).cpp 14 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 15 | $(TARGET).cpp ../common/socket.cpp -o $(TARGET) \ 16 | -lmxnet -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 17 | 18 | .phony: clean 19 | 20 | clean: 21 | rm $(TARGET) || echo -n "" 22 | -------------------------------------------------------------------------------- /code/mxnet/README.md: -------------------------------------------------------------------------------- 1 | # mxnet 2 | 3 | Demo of creating a deep neural network with MXNet's C++ api. 4 | 5 | ## Building 6 | 7 | ### Prerequisites 8 | 9 | 1. Clone MXNet: `git clone https://github.com/apache/incubator-mxnet`, 10 | 2. Build with CMake, using something like this: `make -j4 USE_BLAS=apple USE_OPENCV=1 USE_CPP_PACKAGE=1 USE_OPENMP=0` (the important part is `USE_CPP_PACKAGE`, the rest may differ for you), 11 | 5. OpenCV2 (consult your package manager). 12 | 13 | ### Build the Neural Network 14 | 15 | The `lenet.cpp` file contains code for a neural network using MXNet. You can 16 | build it with the Makefile in this folder. For this, you need to set the 17 | `MXNET_PATH` environment variable to point to your MXNet library path prefix, 18 | e.g. for me: 19 | 20 | ```sh 21 | MXNET_PATH=~/Documents/Libraries/mxnet make 22 | ``` 23 | 24 | ### Build the Demo 25 | 26 | The demo GUI uses Qt (5.7 or newer). You will need to download it. Then generate 27 | a Makefile using `qmake` and make: 28 | 29 | ```sh 30 | $ cd demo 31 | $ qmake 32 | $ make 33 | ``` 34 | 35 | ## Running 36 | 37 | First download the MNIST dataset using the `download_mnist.sh` script. Then run 38 | the binary produced from `lenet.cpp`, which will train the neural network. You 39 | can optionally pass a number of epochs to train as a command line argument. 40 | Anywhere between 1 and 10 is sensible. The default is two epochs, which gets you 41 | to around 98% accuracy for the task (handwritten digit classification). Once 42 | it's done training, it will start listening on a socket for prediction requests. 43 | At this point, launch the demo app binary, which will connect to the server, 44 | allowing you to request predictions. Like so: 45 | 46 | ```sh 47 | $ LD_LIBRARY_PATH=/path/to/mxnet/lib ./lenet MNIST_data 48 | $ demo/ 49 | ``` 50 | 51 | where `` is `demo.app/Contents/MacOS/demo` for example. Differs on 52 | Linux or Windows. 
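A note on paths: `download_mnist.sh` creates (and the repo already ships) a lower-case `mnist_data/` directory, so that is the directory name to pass to `lenet`.

The GUI is only a thin client: `lenet` listens on port 6666, the client sends the path of a PNG as plain text, and the server answers with the predicted digit, again as plain text (the Qt backend simply calls `toInt()` on the reply). For testing without Qt, the same request can be made from any socket client. The following is a minimal sketch (not part of the repo) that assumes the server ends up bound to an IPv4 localhost address and that `demo.png` is a small grayscale image of a digit:

```cpp
// Hypothetical stand-alone client for the prediction server in lenet.cpp.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstring>
#include <iostream>

int main() {
  const int fd = socket(AF_INET, SOCK_STREAM, 0);
  sockaddr_in server{};
  server.sin_family = AF_INET;
  server.sin_port = htons(6666);  // port hard-coded in lenet.cpp and backend.cpp
  inet_pton(AF_INET, "127.0.0.1", &server.sin_addr);

  if (connect(fd, reinterpret_cast<sockaddr*>(&server), sizeof server) == -1) {
    std::cerr << "Could not connect to the lenet server" << std::endl;
    return 1;
  }

  // The server reads at most 256 bytes and treats them as an image path.
  const char* image_path = "demo.png";
  send(fd, image_path, std::strlen(image_path), 0);

  char reply[16] = {0};
  recv(fd, reply, sizeof reply - 1, 0);
  std::cout << "Predicted digit: " << reply << std::endl;

  close(fd);
}
```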
53 | -------------------------------------------------------------------------------- /code/mxnet/demo/.qmake.stash: -------------------------------------------------------------------------------- 1 | QMAKE_MAC_SDK.macosx.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk 2 | QMAKE_MAC_SDK.macosx.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform 3 | QMAKE_MAC_SDK.macosx.SDKVersion = 10.12 4 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang 5 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 6 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \ 7 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ 8 | -id 9 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \ 10 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ 11 | cq 12 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \ 13 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ 14 | -s 15 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 16 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 17 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_ACTOOL = /Applications/Xcode.app/Contents/Developer/usr/bin/actool 18 | QMAKE_CXX.INCDIRS = \ 19 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 20 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 21 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 22 | /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk/usr/include 23 | QMAKE_CXX.LIBDIRS = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk/usr/lib 24 | QMAKE_CXX.QT_COMPILER_STDCXX = 199711L 25 | QMAKE_CXX.QMAKE_APPLE_CC = 6000 26 | QMAKE_CXX.QMAKE_APPLE_CLANG_MAJOR_VERSION = 8 27 | QMAKE_CXX.QMAKE_APPLE_CLANG_MINOR_VERSION = 1 28 | QMAKE_CXX.QMAKE_APPLE_CLANG_PATCH_VERSION = 0 29 | QMAKE_CXX.QMAKE_GCC_MAJOR_VERSION = 4 30 | QMAKE_CXX.QMAKE_GCC_MINOR_VERSION = 2 31 | QMAKE_CXX.QMAKE_GCC_PATCH_VERSION = 1 32 | QMAKE_CXX.COMPILER_MACROS = \ 33 | QT_COMPILER_STDCXX \ 34 | QMAKE_APPLE_CC \ 35 | QMAKE_APPLE_CLANG_MAJOR_VERSION \ 36 | QMAKE_APPLE_CLANG_MINOR_VERSION \ 37 | QMAKE_APPLE_CLANG_PATCH_VERSION \ 38 | QMAKE_GCC_MAJOR_VERSION \ 39 | QMAKE_GCC_MINOR_VERSION \ 40 | QMAKE_GCC_PATCH_VERSION 41 | QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer 42 | QMAKE_XCODE_VERSION = 8.3.3 43 | QMAKE_DEFAULT_INCDIRS = \ 44 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 45 | /usr/local/include \ 46 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 47 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 48 | /usr/include \ 49 | "/System/Library/Frameworks (framework directory)" \ 50 | "/Library/Frameworks (framework directory)" 51 | QMAKE_DEFAULT_LIBDIRS = \ 
52 | /lib \ 53 | /usr/lib 54 | -------------------------------------------------------------------------------- /code/mxnet/demo/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | const int kPort = 6666; 10 | 11 | BackEnd::BackEnd(QObject* parent) 12 | : QObject(parent), socket(new QTcpSocket(this)) { 13 | socket->connectToHost("localhost", kPort); 14 | 15 | QObject::connect(socket, &QTcpSocket::connected, [] { 16 | qDebug() << "Connected to localhost:" << kPort; 17 | }); 18 | 19 | QObject::connect(socket, &QTcpSocket::readyRead, [this] { 20 | emit prediction(socket->readAll().toInt()); 21 | }); 22 | } 23 | 24 | void BackEnd::predict(QString imageFilename) { 25 | if (socket->waitForConnected(3000)) { 26 | socket->write(imageFilename.toStdString().c_str()); 27 | } else { 28 | qDebug() << "Error connecting to server!"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/mxnet/demo/backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H 2 | #define BACKEND_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class BackEnd : public QObject { 9 | Q_OBJECT 10 | 11 | public: 12 | explicit BackEnd(QObject* parent = nullptr); 13 | 14 | Q_INVOKABLE void predict(QString imageFilename); 15 | 16 | signals: 17 | 18 | void prediction(int prediction); 19 | 20 | private: 21 | QTcpSocket* socket; 22 | }; 23 | 24 | #endif // BACKEND_H 25 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "backend.h" 5 | 6 | int main(int argc, char** argv) { 7 | QApplication app(argc, argv); 8 | QQmlApplicationEngine engine; 9 | 10 | qmlRegisterType("demo.backend", 1, 0, "BackEnd"); 11 | 12 | engine.load(QUrl("qrc:///main.qml")); 13 | 14 | return app.exec(); 15 | } 16 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.pro: -------------------------------------------------------------------------------- 1 | TARGET = demo 2 | TEMPLATE = app 3 | QT += widgets qml quick core network 4 | RESOURCES += qml.qrc 5 | SOURCES += main.cpp backend.cpp 6 | HEADERS += backend.h 7 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.qml: -------------------------------------------------------------------------------- 1 | import QtQuick 2.6 2 | import QtQuick.Controls 2.0 3 | import QtQuick.Layouts 1.3 4 | import QtQuick.Controls.Styles 1.4 5 | import demo.backend 1.0 6 | 7 | ApplicationWindow { 8 | id: root 9 | width: 500 10 | height: 500 11 | visible: true 12 | title: "Canvas" 13 | 14 | readonly property string imageFilename: "demo.png" 15 | 16 | Canvas { 17 | id: demo 18 | anchors.fill: parent 19 | property int posX; 20 | property int posY; 21 | property bool pressed; 22 | 23 | signal clear 24 | 25 | onPaint: { 26 | var ctx = getContext("2d"); 27 | if (pressed) { 28 | ctx.fillStyle = "white"; 29 | ctx.ellipse(posX, posY, 25, 25); 30 | ctx.fill(); 31 | } else { 32 | ctx.reset(); 33 | ctx.fillStyle = 'black'; 34 | ctx.fillRect(0, 0, width, height); 35 | } 36 | } 37 | 38 | onClear: { 39 | pressed = false; 40 | requestPaint(); 41 | } 42 | 43 | MouseArea { 44 | anchors.fill: parent 45 | onPressed: { 46 | 
parent.pressed = true; 47 | } 48 | onPositionChanged: { 49 | parent.posX = mouseX; 50 | parent.posY = mouseY; 51 | parent.requestPaint(); 52 | } 53 | } 54 | } 55 | 56 | RowLayout { 57 | anchors.bottom: parent.bottom 58 | anchors.horizontalCenter: parent.horizontalCenter 59 | Button { 60 | flat: true 61 | onClicked: { 62 | demo.save(root.imageFilename); 63 | backend.predict(root.imageFilename); 64 | } 65 | contentItem: Text { 66 | text: "Predict" 67 | color: "white" 68 | horizontalAlignment: Text.AlignHCenter 69 | verticalAlignment: Text.AlignVCenter 70 | } 71 | } 72 | 73 | Label { 74 | id: predicted 75 | text: "?" 76 | font.pixelSize: 72 77 | color: "white" 78 | } 79 | 80 | Button { 81 | flat: true 82 | onClicked: demo.clear(); 83 | contentItem: Text { 84 | text: "Reset" 85 | color: "white" 86 | horizontalAlignment: Text.AlignHCenter 87 | verticalAlignment: Text.AlignVCenter 88 | } 89 | } 90 | } 91 | 92 | BackEnd { 93 | id: backend 94 | onPrediction: predicted.text = prediction 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.qmlc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/demo/main.qmlc -------------------------------------------------------------------------------- /code/mxnet/demo/qml.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | main.qml 5 | 6 | 7 | -------------------------------------------------------------------------------- /code/mxnet/download_mnist.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "./MNIST_data" ]; then 2 | mkdir mnist_data 3 | cd mnist_data 4 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz \ 5 | http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz \ 6 | http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz \ 7 | http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 8 | gzip -d *.gz 9 | fi 10 | -------------------------------------------------------------------------------- /code/mxnet/lenet.cpp: -------------------------------------------------------------------------------- 1 | #include "common/socket.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace mx = mxnet::cpp; 14 | 15 | mx::Symbol LeNet() { 16 | auto images = mx::Symbol::Variable("images"); 17 | auto labels = mx::Symbol::Variable("labels"); 18 | 19 | // ------------------------------- CONV 1 ------------------------------- 20 | 21 | mx::Symbol conv1_weights("conv1_weights"); 22 | mx::Symbol conv1_bias("conv1_bias"); 23 | 24 | auto conv1 = mx::Convolution("conv1", 25 | images, 26 | conv1_weights, 27 | conv1_bias, 28 | /*kernel=*/mx::Shape(5, 5), 29 | /*filters=*/32); 30 | auto conv1_activation = 31 | mx::Activation("conv1_activation", conv1, mx::ActivationActType::kRelu); 32 | auto pool1 = mx::Pooling("pool1", 33 | conv1_activation, 34 | mx::Shape(2, 2), 35 | mx::PoolingPoolType::kMax, 36 | /*global_pool=*/false, 37 | /*use_cudnn=*/false, 38 | mx::PoolingPoolingConvention::kValid, 39 | mx::Shape(2, 2)); 40 | 41 | // ------------------------------- CONV 2 ------------------------------- 42 | 43 | mx::Symbol conv2_weights("conv2_weights"); 44 | mx::Symbol conv2_bias("conv2_bias"); 45 | 46 | auto conv2 = mx::Convolution("conv2", 47 | pool1, 48 | conv2_weights, 49 
| conv2_bias, 50 | /*kernel=*/mx::Shape(5, 5), 51 | /*filters=*/64); 52 | auto conv2_activation = 53 | mx::Activation("conv2_activation", conv2, mx::ActivationActType::kRelu); 54 | auto pool2 = mx::Pooling("pool2", 55 | conv2_activation, 56 | mx::Shape(2, 2), 57 | mx::PoolingPoolType::kMax, 58 | /*global_pool=*/false, 59 | /*use_cudnn=*/false, 60 | mx::PoolingPoolingConvention::kValid, 61 | mx::Shape(2, 2)); 62 | 63 | // ------------------------------- FC 1 ------------------------------- 64 | 65 | mx::Symbol fc1_weights("fc1_weights"); 66 | mx::Symbol fc1_bias("fc1_bias"); 67 | 68 | auto flatten = mx::Flatten("flatten", pool2); 69 | auto fc1 = mx::FullyConnected("fc1", 70 | flatten, 71 | fc1_weights, 72 | fc1_bias, 73 | /*units=*/1024); 74 | auto fc1_activation = 75 | mx::Activation("fc1_activation", fc1, mx::ActivationActType::kRelu); 76 | 77 | // ------------------------------- FC 2 ------------------------------- 78 | 79 | mx::Symbol fc2_weights("fc2_weights"); 80 | mx::Symbol fc2_bias("fc2_bias"); 81 | 82 | auto fc2 = mx::FullyConnected("fc2", 83 | fc1_activation, 84 | fc2_weights, 85 | fc2_bias, 86 | /*units=*/10); 87 | 88 | // ------------------------------- P ------------------------------- 89 | 90 | return mx::SoftmaxOutput("softmax", fc2, labels); 91 | } 92 | 93 | mx::NDArray load_image(const std::string& image_path, 94 | const mx::Shape& batch_shape, 95 | mx::Context& context) { 96 | cv::Mat image2 = cv::imread(image_path, CV_LOAD_IMAGE_GRAYSCALE); 97 | assert(image2.data != nullptr); 98 | image2.convertTo(image2, CV_32F); 99 | cv::normalize(image2, image2, 0, 1, cv::NORM_MINMAX); 100 | 101 | cv::Mat image(batch_shape[2], batch_shape[3], batch_shape[1]); 102 | resize(image2, image, image.size(), 0, 0); 103 | std::cerr << "Loaded image of shape: " << image.rows << " x " << image.cols 104 | << " x " << image.channels() << std::endl; 105 | 106 | // Make a batch and fill the first image 107 | const size_t batch_flat_size = batch_shape[0] * image.total(); 108 | std::vector flat(batch_flat_size, 0); 109 | const auto* pointer = image.ptr(0); 110 | std::copy(pointer, pointer + image.total(), flat.begin()); 111 | 112 | mx::NDArray ndarray(batch_shape, context); 113 | ndarray.SyncCopyFromCPU(flat); 114 | mx::NDArray::WaitAll(); 115 | 116 | return ndarray; 117 | } 118 | 119 | int predict(mx::NDArray image, 120 | mx::Executor& executor, 121 | std::map& symbols) { 122 | image.CopyTo(&symbols["images"]); 123 | symbols["labels"] = 0; 124 | mx::NDArray::WaitAll(); 125 | executor.Forward(/*training=*/false); 126 | 127 | std::vector predictions(image.GetShape()[0]); 128 | executor.outputs[0].ArgmaxChannel().SyncCopyToCPU(&predictions, 129 | predictions.size()); 130 | 131 | return predictions[0]; 132 | } 133 | 134 | int main(int argc, char const* argv[]) { 135 | if (argc < 2) { 136 | std::cerr << "usage: lenet [epochs=2]\n"; 137 | std::exit(EXIT_FAILURE); 138 | } 139 | 140 | std::string mnist_path(argv[1]); 141 | 142 | const size_t batch_size = 128; 143 | const size_t number_of_epochs = (argc == 3) ? 
std::atoi(argv[2]) : 2; 144 | const size_t image_width = 28; 145 | const size_t image_height = 28; 146 | const size_t image_channels = 1; 147 | 148 | auto context = mx::Context::cpu(); 149 | 150 | mx::Shape image_shape(batch_size, image_channels, image_width, image_height); 151 | 152 | auto lenet = LeNet(); 153 | 154 | // clang-format off 155 | std::map symbols = { 156 | {"images", mx::NDArray(image_shape, context)}, 157 | {"labels", mx::NDArray(mx::Shape(batch_size), context)}, 158 | }; 159 | // clang-format on 160 | 161 | lenet.InferArgsMap(context, &symbols, symbols); 162 | const auto symbol_names = lenet.ListArguments(); 163 | 164 | mx::Normal normal_initializer(/*mean=*/0.0, /*stddev=*/0.1); 165 | for (auto& symbol : symbols) { 166 | if (symbol.first == "images" || symbol.first == "labels") continue; 167 | normal_initializer(symbol.first, &symbol.second); 168 | } 169 | 170 | mx::Optimizer* optimizer = mx::OptimizerRegistry::Find("sgd"); 171 | assert(optimizer != nullptr); 172 | optimizer->SetParam("lr", 0.1)->SetParam("rescale_grad", 1.0 / batch_size); 173 | 174 | std::unique_ptr executor(lenet.SimpleBind(context, symbols)); 175 | 176 | auto training_iterator = 177 | mx::MXDataIter("MNISTIter") 178 | .SetParam("image", mnist_path + "/train-images-idx3-ubyte") 179 | .SetParam("label", mnist_path + "/train-labels-idx1-ubyte") 180 | .SetParam("batch_size", batch_size) 181 | .SetParam("shuffle", true) 182 | .SetParam("flat", 0) 183 | .CreateDataIter(); 184 | 185 | auto test_iterator = 186 | mx::MXDataIter("MNISTIter") 187 | .SetParam("image", mnist_path + "/t10k-images-idx3-ubyte") 188 | .SetParam("label", mnist_path + "/t10k-labels-idx1-ubyte") 189 | .SetParam("batch_size", batch_size) 190 | .SetParam("shuffle", true) 191 | .SetParam("flat", 0) 192 | .CreateDataIter(); 193 | 194 | size_t training_number_of_batches = 60000 / batch_size; 195 | for (size_t epoch = 1; epoch <= number_of_epochs; ++epoch) { 196 | training_iterator.Reset(); 197 | for (size_t batch_index = 0; training_iterator.Next(); ++batch_index) { 198 | auto batch = training_iterator.GetDataBatch(); 199 | batch.data.CopyTo(&symbols["images"]); 200 | batch.label.CopyTo(&symbols["labels"]); 201 | 202 | // Wait for symbols to be populated. 
203 | mx::NDArray::WaitAll(); 204 | 205 | executor->Forward(/*training=*/true); 206 | executor->Backward(); 207 | 208 | for (size_t symbol = 0; symbol < symbol_names.size(); ++symbol) { 209 | if (symbol_names[symbol] == "images") continue; 210 | if (symbol_names[symbol] == "labels") continue; 211 | optimizer->Update(symbol, 212 | executor->arg_arrays[symbol], 213 | executor->grad_arrays[symbol]); 214 | } 215 | 216 | std::cout << "\rBatch " << batch_index << "/" 217 | << training_number_of_batches << std::flush; 218 | } 219 | 220 | std::cout << std::endl; 221 | LOG(INFO) << "Evaluating ..."; 222 | 223 | mx::Accuracy accuracy; 224 | test_iterator.Reset(); 225 | while (test_iterator.Next()) { 226 | auto batch = test_iterator.GetDataBatch(); 227 | batch.data.CopyTo(&symbols["images"]); 228 | batch.label.CopyTo(&symbols["labels"]); 229 | mx::NDArray::WaitAll(); 230 | executor->Forward(/*training=*/false); 231 | accuracy.Update(batch.label, executor->outputs[0]); 232 | } 233 | 234 | std::cout << "Epoch: " << epoch << " | Accuracy: " << accuracy.Get() 235 | << std::endl; 236 | } 237 | 238 | 239 | Socket socket(6666); 240 | std::cerr << "Listening on port 6666" << std::endl; 241 | 242 | socket.accept(); 243 | std::cerr << "Connection established" << std::endl; 244 | 245 | while (true) { 246 | const std::string image_filename = socket.read(256); 247 | std::cout << "Prediction request for: \"" << image_filename << "\"" 248 | << std::endl; 249 | auto image = load_image(image_filename, image_shape, context); 250 | const int prediction = predict(image, *executor, symbols); 251 | socket.write(std::to_string(prediction)); 252 | std::cout << "Sending prediction: " << prediction << std::endl; 253 | } 254 | 255 | MXNotifyShutdown(); 256 | } 257 | -------------------------------------------------------------------------------- /code/mxnet/mnist_data/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /code/mxnet/mnist_data/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /code/mxnet/mnist_data/train-images-idx3-ubyte: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/train-images-idx3-ubyte -------------------------------------------------------------------------------- /code/mxnet/mnist_data/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /code/requirements.txt: -------------------------------------------------------------------------------- 1 | bleach==1.5.0 2 | html5lib==0.9999999 3 | Keras==2.0.8 4 | Markdown==2.6.9 5 | numpy==1.13.1 6 | protobuf==3.4.0 7 | PyYAML==3.12 8 | scipy==0.19.1 9 | six==1.11.0 10 | tensorflow==1.3.0 11 | tensorflow-tensorboard==0.1.6 12 | Werkzeug==0.12.2 13 | -------------------------------------------------------------------------------- /code/tf-graph/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := load-graph 2 | WARNINGS := -Wall -Wextra -pedantic -Wno-vla-extension 3 | HEADERS := -isystem $(TF_PATH) \ 4 | -isystem $(TF_PATH)/bazel-genfiles \ 5 | -isystem $(TF_PATH)/third-party \ 6 | -isystem /usr/local/include/eigen3 \ 7 | -isystem $(PB_PATH)/src \ 8 | -I ../ 9 | LIBS := -L$(TF_PATH)/bazel-bin/tensorflow/ 10 | CXXFLAGS := -std=c++11 -O2 11 | 12 | all: graph 13 | 14 | graph: $(TARGET).cpp 15 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 16 | $(TARGET).cpp -o $(TARGET) -l tensorflow_cc \ 17 | -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 18 | 19 | server: $(TARGET)-server.cpp 20 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 21 | $(TARGET)-server.cpp ../common/socket.cpp -o $(TARGET)-server \ 22 | -l tensorflow_cc -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 23 | 24 | .phony: clean 25 | 26 | clean: 27 | rm $(TARGET) $(TARGET)-server || echo -n "" 28 | -------------------------------------------------------------------------------- /code/tf-graph/README.md: -------------------------------------------------------------------------------- 1 | # tf-graph 2 | 3 | Demo of loading and running a TensorFlow graph trained and exported in Python. 4 | 5 | ## Building 6 | 7 | ### Prerequisites 8 | 9 | 1. A clone of TensorFlow: `git clone https://github.com/tensorflow/tensorflow` checked out at release/branch r1.3 (that's what I use, may work with newer), 10 | 2. See `https://github.com/tensorflow/tensorflow/issues/2412` how to build a library from TensorFlow sources (you'll need Bazel), 11 | 3. A clone of Protocol Buffers at version 3.3.0 (exactly!): `https://github.com/google/protobuf/releases/tag/v3.3.0`, 12 | 4. Build that version of ProtoBuf from source using the Makefile inside, 13 | 5. OpenCV2 (consult your package manager). 14 | 15 | Note that TensorFlow also needs Eigen. 16 | 17 | ### Build the Graph Loader 18 | 19 | You can build using the Makefile found in this folder. You need to set `TF_PATH` and `PB_PATH` environment variables to your local folder location of TensorFlow and ProtoBuf, respectively, e.g. for me: 20 | 21 | ```sh 22 | TF_PATH=~/Documents/Libraries/tensorflow \ 23 | PB_PATH=~/Documents/Libraries/protobuf-3.3.0 make 24 | ``` 25 | 26 | This will build `load-graph.cpp`, which loads a graph and generates an image. 
27 | There is also `load-graph-server.cpp` under the `server` target of the Makefile 28 | which builds the version that listens on a socket for inference requests from 29 | the demo. 30 | 31 | ### Demo 32 | 33 | First make sure you've built the server version with the above instructions. 34 | Then, for the `demo` folder, you'll need Qt (5.7 or newer). Generate a Makefile 35 | and simply make: 36 | 37 | ```sh 38 | $ cd demo 39 | $ qmake 40 | $ make 41 | ``` 42 | 43 | ## Running 44 | 45 | Both the server and non-server version require two arguments: 46 | 47 | 1. The path to a model checkpoint, 48 | 2. The path prefix for a saved TensorFlow session. 49 | 50 | You can pass those two to the binary of `load-graph.cpp` and it will generate an 51 | image under `/tmp/out.png`. 52 | 53 | For the demo, the server version of `load-graph` will also start listening on a 54 | socket when you run it. You should start this binary first, then start the 55 | `demo` Qt app, which will connect to the server to request images. That is: 56 | 57 | ```sh 58 | $ LD_LIBRARY_PATH=/path/to/tensorflow_cc.so load-graph-server graph.pb checkpoint 59 | $ demo/ 60 | ``` 61 | 62 | where `` is `demo.app/Contents/MacOS/demo` for example. Differs on 63 | Linux or Windows. 64 | -------------------------------------------------------------------------------- /code/tf-graph/demo/.qmake.stash: -------------------------------------------------------------------------------- 1 | QMAKE_DEFAULT_INCDIRS = \ 2 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 3 | /usr/local/include \ 4 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 5 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 6 | /usr/include \ 7 | "/System/Library/Frameworks (framework directory)" \ 8 | "/Library/Frameworks (framework directory)" 9 | QMAKE_DEFAULT_LIBDIRS = \ 10 | /lib \ 11 | /usr/lib 12 | QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer 13 | QMAKE_XCODE_VERSION = 8.3.3 14 | QMAKE_MAC_SDK.macosx.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk 15 | QMAKE_MAC_SDK.macosx.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform 16 | QMAKE_MAC_SDK.macosx.SDKVersion = 10.12 17 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang 18 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 19 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \ 20 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ 21 | -id 22 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \ 23 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ 24 | cq 25 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \ 26 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ 27 | -s 28 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 29 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 30 | 
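A quick way to sanity-check the TensorFlow C++ setup described in the tf-graph README above is a small stand-alone program that only touches the public session API. This is a hypothetical smoke test, not a file from the repository: it assumes the r1.3 tree ships `tensorflow/core/public/version.h` with `TF_VERSION_STRING`, and that it is compiled with the same `HEADERS` and `LIBS` flags as the Makefile above, linking `-ltensorflow_cc`.

```cpp
// tf-smoke-test.cpp -- hypothetical helper, not part of the repository.
// Confirms that the headers under TF_PATH and libtensorflow_cc link cleanly.
#include <iostream>
#include <memory>

#include <tensorflow/core/public/session.h>
#include <tensorflow/core/public/version.h>

int main() {
  // TF_VERSION_STRING comes from the public version header in r1.3.
  std::cout << "TensorFlow version: " << TF_VERSION_STRING << std::endl;

  // Creating and closing an empty session exercises the runtime linkage
  // without needing a graph or a checkpoint.
  std::unique_ptr<tensorflow::Session> session(
      tensorflow::NewSession(tensorflow::SessionOptions()));
  const tensorflow::Status status = session->Close();
  std::cout << "Session close status: " << status.ToString() << std::endl;
}
```

If this compiles, links, and runs, the more involved `load-graph` and `load-graph-server` targets should at least find their headers and libraries.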
-------------------------------------------------------------------------------- /code/tf-graph/demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | ## CMAKE SETUP 3 | ########################################################### 4 | 5 | cmake_minimum_required(VERSION 3.2) 6 | project(canvas) 7 | 8 | ########################################################### 9 | ## INCLUDES 10 | ########################################################### 11 | 12 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 13 | 14 | ########################################################### 15 | ## SOURCES 16 | ########################################################### 17 | 18 | set(CANVAS_SOURCES main.cpp) 19 | 20 | ########################################################### 21 | ## TARGETS 22 | ########################################################### 23 | 24 | # The following variables are for Qt support 25 | set(CMAKE_AUTOMOC ON) 26 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 27 | 28 | # Find the actual Qt5 packages 29 | find_package(Qt5Widgets REQUIRED) 30 | find_package(Qt5Qml REQUIRED) 31 | find_package(Qt5Quick REQUIRED) 32 | find_package(Qt5Svg REQUIRED) 33 | 34 | add_executable(canvas ${CANVAS_SOURCES}) 35 | 36 | ########################################################### 37 | ## COMPILER FLAGS 38 | ########################################################### 39 | 40 | target_compile_options(canvas PUBLIC -std=c++14) 41 | -------------------------------------------------------------------------------- /code/tf-graph/demo/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | const int kPort = 6666; 9 | 10 | BackEnd::BackEnd(QObject* parent) 11 | : QObject(parent), socket(new QTcpSocket(this)) { 12 | socket->connectToHost("localhost", kPort); 13 | 14 | QObject::connect(socket, &QTcpSocket::connected, [] { 15 | qDebug() << "Connected to localhost:" << kPort; 16 | }); 17 | 18 | QObject::connect(socket, &QTcpSocket::readyRead, [this] { 19 | emit imageReady(socket->readAll()); 20 | }); 21 | } 22 | 23 | void BackEnd::generateImage(int digit, double a, double b) { 24 | if (socket->waitForConnected(3000)) { 25 | const auto string = QString("%1 %2 %3").arg(digit).arg(a).arg(b); 26 | socket->write(string.toStdString().c_str()); 27 | } else { 28 | qDebug() << "Error connecting to server!"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/tf-graph/demo/backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H 2 | #define BACKEND_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class BackEnd : public QObject { 9 | Q_OBJECT 10 | 11 | public: 12 | explicit BackEnd(QObject* parent = nullptr); 13 | 14 | Q_INVOKABLE void generateImage(int digit, double a, double b); 15 | 16 | signals: 17 | 18 | void imageReady(QString imagePath); 19 | 20 | private: 21 | QTcpSocket* socket; 22 | }; 23 | 24 | #endif // BACKEND_H 25 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "backend.h" 5 | 6 | int main(int argc, char** argv) { 7 | QApplication app(argc, argv); 8 | QQmlApplicationEngine engine; 9 
| 10 | qmlRegisterType("demo.backend", 1, 0, "BackEnd"); 11 | 12 | engine.load(QUrl("qrc:///main.qml")); 13 | 14 | return app.exec(); 15 | } 16 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.pro: -------------------------------------------------------------------------------- 1 | TARGET = demo 2 | TEMPLATE = app 3 | QT += widgets qml quick core 4 | RESOURCES += qml.qrc 5 | SOURCES += main.cpp backend.cpp 6 | HEADERS += backend.h 7 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.qml: -------------------------------------------------------------------------------- 1 | import QtQuick 2.6 2 | import QtQuick.Controls 2.0 3 | import QtQuick.Layouts 1.3 4 | import QtQuick.Controls.Styles 1.4 5 | import demo.backend 1.0 6 | 7 | ApplicationWindow { 8 | id: root 9 | width: 400 10 | height: 450 11 | visible: true 12 | title: "Demo" 13 | color: "black" 14 | 15 | Image { 16 | id: image 17 | width: 400 18 | height: 280 19 | fillMode: Image.PreserveAspectFit 20 | source: "file:/tmp/gan-out.png" 21 | cache: false 22 | } 23 | 24 | BackEnd { 25 | id: backend 26 | function generate() { 27 | backend.generateImage(digit.value, a.value, b.value); 28 | } 29 | onImageReady: image.source = "file:" + imagePath 30 | } 31 | 32 | ColumnLayout { 33 | anchors.top: image.bottom 34 | anchors.bottom: parent.bottom 35 | anchors.bottomMargin: 10 36 | anchors.horizontalCenter: parent.horizontalCenter 37 | Slider { 38 | id: digit 39 | from: 0 40 | to: 9 41 | value: 0 42 | stepSize: 1 43 | snapMode: Slider.SnapAlways 44 | onMoved: backend.generate() 45 | 46 | Text { 47 | anchors.right: parent.left 48 | anchors.rightMargin: 10 49 | anchors.verticalCenter: parent.verticalCenter 50 | text: Math.ceil(digit.value) 51 | color: "white" 52 | font.pixelSize: 20 53 | } 54 | } 55 | 56 | Slider { 57 | id: a 58 | from: -3 59 | to: +3 60 | value: 0 61 | stepSize: 0.1 62 | snapMode: Slider.SnapAlways 63 | onMoved: backend.generate() 64 | 65 | Text { 66 | anchors.right: parent.left 67 | anchors.rightMargin: 10 68 | anchors.verticalCenter: parent.verticalCenter 69 | text: a.value.toPrecision(1) 70 | color: "white" 71 | font.pixelSize: 20 72 | } 73 | } 74 | 75 | Slider { 76 | id: b 77 | from: -3 78 | to: +3 79 | value: 0 80 | stepSize: 0.1 81 | snapMode: Slider.SnapAlways 82 | onMoved: backend.generate() 83 | 84 | Text { 85 | anchors.right: parent.left 86 | anchors.rightMargin: 10 87 | anchors.verticalCenter: parent.verticalCenter 88 | text: b.value.toPrecision(1) 89 | color: "white" 90 | font.pixelSize: 20 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.qmlc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/tf-graph/demo/main.qmlc -------------------------------------------------------------------------------- /code/tf-graph/demo/qml.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | main.qml 5 | 6 | 7 | -------------------------------------------------------------------------------- /code/tf-graph/infogan.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | import matplotlib.pyplot as plot 3 | import numpy as np 4 | import tensorflow as tf 5 | from keras.layers import (Activation, 
BatchNormalization, Concatenate, Conv2D, 6 | Dense, Flatten, Input, LeakyReLU, Reshape, Lambda, 7 | UpSampling2D) 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from tensorflow.examples.tutorials.mnist import input_data 11 | 12 | gpu_options = tf.GPUOptions(allow_growth=True) 13 | session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 14 | K.set_session(session) 15 | 16 | # Supress warnings about wrong compilation of TensorFlow. 17 | tf.logging.set_verbosity(tf.logging.ERROR) 18 | tf.set_random_seed(42) 19 | 20 | noise_size = 100 21 | 22 | latent_discrete = 10 23 | latent_continuous = 2 24 | latent_size = latent_discrete + latent_continuous 25 | 26 | ## G 27 | 28 | z = Input(shape=[noise_size], name='z') 29 | c = Input(shape=[latent_size], name='c') 30 | G = Concatenate()([z, c]) 31 | 32 | G = Dense(7 * 7 * 256)(G) 33 | G = BatchNormalization(momentum=0.9)(G) 34 | G = LeakyReLU(alpha=0.2)(G) 35 | G = Reshape((7, 7, 256))(G) 36 | 37 | G = UpSampling2D()(G) 38 | G = Conv2D(128, (5, 5), padding='same')(G) 39 | G = BatchNormalization(momentum=0.9)(G) 40 | G = LeakyReLU(alpha=0.2)(G) 41 | 42 | G = UpSampling2D()(G) 43 | G = Conv2D(64, (5, 5), padding='same')(G) 44 | G = BatchNormalization(momentum=0.9)(G) 45 | G = LeakyReLU(alpha=0.2)(G) 46 | 47 | G = Conv2D(32, (5, 5), padding='same')(G) 48 | G = BatchNormalization(momentum=0.9)(G) 49 | G = LeakyReLU(alpha=0.2)(G) 50 | 51 | G = Conv2D(1, (5, 5), padding='same')(G) 52 | G = Activation('tanh', name='G_final')(G) 53 | 54 | ## D 55 | 56 | x = Input(shape=(28, 28, 1)) 57 | D = Conv2D(32, (5, 5), strides=(2, 2), padding='same')(x) 58 | D = LeakyReLU(alpha=0.2)(D) 59 | 60 | D = Conv2D(64, (5, 5), strides=(2, 2), padding='same')(D) 61 | D = LeakyReLU(alpha=0.2)(D) 62 | 63 | D = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(D) 64 | D = LeakyReLU(alpha=0.2)(D) 65 | 66 | D = Conv2D(256, (5, 5), padding='same')(D) 67 | D = LeakyReLU(alpha=0.2)(D) 68 | D = Flatten(name='D_final')(D) 69 | 70 | 71 | def latent_activations(Q): 72 | Q_discrete = Activation('softmax')(Q[:, :latent_discrete]) 73 | Q_continuous = Activation('sigmoid')(Q[:, -latent_continuous:]) 74 | return Concatenate(axis=1)([Q_discrete, Q_continuous]) 75 | 76 | 77 | Q = Dense(latent_discrete + 2 * latent_continuous)(D) 78 | Q = Lambda(latent_activations)(Q) 79 | 80 | P = Dense(1, activation='sigmoid')(D) 81 | 82 | 83 | def mutual_information(prior_c, c_given_x): 84 | h_c = K.categorical_crossentropy(prior_c, prior_c) 85 | h_c_given_x = K.categorical_crossentropy(prior_c, c_given_x) 86 | return K.mean(h_c_given_x - h_c) 87 | 88 | 89 | def joint_mutual_information(prior_c, c_given_x): 90 | discrete = mutual_information(prior_c[:, :latent_discrete], 91 | c_given_x[:, :latent_discrete], 92 | K.categorical_crossentropy) 93 | continuous_1 = mutual_information(prior_c[:, -2], c_given_x[:, -2]) 94 | continuous_2 = mutual_information(prior_c[:, -1], c_given_x[:, -1]) 95 | return discrete + continuous_1 + continuous_2 96 | 97 | 98 | generator = Model([z, c], G, name='G') 99 | 100 | discriminator = Model(x, P, name='D') 101 | discriminator.compile( 102 | loss='binary_crossentropy', 103 | optimizer=Adam(lr=5e-4, beta_1=0.5, decay=2e-7)) 104 | 105 | # x = G(z, c) 106 | q = Model(x, Q, name='Q') 107 | q.compile( 108 | loss=joint_mutual_information, 109 | optimizer=Adam(lr=2e-4, beta_1=0.5, decay=2e-7)) 110 | 111 | discriminator.trainable = False 112 | q.trainable = False 113 | infogan = Model([z, c], [discriminator(G), q(G)], name='InfoGAN') 114 | 
infogan.compile( 115 | loss=['binary_crossentropy', joint_mutual_information], 116 | optimizer=Adam(lr=2e-4, beta_1=0.5, decay=1e-7)) 117 | 118 | generator.summary() 119 | discriminator.summary() 120 | 121 | data = input_data.read_data_sets('MNIST_data').train.images 122 | data = data.reshape(-1, 28, 28, 1) * 2 - 1 123 | 124 | number_of_epochs = 30 125 | batch_size = 256 126 | 127 | print(generator.outputs[0]) 128 | 129 | 130 | def sample_noise(size): 131 | return np.random.randn(size, noise_size) 132 | 133 | 134 | def sample_prior(size): 135 | discrete = np.random.multinomial(1, [0.1] * 10, size=size) 136 | continuous_1 = np.random.uniform(-1, +1, size).reshape(-1, 1) 137 | continuous_2 = np.random.uniform(-1, +1, size).reshape(-1, 1) 138 | return np.concatenate([discrete, continuous_1, continuous_2], axis=1) 139 | 140 | 141 | def smooth_labels(size): 142 | return np.random.uniform(low=0.8, high=1.0, size=size) 143 | 144 | 145 | saver = tf.train.Saver(max_to_keep=1) 146 | saver_def = saver.as_saver_def() 147 | 148 | print(saver_def.filename_tensor_name) 149 | print(saver_def.restore_op_name) 150 | 151 | try: 152 | for epoch in range(number_of_epochs): 153 | print('Epoch: {0}/{1}'.format(epoch + 1, number_of_epochs)) 154 | for batch_start in range(0, len(data) - batch_size + 1, batch_size): 155 | noise = sample_noise(batch_size) 156 | latent_code = sample_prior(batch_size) 157 | generated_images = generator.predict([noise, latent_code]) 158 | 159 | real_images = data[batch_start:batch_start + batch_size] 160 | assert len(generated_images) == len(real_images) 161 | all_images = np.concatenate( 162 | [generated_images, real_images], axis=0) 163 | all_images += np.random.normal(0, 0.1, all_images.shape) 164 | 165 | labels = np.zeros(len(all_images)) 166 | labels[batch_size:] = smooth_labels(batch_size) 167 | d_loss = discriminator.train_on_batch(all_images, labels) 168 | 169 | q_loss = q.train_on_batch(generated_images, latent_code) 170 | 171 | labels = np.ones(batch_size) 172 | noise = sample_noise(batch_size) 173 | latent_code = sample_prior(batch_size) 174 | g_loss, _, _ = infogan.train_on_batch([noise, latent_code], 175 | [labels, latent_code]) 176 | 177 | batch_index = batch_start // batch_size + 1 178 | message = '\rBatch: {0} | D: {1:.10f} | G: {2:.10f} | Q: {3:.10f}' 179 | print(message.format(batch_index, d_loss, g_loss, q_loss), end='') 180 | print() 181 | np.random.shuffle(data) 182 | tf.train.write_graph( 183 | session.graph_def, 'graphs', 'graph.pb', as_text=False) 184 | saver.save(session, 'checkpoints/chkp') 185 | 186 | except KeyboardInterrupt: 187 | print() 188 | 189 | print('Training complete!') 190 | -------------------------------------------------------------------------------- /code/tf-graph/load-graph-server.cpp: -------------------------------------------------------------------------------- 1 | #include "common/socket.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | const size_t kNoiseSize = 100; 28 | const size_t kDiscreteCodeSize = 10; 29 | const size_t kContinuousCodeSize = 2; 30 | const size_t kCodeSize = kDiscreteCodeSize + kContinuousCodeSize; 31 | const char* const kLearningPhase = "batch_normalization_1/keras_learning_phase"; 32 | 33 | void load_graph(const std::string& graph_path, 34 | 
std::unique_ptr& session) { 35 | tensorflow::GraphDef graph_def; 36 | TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), 37 | graph_path, 38 | &graph_def)); 39 | 40 | session.reset(tensorflow::NewSession(tensorflow::SessionOptions())); 41 | TF_CHECK_OK(session->Create(graph_def)); 42 | } 43 | 44 | tensorflow::Tensor sample_noise() { 45 | using RandomNormal = Eigen::internal::NormalRandomGenerator; 46 | tensorflow::Tensor noise(tensorflow::DT_FLOAT, 47 | tensorflow::TensorShape({1, kNoiseSize})); 48 | noise.matrix().setRandom(); 49 | return noise; 50 | } 51 | 52 | tensorflow::Tensor create_code(int digit, double a, double b) { 53 | tensorflow::Tensor tensor(tensorflow::DT_FLOAT, 54 | tensorflow::TensorShape({1, kCodeSize})); 55 | auto code = tensor.flat(); 56 | code.setZero(); 57 | 58 | assert(digit >= 0 && digit <= 9); 59 | code(digit) = 1; 60 | code(kDiscreteCodeSize) = a; 61 | code(kDiscreteCodeSize + 1) = b; 62 | 63 | return tensor; 64 | } 65 | 66 | tensorflow::Tensor generate(std::unique_ptr& session, 67 | tensorflow::Tensor& noise, 68 | tensorflow::Tensor& code) { 69 | tensorflow::Tensor zero(tensorflow::DT_BOOL, tensorflow::TensorShape()); 70 | zero.scalar()(0) = false; 71 | std::vector> feeds = 72 | {{"z:0", noise}, {"c:0", code}, {kLearningPhase, zero}}; 73 | 74 | std::vector outputs; 75 | TF_CHECK_OK(session->Run(feeds, {"G_final/Tanh:0"}, {}, &outputs)); 76 | 77 | assert(!outputs.empty()); 78 | return outputs.front(); 79 | } 80 | 81 | void restore_session(const std::string& checkpoint_path, 82 | std::unique_ptr& session) { 83 | tensorflow::Tensor checkpoint_tensor(tensorflow::DT_STRING, 84 | tensorflow::TensorShape()); 85 | checkpoint_tensor.flat()(0) = checkpoint_path; 86 | TF_CHECK_OK(session->Run({{"save/Const:0", checkpoint_tensor}}, 87 | {}, 88 | {"save/restore_all"}, 89 | nullptr)); 90 | LOG(INFO) << "Restored session from " << checkpoint_path; 91 | } 92 | 93 | void save_image(const std::string& filename, float* buffer) { 94 | cv::Mat image(28, 28, CV_32F, buffer); 95 | cv::normalize(image, image, 0.0, 255.0, cv::NORM_MINMAX); 96 | image.convertTo(image, CV_8UC3); 97 | cv::imwrite(filename, image); 98 | LOG(INFO) << "Wrote " << filename; 99 | } 100 | 101 | int main(int argc, char* argv[]) { 102 | if (argc < 3) { 103 | std::cerr << "usage: load-graph \n"; 104 | std::exit(EXIT_FAILURE); 105 | } 106 | 107 | tensorflow::port::InitMain(argv[0], &argc, &argv); 108 | 109 | std::unique_ptr session; 110 | load_graph(argv[1], session); 111 | restore_session(argv[2], session); 112 | 113 | Socket socket(6666); 114 | std::cerr << "Listening on port 6666" << std::endl; 115 | 116 | socket.accept(); 117 | std::cerr << "Connection established" << std::endl; 118 | 119 | for (size_t count = 0; true; ++count) { 120 | std::istringstream stream(socket.read(256)); 121 | int digit = 0; 122 | double a = 0, b = 0; 123 | stream >> digit >> a >> b; 124 | 125 | LOG(INFO) << "Prediction request for code: \"" << digit << " " << a << " " 126 | << b << "\""; 127 | 128 | auto noise = sample_noise(); 129 | auto code = create_code(digit, a, b); 130 | auto tensor = generate(session, noise, code); 131 | auto image = tensor.flat(); 132 | image = (image + 1.0f) / 2.0f; 133 | const std::string image_path = 134 | "/tmp/gan-out-" + std::to_string(count % 2) + ".png"; 135 | save_image(image_path, image.data()); 136 | 137 | socket.write(image_path); 138 | 139 | LOG(INFO) << "Wrote " << image_path; 140 | } 141 | 142 | TF_CHECK_OK(session->Close()); 143 | } 144 | 
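The wire protocol between the Qt demo and this server is a single space-separated `digit a b` request on port 6666, answered with the path of the generated PNG. For testing the server without building the Qt demo, a minimal stand-alone client over raw POSIX sockets might look like the sketch below; the file name and the hard-coded request values are invented for illustration, and the one-message-per-request framing is assumed to match what `backend.cpp` and the server loop above do.

```cpp
// gan-client-sketch.cpp -- hypothetical, not part of the repository.
// Sends one "digit a b" request to the load-graph server and prints the
// image path it replies with.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstdio>
#include <iostream>
#include <string>

int main() {
  const int fd = ::socket(AF_INET, SOCK_STREAM, 0);
  if (fd < 0) {
    std::perror("socket");
    return 1;
  }

  sockaddr_in address{};
  address.sin_family = AF_INET;
  address.sin_port = htons(6666);  // Port the server listens on.
  ::inet_pton(AF_INET, "127.0.0.1", &address.sin_addr);

  if (::connect(fd, reinterpret_cast<sockaddr*>(&address), sizeof(address)) < 0) {
    std::perror("connect");
    return 1;
  }

  // Same format the Qt BackEnd sends: "<digit> <a> <b>".
  const std::string request = "3 0.5 -0.25";
  ::write(fd, request.data(), request.size());

  // The server answers with the path of the PNG it wrote, at most 256 bytes.
  char buffer[256] = {};
  const ssize_t received = ::read(fd, buffer, sizeof(buffer) - 1);
  if (received > 0) {
    std::cout << "Generated image: " << std::string(buffer, received) << std::endl;
  }

  ::close(fd);
}
```

Start the server first, then run this client; per the server loop above, successive requests alternate between `/tmp/gan-out-0.png` and `/tmp/gan-out-1.png`.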
-------------------------------------------------------------------------------- /code/tf-graph/load-graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | const size_t kNoiseSize = 100; 26 | const size_t kDiscreteCodeSize = 10; 27 | const size_t kContinuousCodeSize = 2; 28 | const size_t kCodeSize = kDiscreteCodeSize + kContinuousCodeSize; 29 | const char* const kLearningPhase = "batch_normalization_1/keras_learning_phase"; 30 | 31 | void load_graph(const std::string& graph_path, 32 | std::unique_ptr& session) { 33 | tensorflow::GraphDef graph_def; 34 | TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), 35 | graph_path, 36 | &graph_def)); 37 | 38 | session.reset(tensorflow::NewSession(tensorflow::SessionOptions())); 39 | TF_CHECK_OK(session->Create(graph_def)); 40 | } 41 | 42 | tensorflow::Tensor sample_noise() { 43 | using RandomNormal = Eigen::internal::NormalRandomGenerator; 44 | tensorflow::Tensor noise(tensorflow::DT_FLOAT, 45 | tensorflow::TensorShape({1, kNoiseSize})); 46 | noise.matrix().setRandom(); 47 | return noise; 48 | } 49 | 50 | tensorflow::Tensor sample_code() { 51 | using RandomUniform = Eigen::internal::UniformRandomGenerator; 52 | 53 | static std::random_device seed; 54 | static std::mt19937 rng(seed()); 55 | std::uniform_int_distribution indices(0, kDiscreteCodeSize - 1); 56 | 57 | tensorflow::Tensor code(tensorflow::DT_FLOAT, 58 | tensorflow::TensorShape({1, kCodeSize})); 59 | code.flat().setZero(); 60 | 61 | const auto one_hot_index = indices(rng); 62 | code.flat()(one_hot_index) = 1; 63 | 64 | Eigen::array offsets = {{kDiscreteCodeSize}}; 65 | Eigen::array extents = {{kContinuousCodeSize}}; 66 | auto continuous = code.flat().slice(offsets, extents); 67 | continuous.setRandom(); 68 | continuous = (continuous * 2.0f) - 1.0f; 69 | 70 | return code; 71 | } 72 | 73 | void restore_session(const std::string& checkpoint_path, 74 | std::unique_ptr& session) { 75 | tensorflow::Tensor checkpoint_tensor(tensorflow::DT_STRING, 76 | tensorflow::TensorShape()); 77 | checkpoint_tensor.flat()(0) = checkpoint_path; 78 | TF_CHECK_OK(session->Run({{"save/Const:0", checkpoint_tensor}}, 79 | {}, 80 | {"save/restore_all"}, 81 | nullptr)); 82 | LOG(INFO) << "Restored session from " << checkpoint_path; 83 | } 84 | 85 | void save_image(const char* filename, float* buffer) { 86 | cv::Mat image(28, 28, CV_32F, buffer); 87 | cv::normalize(image, image, 0.0, 255.0, cv::NORM_MINMAX); 88 | image.convertTo(image, CV_8UC3); 89 | cv::imwrite(filename, image); 90 | LOG(INFO) << "Wrote " << filename; 91 | } 92 | 93 | int main(int argc, char* argv[]) { 94 | if (argc < 3) { 95 | std::cerr << "usage: load-graph \n"; 96 | std::exit(EXIT_FAILURE); 97 | } 98 | 99 | tensorflow::port::InitMain(argv[0], &argc, &argv); 100 | 101 | std::unique_ptr session; 102 | load_graph(argv[1], session); 103 | restore_session(argv[2], session); 104 | 105 | auto noise = sample_noise(); 106 | auto code = sample_code(); 107 | 108 | std::cout << noise.flat() << std::endl; 109 | std::cout << "------------------" << std::endl; 110 | std::cout << code.flat() << std::endl; 111 | 112 | tensorflow::Tensor zero(tensorflow::DT_BOOL, tensorflow::TensorShape()); 113 | zero.scalar()(0) = 
false; 114 | std::vector> feeds = 115 | {{"z:0", noise}, {"c:0", code}, {kLearningPhase, zero}}; 116 | 117 | std::vector outputs; 118 | TF_CHECK_OK(session->Run(feeds, {"G_final/Tanh"}, {}, &outputs)); 119 | 120 | assert(!outputs.empty()); 121 | auto image = outputs.front().flat(); 122 | image = (image + 1.0f) / 2.0f; 123 | save_image("out.png", image.data()); 124 | 125 | TF_CHECK_OK(session->Close()); 126 | } 127 | -------------------------------------------------------------------------------- /code/tf-kernel/.bash_history: -------------------------------------------------------------------------------- 1 | ls 2 | make 3 | clang 4 | clang++-3.8 5 | exit 6 | ls 7 | make 8 | exit 9 | -------------------------------------------------------------------------------- /code/tf-kernel/README.md: -------------------------------------------------------------------------------- 1 | # tf-kernel 2 | 3 | Example of creating a custom TensorFlow operator. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. Install Python (preferrably 3) and `pip install -r requirements.txt` found in the `code/` root folder, 10 | 11 | Then `make`. 12 | 13 | ## Running 14 | 15 | The `test.py` script gives an example of loading the custom op in Python. 16 | 17 | The `cpu/` folder can be run exclusively on a CPU, the `cpu+gpu` stuff requires 18 | a GPU to run (at least to run `test.py`, the kernel itself is available on 19 | both). 20 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := kernel 2 | HEADERS := -I `python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())'` 3 | CXXFLAGS := -std=c++11 -fPIC -O2 -D GOOGLE_CUDA=1 4 | GPUFLAGS := -arch=sm_35 -std=c++11 -O2 -D GOOGLE_CUDA=1 \ 5 | -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 6 | 7 | ifeq ($(shell uname), Darwin) 8 | CXXFLAGS := $(CXXFLAGS) -undefined dynamic_lookup 9 | endif 10 | 11 | all: cpu+gpu 12 | 13 | gpu: $(TARGET).cu 14 | nvcc $(HEADERS) $(GPUFLAGS) -c $(TARGET).cu -o $(TARGET).cu.o 15 | 16 | cpu: $(TARGET).cpp 17 | $(CXX) $(HEADERS) $(CXXFLAGS) -c $(TARGET).cpp -o $(TARGET).o 18 | 19 | cpu+gpu: cpu gpu 20 | $(CXX) $(HEADERS) $(CXXFLAGS) -L /usr/local/cuda/lib64 \ 21 | $(TARGET).o $(TARGET).cu.o -shared -o $(TARGET).so -fPIC -lcudart 22 | 23 | .phony: clean 24 | 25 | clean: 26 | rm $(TARGET).so $(TARGET).o $(TARGET).cu.o || echo -n "" 27 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/cpp_con_sigmoid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | _kernel_path = os.environ.get('CPP_CON_KERNEL_PATH', './kernel.so') 5 | _module = tf.load_op_library(_kernel_path) 6 | 7 | cpp_con_sigmoid = _module.cpp_con_sigmoid 8 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/kernel.cpp: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if GOOGLE_CUDA 8 | #define EIGEN_USE_GPU 9 | #include "kernel.cuh" 10 | #endif // GOOGLE_CUDA 11 | 12 | template 13 | struct CPUKernel { 14 | void operator()(tensorflow::OpKernelContext*, 15 | const tensorflow::Tensor& input_tensor, 16 | tensorflow::Tensor& output_tensor) { 17 | auto input = input_tensor.flat(); 18 | auto output = output_tensor.flat(); 19 | 
output = (1 + (-input).exp()).inverse(); 20 | } 21 | }; 22 | 23 | namespace tensorflow { 24 | 25 | REGISTER_OP("CppConSigmoid") 26 | .Attr("T: {float, double}") 27 | .Input("tensor: T") 28 | .Output("output: T") 29 | .SetShapeFn([](shape_inference::InferenceContext* context) { 30 | context->set_output(0, context->input(0)); 31 | return Status::OK(); 32 | }); 33 | 34 | template