├── .gitignore ├── Dockerfile ├── README.md ├── code ├── .clang-format ├── common │ ├── socket.cpp │ └── socket.h ├── cudnn │ ├── Makefile │ ├── README.md │ ├── conv.cu │ └── cppcon-logo.png ├── dlib │ ├── Makefile │ ├── README.md │ └── lenet.cpp ├── mkl │ ├── Makefile │ ├── README.md │ ├── conv.cpp │ └── cppcon-logo.png ├── mxnet │ ├── Makefile │ ├── README.md │ ├── demo │ │ ├── .qmake.stash │ │ ├── Makefile │ │ ├── backend.cpp │ │ ├── backend.h │ │ ├── main.cpp │ │ ├── main.pro │ │ ├── main.qml │ │ ├── main.qmlc │ │ └── qml.qrc │ ├── download_mnist.sh │ ├── lenet.cpp │ └── mnist_data │ │ ├── t10k-images-idx3-ubyte │ │ ├── t10k-labels-idx1-ubyte │ │ ├── train-images-idx3-ubyte │ │ └── train-labels-idx1-ubyte ├── requirements.txt ├── tf-graph │ ├── Makefile │ ├── README.md │ ├── demo │ │ ├── .qmake.stash │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── backend.cpp │ │ ├── backend.h │ │ ├── main.cpp │ │ ├── main.pro │ │ ├── main.qml │ │ ├── main.qmlc │ │ └── qml.qrc │ ├── infogan.py │ ├── load-graph-server.cpp │ └── load-graph.cpp └── tf-kernel │ ├── .bash_history │ ├── README.md │ ├── cpu+gpu │ ├── Makefile │ ├── cpp_con_sigmoid.py │ ├── kernel.cpp │ ├── kernel.cu │ ├── kernel.cuh │ └── test.py │ └── cpu │ ├── Makefile │ ├── cpp_con_sigmoid.py │ ├── kernel.cpp │ └── test.py └── presentation ├── .tags ├── .tags1 ├── figures ├── alexnet-kernels.png ├── alexnet-training-days.png ├── bedrooms.png ├── big-sur.jpg ├── caffe2.png ├── cntk.jpg ├── cntk.png ├── cppcon-logo-blurry.png ├── cppcon-logo-nn.jpg ├── cppcon-logo.png ├── cudnn.png ├── deepdream.jpg ├── dgx-1.jpg ├── dl-trend.png ├── face-interpolations.png ├── faces.png ├── fb-scaling.png ├── gpu-power.png ├── graphcore.jpg ├── imagenet-gpu.jpg ├── infogan-mnist.png ├── intel-xeon.jpg ├── intel.png ├── jetson-tx2.jpg ├── mnist1.png ├── mnist2.png ├── movidius.jpg ├── mxnet.png ├── mxnet2.png ├── nervana-engine.png ├── nvidia-stock.png ├── piranha.png ├── pytorch.png ├── sgemm.jpg ├── shark.jpg ├── shark.png ├── sigmoid.png ├── sky │ ├── sky.jpg │ ├── sky00.jpg │ ├── sky01.jpg │ ├── sky02.jpg │ ├── sky10.jpg │ ├── sky11.jpg │ ├── sky12.jpg │ ├── sky20.jpg │ ├── sky21.jpg │ └── sky22.jpeg ├── stickman.jpg ├── teapot.jpg ├── tensorflow.png ├── theano.png ├── tioga-pass.jpg ├── titan-x.jpg ├── torch.png ├── tpu.jpg ├── weird-fish.jpg └── xkcd.png ├── notes.md ├── preamble.tex ├── presentation.pdf ├── presentation.tex └── slides ├── down.tex ├── down ├── graph.tex ├── hardware.tex ├── kernel.tex ├── layer.tex ├── model.tex ├── op.tex └── task.tex ├── intro.tex ├── outro.tex ├── title.tex ├── up.tex └── up ├── graph.tex ├── hardware.tex ├── kernel.tex ├── layer.tex ├── model.tex ├── op.tex └── task.tex /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/latex,c++,python 3 | 4 | ### C++ ### 5 | # Prerequisites 6 | *.d 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | ### LaTeX ### 39 | ## Core latex/pdflatex auxiliary files: 40 | *.aux 41 | *.lof 42 | *.log 43 | *.lot 44 | *.fls 45 | *.toc 46 | *.fmt 47 | *.fot 48 | *.cb 49 | *.cb2 50 | 51 | ## Intermediate documents: 52 | *.dvi 53 | *-converted-to.* 54 | # these 
rules might exclude image files for figures etc. 55 | # *.ps 56 | # *.eps 57 | # *.pdf 58 | 59 | ## Generated if empty string is given at "Please type another file name for output:" 60 | .pdf 61 | 62 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 63 | *.bbl 64 | *.bcf 65 | *.blg 66 | *-blx.aux 67 | *-blx.bib 68 | *.brf 69 | *.run.xml 70 | 71 | ## Build tool auxiliary files: 72 | *.fdb_latexmk 73 | *.synctex 74 | *.synctex(busy) 75 | *.synctex.gz 76 | *.synctex.gz(busy) 77 | *.pdfsync 78 | 79 | ## Auxiliary and intermediate files from other packages: 80 | # algorithms 81 | *.alg 82 | *.loa 83 | 84 | # achemso 85 | acs-*.bib 86 | 87 | # amsthm 88 | *.thm 89 | 90 | # beamer 91 | *.nav 92 | *.pre 93 | *.snm 94 | *.vrb 95 | 96 | # changes 97 | *.soc 98 | 99 | # cprotect 100 | *.cpt 101 | 102 | # elsarticle (documentclass of Elsevier journals) 103 | *.spl 104 | 105 | # endnotes 106 | *.ent 107 | 108 | # fixme 109 | *.lox 110 | 111 | # feynmf/feynmp 112 | *.mf 113 | *.mp 114 | *.t[1-9] 115 | *.t[1-9][0-9] 116 | *.tfm 117 | *.[1-9] 118 | *.[1-9][0-9] 119 | 120 | #(r)(e)ledmac/(r)(e)ledpar 121 | *.end 122 | *.?end 123 | *.[1-9][0-9][0-9] 124 | *.[1-9]R 125 | *.[1-9][0-9]R 126 | *.[1-9][0-9][0-9]R 127 | *.eledsec[1-9] 128 | *.eledsec[1-9]R 129 | *.eledsec[1-9][0-9] 130 | *.eledsec[1-9][0-9]R 131 | *.eledsec[1-9][0-9][0-9] 132 | *.eledsec[1-9][0-9][0-9]R 133 | 134 | # glossaries 135 | *.acn 136 | *.acr 137 | *.glg 138 | *.glo 139 | *.gls 140 | *.glsdefs 141 | 142 | # gnuplottex 143 | *-gnuplottex-* 144 | 145 | # gregoriotex 146 | *.gaux 147 | *.gtex 148 | 149 | # hyperref 150 | 151 | # knitr 152 | *-concordance.tex 153 | # TODO Comment the next line if you want to keep your tikz graphics files 154 | *.tikz 155 | *-tikzDictionary 156 | 157 | # listings 158 | *.lol 159 | 160 | # makeidx 161 | *.idx 162 | *.ilg 163 | *.ind 164 | *.ist 165 | 166 | # minitoc 167 | *.maf 168 | *.mlf 169 | *.mlt 170 | *.mtc[0-9]* 171 | 172 | # minted 173 | _minted* 174 | *.pyg 175 | 176 | # morewrites 177 | *.mw 178 | 179 | # mylatexformat 180 | 181 | # nomencl 182 | *.nlo 183 | 184 | # pax 185 | *.pax 186 | 187 | # sagetex 188 | *.sagetex.sage 189 | *.sagetex.py 190 | *.sagetex.scmd 191 | 192 | # scrwfile 193 | *.wrt 194 | 195 | # sympy 196 | *.sout 197 | *.sympy 198 | sympy-plots-for-*.tex/ 199 | 200 | # pdfcomment 201 | *.upa 202 | *.upb 203 | 204 | # pythontex 205 | *.pytxcode 206 | pythontex-files-*/ 207 | 208 | # thmtools 209 | *.loe 210 | 211 | # TikZ & PGF 212 | *.dpth 213 | *.md5 214 | *.auxlock 215 | 216 | # todonotes 217 | *.tdo 218 | 219 | # easy-todo 220 | *.lod 221 | 222 | # xindy 223 | *.xdy 224 | 225 | # xypic precompiled matrices 226 | *.xyc 227 | 228 | # endfloat 229 | *.ttt 230 | *.fff 231 | 232 | # Latexian 233 | TSWLatexianTemp* 234 | 235 | ## Editors: 236 | # WinEdt 237 | *.bak 238 | *.sav 239 | 240 | # Texpad 241 | .texpadtmp 242 | 243 | # Kile 244 | *.backup 245 | 246 | # KBibTeX 247 | *~[0-9]* 248 | 249 | # auto folder when using emacs and auctex 250 | /auto/* 251 | 252 | # expex forward references with \gathertags 253 | *-tags.tex 254 | 255 | ### Python ### 256 | # Byte-compiled / optimized / DLL files 257 | __pycache__/ 258 | *.py[cod] 259 | *$py.class 260 | 261 | # C extensions 262 | 263 | # Distribution / packaging 264 | .Python 265 | env/ 266 | build/ 267 | develop-eggs/ 268 | dist/ 269 | downloads/ 270 | eggs/ 271 | .eggs/ 272 | lib/ 273 | lib64/ 274 | parts/ 275 | sdist/ 276 | var/ 277 | wheels/ 278 | *.egg-info/ 279 | .installed.cfg 280 | *.egg 281 | 282 | # PyInstaller 283 | # 
Usually these files are written by a python script from a template 284 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 285 | *.manifest 286 | *.spec 287 | 288 | # Installer logs 289 | pip-log.txt 290 | pip-delete-this-directory.txt 291 | 292 | # Unit test / coverage reports 293 | htmlcov/ 294 | .tox/ 295 | .coverage 296 | .coverage.* 297 | .cache 298 | nosetests.xml 299 | coverage.xml 300 | *,cover 301 | .hypothesis/ 302 | 303 | # Translations 304 | *.mo 305 | *.pot 306 | 307 | # Django stuff: 308 | local_settings.py 309 | 310 | # Flask stuff: 311 | instance/ 312 | .webassets-cache 313 | 314 | # Scrapy stuff: 315 | .scrapy 316 | 317 | # Sphinx documentation 318 | docs/_build/ 319 | 320 | # PyBuilder 321 | target/ 322 | 323 | # Jupyter Notebook 324 | .ipynb_checkpoints 325 | 326 | # pyenv 327 | .python-version 328 | 329 | # celery beat schedule file 330 | celerybeat-schedule 331 | 332 | # dotenv 333 | .env 334 | 335 | # virtualenv 336 | .venv 337 | venv/ 338 | ENV/ 339 | 340 | # Spyder project settings 341 | .spyderproject 342 | 343 | # Rope project settings 344 | .ropeproject 345 | 346 | # End of https://www.gitignore.io/api/latex,c++,python 347 | 348 | 349 | ## Other 350 | *.pb 351 | *.app 352 | code/*.png 353 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | MAINTAINER 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | software-properties-common apt-utils 7 | 8 | RUN apt-get update && apt-get install -y \ 9 | clang-3.8 git python3-numpy python3-dev python3-pip python3-wheel 10 | 11 | RUN pip3 install --upgrade pip && pip3 install tensorflow-gpu 12 | 13 | # Additional packages to do work. 14 | RUN apt-get install -y vim emacs 15 | 16 | ENV C clang-3.8 17 | ENV CXX clang++-3.8 18 | 19 | WORKDIR /root 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning with C++ 2 | 3 | Slides and code samples for my talk at CppCon 2017. 
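The `code/` directory holds the self-contained examples referenced in the talk (cuDNN, Intel MKL, dlib, MXNet, a TensorFlow graph loader and a custom TensorFlow kernel), each with its own README, while `presentation/` contains the LaTeX sources and the rendered `presentation.pdf`.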
4 | -------------------------------------------------------------------------------- /code/.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | Standard: Cpp11 3 | BasedOnStyle: Google 4 | 5 | AllowAllParametersOfDeclarationOnNextLine: true 6 | AllowShortBlocksOnASingleLine: false 7 | AllowShortCaseLabelsOnASingleLine: false 8 | AllowShortFunctionsOnASingleLine: true 9 | AllowShortIfStatementsOnASingleLine: true 10 | AllowShortLoopsOnASingleLine: true 11 | 12 | AlignOperands: true 13 | AlignConsecutiveAssignments: false 14 | 15 | BinPackArguments: false 16 | BinPackParameters: false 17 | BreakConstructorInitializersBeforeComma: true 18 | ColumnLimit: 80 19 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 20 | ConstructorInitializerIndentWidth: 0 21 | ContinuationIndentWidth: 4 22 | Cpp11BracedListStyle: true 23 | DerivePointerAlignment: false 24 | IndentCaseLabels: true 25 | IndentWidth: 2 26 | MaxEmptyLinesToKeep: 2 27 | NamespaceIndentation: None 28 | PointerAlignment: Left 29 | SpacesBeforeTrailingComments: 2 30 | TabWidth: 2 31 | UseTab: Never 32 | 33 | PenaltyExcessCharacter: 1000000 34 | PenaltyReturnTypeOnItsOwnLine: 10 35 | PenaltyBreakBeforeFirstCallParameter: 1000 36 | -------------------------------------------------------------------------------- /code/common/socket.cpp: -------------------------------------------------------------------------------- 1 | #include "socket.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace { 15 | void handle_blocking(int fd) { 16 | // Will be necessary when calling setsockopt to free busy sockets 17 | int yes = 1; 18 | 19 | // Reclaim blocked but unused sockets (from zombie processes) 20 | const int return_code = 21 | setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes); 22 | 23 | if (return_code == -1) { 24 | throw std::runtime_error("Error reclaiming socket"); 25 | } 26 | } 27 | 28 | int get_socket_for_first_valid_address(addrinfo* server_info) { 29 | int fd = -1; 30 | 31 | for (auto* address = server_info; address; address = address->ai_next) { 32 | fd = socket(address->ai_family, address->ai_socktype, address->ai_protocol); 33 | if (fd == -1) continue; 34 | 35 | handle_blocking(fd); 36 | 37 | if (bind(fd, address->ai_addr, address->ai_addrlen) == 1) { 38 | close(fd); 39 | } 40 | 41 | break; 42 | } 43 | 44 | if (fd == -1) { 45 | throw std::runtime_error("Error finding valid address"); 46 | } 47 | 48 | return fd; 49 | } 50 | 51 | addrinfo* get_server_information(int port) { 52 | struct addrinfo hints; 53 | memset(&hints, 0, sizeof hints); 54 | hints.ai_family = AF_UNSPEC; 55 | hints.ai_socktype = SOCK_STREAM; 56 | 57 | addrinfo* server_info; 58 | const auto port_string = std::to_string(port); 59 | const auto return_code = 60 | getaddrinfo("localhost", port_string.c_str(), &hints, &server_info); 61 | 62 | if (return_code != 0) { 63 | throw std::runtime_error(std::string("getaddrinfo failed: ") + 64 | gai_strerror(return_code)); 65 | } 66 | 67 | return server_info; 68 | } 69 | 70 | 71 | int get_server_socket(int port) { 72 | addrinfo* server_info = get_server_information(port); 73 | const int server_socket = get_socket_for_first_valid_address(server_info); 74 | assert(server_socket != -1); 75 | freeaddrinfo(server_info); 76 | 77 | return server_socket; 78 | } 79 | } // namespace 80 | 81 | Socket::Socket(int port) : server_socket(get_server_socket(port)) { 82 | if 
(listen(server_socket, /*queue=*/10) == 1) { 83 | throw std::runtime_error("Error listening on given socket!"); 84 | } 85 | } 86 | 87 | Socket::~Socket() { 88 | close(server_socket); 89 | close(connection_socket); 90 | } 91 | 92 | void Socket::accept() { 93 | struct sockaddr_storage other_address; 94 | socklen_t sin_size = sizeof other_address; 95 | const int fd = ::accept(server_socket, 96 | reinterpret_cast(&other_address), 97 | &sin_size); 98 | if (fd == -1) { 99 | throw std::runtime_error("Error accepting"); 100 | } else { 101 | connection_socket = fd; 102 | } 103 | } 104 | 105 | std::string Socket::read(int max_bytes) { 106 | std::vector buffer(max_bytes, '\0'); 107 | 108 | if (recv(connection_socket, buffer.data(), buffer.size(), 0) == 0) { 109 | throw std::runtime_error("Error receiving from client"); 110 | } 111 | 112 | return std::string(buffer.data()); 113 | } 114 | 115 | void Socket::write(const std::string& data) { 116 | if (send(connection_socket, data.data(), data.size(), 0) == 0) { 117 | throw std::runtime_error("Error sending to client"); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /code/common/socket.h: -------------------------------------------------------------------------------- 1 | #ifndef SOCKET_H 2 | #define SOCKET_H 3 | 4 | #include 5 | 6 | class Socket { 7 | public: 8 | explicit Socket(int port); 9 | 10 | ~Socket(); 11 | 12 | void accept(); 13 | 14 | std::string read(int max_bytes); 15 | 16 | void write(const std::string& data); 17 | 18 | private: 19 | const int server_socket{-1}; 20 | int connection_socket{-1}; 21 | }; 22 | 23 | #endif // SOCKET_H 24 | -------------------------------------------------------------------------------- /code/cudnn/Makefile: -------------------------------------------------------------------------------- 1 | CXX := nvcc 2 | TARGET := conv 3 | CUDNN_PATH := cudnn 4 | HEADERS := -I $(CUDNN_PATH)/include 5 | LIBS := -L $(CUDNN_PATH)/lib64 -L/usr/local/lib 6 | CXXFLAGS := -arch=sm_35 -std=c++11 -O2 7 | 8 | all: conv 9 | 10 | conv: $(TARGET).cu 11 | $(CXX) $(CXXFLAGS) $(HEADERS) $(LIBS) $(TARGET).cu -o $(TARGET) \ 12 | -lcudnn -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 13 | 14 | .phony: clean 15 | 16 | clean: 17 | rm $(TARGET) || echo -n "" 18 | -------------------------------------------------------------------------------- /code/cudnn/README.md: -------------------------------------------------------------------------------- 1 | # cudnn 2 | 3 | Example of doing an edge-detection convolution using NVIDIA cuDNN. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 0. A GPU and the whole CUDA stack, including the `nvcc` compiler, 10 | 1. Install NVIDIA cuDNN for your system: https://developer.nvidia.com/rdp/cudnn-download, 11 | 2. OpenCV2 (consult your package manager). 12 | 13 | Set the `CUDNN_PATH` environment variable and `make`, e.g.: 14 | 15 | ```shell 16 | $ CUDNN_PATH=/opt/cudnn make 17 | ``` 18 | 19 | ## Running 20 | 21 | The binary expects the path to an image, e.g. for the `cppcon-logo.png` image 22 | that's already there: 23 | 24 | ```sh 25 | $ ./conv cppcon-logo.png 26 | ``` 27 | 28 | It then generates an image called `cudnn-out.png`. 
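For reference, the edge detection comes from the Laplacian-style 3x3 kernel hard-coded in `conv.cu`, which gets replicated across every input/output channel pair of the filter:

```cpp
// Edge-detection kernel from conv.cu (copied into all channel pairs of h_kernel).
const float kernel_template[3][3] = {
    {1,  1, 1},
    {1, -8, 1},
    {1,  1, 1}
};
```

As the usage string in `conv.cu` shows, two optional arguments may follow the image path: a GPU index (default 0) and a flag that additionally runs a cuDNN sigmoid activation over the convolution output, e.g. `./conv cppcon-logo.png 0 1`.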
29 | -------------------------------------------------------------------------------- /code/cudnn/conv.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define checkCUDNN(expression) \ 8 | { \ 9 | cudnnStatus_t status = (expression); \ 10 | if (status != CUDNN_STATUS_SUCCESS) { \ 11 | std::cerr << "Error on line " << __LINE__ << ": " \ 12 | << cudnnGetErrorString(status) << std::endl; \ 13 | std::exit(EXIT_FAILURE); \ 14 | } \ 15 | } 16 | 17 | cv::Mat load_image(const char* image_path) { 18 | cv::Mat image = cv::imread(image_path, CV_LOAD_IMAGE_COLOR); 19 | image.convertTo(image, CV_32FC3); 20 | cv::normalize(image, image, 0, 1, cv::NORM_MINMAX); 21 | std::cerr << "Input Image: " << image.rows << " x " << image.cols << " x " 22 | << image.channels() << std::endl; 23 | return image; 24 | } 25 | 26 | void save_image(const char* output_filename, 27 | float* buffer, 28 | int height, 29 | int width) { 30 | cv::Mat output_image(height, width, CV_32FC3, buffer); 31 | // Make negative values zero. 32 | cv::threshold(output_image, 33 | output_image, 34 | /*threshold=*/0, 35 | /*maxval=*/0, 36 | cv::THRESH_TOZERO); 37 | cv::normalize(output_image, output_image, 0.0, 255.0, cv::NORM_MINMAX); 38 | output_image.convertTo(output_image, CV_8UC3); 39 | cv::imwrite(output_filename, output_image); 40 | std::cerr << "Wrote output to " << output_filename << std::endl; 41 | } 42 | 43 | int main(int argc, const char* argv[]) { 44 | if (argc < 2) { 45 | std::cerr << "usage: conv [gpu=0] [sigmoid=0]" << std::endl; 46 | std::exit(EXIT_FAILURE); 47 | } 48 | 49 | int gpu_id = (argc > 2) ? std::atoi(argv[2]) : 0; 50 | std::cerr << "GPU: " << gpu_id << std::endl; 51 | 52 | bool with_sigmoid = (argc > 3) ? 
std::atoi(argv[3]) : 0; 53 | std::cerr << "With sigmoid: " << std::boolalpha << with_sigmoid << std::endl; 54 | 55 | cv::Mat image = load_image(argv[1]); 56 | 57 | cudaSetDevice(gpu_id); 58 | 59 | cudnnHandle_t cudnn; 60 | cudnnCreate(&cudnn); 61 | 62 | cudnnTensorDescriptor_t input_descriptor; 63 | checkCUDNN(cudnnCreateTensorDescriptor(&input_descriptor)); 64 | checkCUDNN(cudnnSetTensor4dDescriptor(input_descriptor, 65 | /*format=*/CUDNN_TENSOR_NHWC, 66 | /*dataType=*/CUDNN_DATA_FLOAT, 67 | /*batch_size=*/1, 68 | /*channels=*/3, 69 | /*image_height=*/image.rows, 70 | /*image_width=*/image.cols)); 71 | 72 | cudnnFilterDescriptor_t kernel_descriptor; 73 | checkCUDNN(cudnnCreateFilterDescriptor(&kernel_descriptor)); 74 | checkCUDNN(cudnnSetFilter4dDescriptor(kernel_descriptor, 75 | /*dataType=*/CUDNN_DATA_FLOAT, 76 | /*format=*/CUDNN_TENSOR_NCHW, 77 | /*out_channels=*/3, 78 | /*in_channels=*/3, 79 | /*kernel_height=*/3, 80 | /*kernel_width=*/3)); 81 | 82 | cudnnConvolutionDescriptor_t convolution_descriptor; 83 | checkCUDNN(cudnnCreateConvolutionDescriptor(&convolution_descriptor)); 84 | checkCUDNN(cudnnSetConvolution2dDescriptor(convolution_descriptor, 85 | /*pad_height=*/1, 86 | /*pad_width=*/1, 87 | /*vertical_stride=*/1, 88 | /*horizontal_stride=*/1, 89 | /*dilation_height=*/1, 90 | /*dilation_width=*/1, 91 | /*mode=*/CUDNN_CROSS_CORRELATION, 92 | /*computeType=*/CUDNN_DATA_FLOAT)); 93 | 94 | int batch_size{0}, channels{0}, height{0}, width{0}; 95 | checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convolution_descriptor, 96 | input_descriptor, 97 | kernel_descriptor, 98 | &batch_size, 99 | &channels, 100 | &height, 101 | &width)); 102 | 103 | std::cerr << "Output Image: " << height << " x " << width << " x " << channels 104 | << std::endl; 105 | 106 | cudnnTensorDescriptor_t output_descriptor; 107 | checkCUDNN(cudnnCreateTensorDescriptor(&output_descriptor)); 108 | checkCUDNN(cudnnSetTensor4dDescriptor(output_descriptor, 109 | /*format=*/CUDNN_TENSOR_NHWC, 110 | /*dataType=*/CUDNN_DATA_FLOAT, 111 | /*batch_size=*/1, 112 | /*channels=*/3, 113 | /*image_height=*/image.rows, 114 | /*image_width=*/image.cols)); 115 | 116 | cudnnConvolutionFwdAlgo_t convolution_algorithm; 117 | checkCUDNN( 118 | cudnnGetConvolutionForwardAlgorithm(cudnn, 119 | input_descriptor, 120 | kernel_descriptor, 121 | convolution_descriptor, 122 | output_descriptor, 123 | CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 124 | /*memoryLimitInBytes=*/0, 125 | &convolution_algorithm)); 126 | 127 | size_t workspace_bytes{0}; 128 | checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnn, 129 | input_descriptor, 130 | kernel_descriptor, 131 | convolution_descriptor, 132 | output_descriptor, 133 | convolution_algorithm, 134 | &workspace_bytes)); 135 | std::cerr << "Workspace size: " << (workspace_bytes / 1048576.0) << "MB" 136 | << std::endl; 137 | assert(workspace_bytes > 0); 138 | 139 | void* d_workspace{nullptr}; 140 | cudaMalloc(&d_workspace, workspace_bytes); 141 | 142 | int image_bytes = batch_size * channels * height * width * sizeof(float); 143 | 144 | float* d_input{nullptr}; 145 | cudaMalloc(&d_input, image_bytes); 146 | cudaMemcpy(d_input, image.ptr(0), image_bytes, cudaMemcpyHostToDevice); 147 | 148 | float* d_output{nullptr}; 149 | cudaMalloc(&d_output, image_bytes); 150 | cudaMemset(d_output, 0, image_bytes); 151 | 152 | // clang-format off 153 | const float kernel_template[3][3] = { 154 | {1, 1, 1}, 155 | {1, -8, 1}, 156 | {1, 1, 1} 157 | }; 158 | // clang-format on 159 | 160 | float h_kernel[3][3][3][3]; 161 | for (int 
kernel = 0; kernel < 3; ++kernel) { 162 | for (int channel = 0; channel < 3; ++channel) { 163 | for (int row = 0; row < 3; ++row) { 164 | for (int column = 0; column < 3; ++column) { 165 | h_kernel[kernel][channel][row][column] = kernel_template[row][column]; 166 | } 167 | } 168 | } 169 | } 170 | 171 | float* d_kernel{nullptr}; 172 | cudaMalloc(&d_kernel, sizeof(h_kernel)); 173 | cudaMemcpy(d_kernel, h_kernel, sizeof(h_kernel), cudaMemcpyHostToDevice); 174 | 175 | const float alpha = 1.0f, beta = 0.0f; 176 | 177 | checkCUDNN(cudnnConvolutionForward(cudnn, 178 | &alpha, 179 | input_descriptor, 180 | d_input, 181 | kernel_descriptor, 182 | d_kernel, 183 | convolution_descriptor, 184 | convolution_algorithm, 185 | d_workspace, 186 | workspace_bytes, 187 | &beta, 188 | output_descriptor, 189 | d_output)); 190 | 191 | if (with_sigmoid) { 192 | cudnnActivationDescriptor_t activation_descriptor; 193 | checkCUDNN(cudnnCreateActivationDescriptor(&activation_descriptor)); 194 | checkCUDNN(cudnnSetActivationDescriptor(activation_descriptor, 195 | CUDNN_ACTIVATION_SIGMOID, 196 | CUDNN_PROPAGATE_NAN, 197 | /*relu_coef=*/0)); 198 | checkCUDNN(cudnnActivationForward(cudnn, 199 | activation_descriptor, 200 | &alpha, 201 | output_descriptor, 202 | d_output, 203 | &beta, 204 | output_descriptor, 205 | d_output)); 206 | cudnnDestroyActivationDescriptor(activation_descriptor); 207 | } 208 | 209 | float* h_output = new float[image_bytes]; 210 | cudaMemcpy(h_output, d_output, image_bytes, cudaMemcpyDeviceToHost); 211 | 212 | save_image("cudnn-out.png", h_output, height, width); 213 | 214 | delete[] h_output; 215 | cudaFree(d_kernel); 216 | cudaFree(d_input); 217 | cudaFree(d_output); 218 | cudaFree(d_workspace); 219 | 220 | cudnnDestroyTensorDescriptor(input_descriptor); 221 | cudnnDestroyTensorDescriptor(output_descriptor); 222 | cudnnDestroyFilterDescriptor(kernel_descriptor); 223 | cudnnDestroyConvolutionDescriptor(convolution_descriptor); 224 | 225 | cudnnDestroy(cudnn); 226 | } 227 | -------------------------------------------------------------------------------- /code/cudnn/cppcon-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/cudnn/cppcon-logo.png -------------------------------------------------------------------------------- /code/dlib/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := lenet 2 | WARNINGS := -Wall -Wextra -pedantic 3 | CXXFLAGS := -std=c++11 -O2 4 | 5 | all: conv 6 | 7 | conv: $(TARGET).cpp 8 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 9 | $(TARGET).cpp -o $(TARGET) -ldlib 10 | 11 | .phony: clean 12 | 13 | clean: 14 | rm $(TARGET) || echo -n "" 15 | -------------------------------------------------------------------------------- /code/dlib/README.md: -------------------------------------------------------------------------------- 1 | # dlib 2 | 3 | Example of training a neural network with `dlib`. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. DLib: http://dlib.net, 10 | 2. OpenCV2 (consult your package manager). 11 | 12 | Then: `make`. 13 | 14 | ## Running 15 | 16 | Use the script in the `mxnet` folder to fetch the MNIST dataset. Then pass that 17 | folder to the built binary to start the training process. 
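Assuming the MNIST files were fetched into `mnist_data/` with the mxnet `download_mnist.sh` script, the invocation is simply `./lenet mnist_data`; the program trains for (at most) two epochs and prints the test accuracy at the end.

If you want to keep the trained network around, dlib can serialize it to disk. This is not part of `lenet.cpp`, but a minimal sketch using dlib's standard serialization API would look like this:

```cpp
// Hypothetical addition after training in lenet.cpp (model.clean() is
// already called there): persist the network so it can be reloaded later
// without retraining. "lenet.dat" is an arbitrary filename.
dlib::serialize("lenet.dat") << model;

// In another program, or a later run:
// lenet::model model;
// dlib::deserialize("lenet.dat") >> model;
```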
18 | -------------------------------------------------------------------------------- /code/dlib/lenet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace lenet { 6 | using namespace dlib; 7 | // clang-format off 8 | using model = loss_multiclass_log< 9 | fc<10, 10 | relu> 14 | >>>>>>>>>>; 15 | // clang-format on 16 | } // namespace lenet 17 | 18 | int main(int argc, char const* argv[]) { 19 | if (argc < 2) { 20 | std::cerr << "usage: lenet " << std::endl; 21 | } 22 | 23 | std::vector> training_images; 24 | std::vector training_labels; 25 | 26 | std::vector> test_images; 27 | std::vector test_labels; 28 | 29 | dlib::load_mnist_dataset(argv[1], 30 | training_images, 31 | training_labels, 32 | test_images, 33 | test_labels); 34 | 35 | lenet::model model; 36 | 37 | dlib::dnn_trainer trainer(model); 38 | trainer.set_learning_rate(0.01); 39 | trainer.set_min_learning_rate(1e-5); 40 | trainer.set_mini_batch_size(128); 41 | trainer.set_max_num_epochs(2); 42 | trainer.be_verbose(); 43 | 44 | trainer.train(training_images, training_labels); 45 | 46 | model.clean(); 47 | 48 | std::vector predicted = model(test_images); 49 | double hits = 0; 50 | for (size_t i = 0; i < test_images.size(); i++) { 51 | if (predicted[i] == test_labels[i]) { 52 | hits += 1; 53 | } 54 | } 55 | 56 | std::cerr << "Test accuracy: " << hits / test_images.size() << std::endl; 57 | } 58 | -------------------------------------------------------------------------------- /code/mkl/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := conv 2 | WARNINGS := -Wall -Wextra -pedantic -Wno-vla-extension 3 | HEADERS := -isystem /opt/intel/mkl/include 4 | MKL_PATH := /opt/intel/mkl 5 | LIBS := -L $(MKL_PATH)/lib/ -L /usr/local/lib 6 | CXXFLAGS := -std=c++11 -O2 7 | 8 | all: conv 9 | 10 | conv: $(TARGET).cpp 11 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 12 | $(TARGET).cpp -o $(TARGET) \ 13 | -lmkl_intel -lmkl_sequential -lmkl_core \ 14 | -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 15 | 16 | .phony: clean 17 | 18 | clean: 19 | rm $(TARGET) mkl-out.png || echo -n "" 20 | -------------------------------------------------------------------------------- /code/mkl/README.md: -------------------------------------------------------------------------------- 1 | # mkl 2 | 3 | Example of doing an edge-detection convolution using Intel MKL. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. Install Intel MKL for your system: https://software.intel.com/en-us/mkl, 10 | 2. OpenCV2 (consult your package manager). 11 | 12 | Then just `make`. If your MKL did not end up under `/opt/intel/mkl`, change the 13 | Makefile or set the `MKL_PATH` environment variable before the `make` 14 | invocation. 15 | 16 | ## Running 17 | 18 | The binary expects the path to an image, e.g. for the `cppcon-logo.png` image 19 | that's already there: 20 | 21 | ```sh 22 | $ LD_LIBRARY_PATH=/path/to/mkl/libs ./conv cppcon-logo.png 23 | ``` 24 | 25 | It then generates an image called `mkl-out.png`. 
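Like the cuDNN example, `conv.cpp` performs edge detection, here with a 5x5 kernel (all -1 with 24 at the centre), and it accepts an optional second argument that converts the input to grayscale first, e.g. `LD_LIBRARY_PATH=/path/to/mkl/libs ./conv cppcon-logo.png 1`. The main MKL-specific wrinkle is data layout: the code describes its buffers as WHCN, asks the convolution primitive for the layout it prefers (`dnnLayoutCreateFromPrimitive_F32`), and only inserts a conversion (`dnnConversionCreate_F32`) when `dnnLayoutCompare_F32` reports a mismatch.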
26 | -------------------------------------------------------------------------------- /code/mkl/conv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define checkMKL(expression) \ 7 | { \ 8 | dnnError_t status = expression; \ 9 | if (status != E_SUCCESS) { \ 10 | std::cerr << "Error at line " << __LINE__ << ": " << status \ 11 | << std::endl; \ 12 | std::exit(EXIT_FAILURE); \ 13 | } \ 14 | } 15 | 16 | cv::Mat load_image(const char* image_path, bool is_gray) { 17 | cv::Mat image = cv::imread(image_path, CV_LOAD_IMAGE_COLOR); 18 | image.convertTo(image, CV_32FC3); 19 | if (is_gray) { 20 | cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); 21 | } 22 | cv::normalize(image, image, 0, 1, cv::NORM_MINMAX); 23 | std::cout << "Input Image: " << image.rows << " x " << image.cols << " x " 24 | << image.channels() << std::endl; 25 | return image; 26 | } 27 | 28 | std::vector 29 | flip_channels(float* input_buffer, size_t X, size_t Y, size_t Z) { 30 | assert(input_buffer != nullptr); 31 | std::vector flipped(X * Y * Z); 32 | 33 | for (size_t x = 0; x < X; ++x) { 34 | for (size_t y = 0; y < Y; ++y) { 35 | for (size_t z = 0; z < Z; ++z) { 36 | const size_t index = x * (Y * Z) + y * Z + z; 37 | const size_t flipped_index = z * (X * Y) + y * X + x; 38 | assert(flipped_index < X * Y * Z); 39 | flipped[flipped_index] = input_buffer[index]; 40 | } 41 | } 42 | } 43 | 44 | return flipped; 45 | } 46 | 47 | 48 | void save_image(const char* output_filename, 49 | float* buffer, 50 | int height, 51 | int width, 52 | bool is_gray) { 53 | const auto format = is_gray ? CV_32F : CV_32FC3; 54 | cv::Mat output_image(height, width, format, buffer); 55 | // Make negative values zero. 56 | cv::threshold(output_image, 57 | output_image, 58 | /*threshold=*/0, 59 | /*maxval=*/0, 60 | cv::THRESH_TOZERO); 61 | cv::normalize(output_image, output_image, 0.0, 255.0, cv::NORM_MINMAX); 62 | output_image.convertTo(output_image, CV_8UC3); 63 | cv::imwrite(output_filename, output_image); 64 | std::cerr << "Wrote output to " << output_filename << std::endl; 65 | } 66 | 67 | void setup_conversion(dnnPrimitive_t* conversion_primitive, 68 | dnnLayout_t source_layout, 69 | dnnLayout_t target_layout, 70 | float* source_buffer, 71 | float** target_buffer) { 72 | if (!dnnLayoutCompare_F32(target_layout, source_layout)) { 73 | checkMKL(dnnConversionCreate_F32(conversion_primitive, 74 | source_layout, 75 | target_layout)); 76 | checkMKL(dnnAllocateBuffer_F32(reinterpret_cast(target_buffer), 77 | target_layout)); 78 | } else { 79 | assert(source_buffer != nullptr); 80 | *target_buffer = source_buffer; 81 | } 82 | 83 | assert(target_buffer != nullptr); 84 | } 85 | 86 | int main(int argc, const char* argv[]) { 87 | if (argc < 2) { 88 | std::cerr << "usage: conv [is_gray]" << std::endl; 89 | std::exit(EXIT_FAILURE); 90 | } 91 | 92 | bool is_gray = false; 93 | if (argc == 3) { 94 | is_gray = std::atoi(argv[2]); 95 | } 96 | 97 | cv::Mat image = load_image(argv[1], is_gray); 98 | 99 | const size_t height = image.rows; 100 | const size_t width = image.cols; 101 | const size_t input_channels = is_gray ? 1 : 3; 102 | const size_t output_channels = is_gray ? 
1 : 3; 103 | const size_t batch_size = 1; 104 | const int kernel_size = 5; 105 | const size_t dimension = 4; 106 | 107 | // Format is: WHCN 108 | size_t input_shape[] = {width, height, input_channels, batch_size}; 109 | size_t input_strides[] = {1, 110 | width, 111 | width * height, 112 | width * height * input_channels}; 113 | 114 | size_t output_shape[] = {width, height, output_channels, batch_size}; 115 | size_t output_strides[] = {1, 116 | width, 117 | width * height, 118 | width * height * output_channels}; 119 | 120 | // HWIO 121 | size_t kernel_shape[] = {kernel_size, 122 | kernel_size, 123 | input_channels, 124 | output_channels}; 125 | size_t kernel_strides[] = {1, 126 | kernel_size, 127 | kernel_size * kernel_size, 128 | kernel_size * kernel_size * input_channels}; 129 | 130 | dnnLayout_t input_layout{nullptr}; 131 | checkMKL(dnnLayoutCreate_F32(&input_layout, 132 | dimension, 133 | input_shape, 134 | input_strides)); 135 | 136 | dnnLayout_t output_layout{nullptr}; 137 | checkMKL(dnnLayoutCreate_F32(&output_layout, 138 | dimension, 139 | output_shape, 140 | output_strides)); 141 | 142 | dnnLayout_t kernel_layout{nullptr}; 143 | checkMKL(dnnLayoutCreate_F32(&kernel_layout, 144 | dimension, 145 | kernel_shape, 146 | kernel_strides)); 147 | 148 | // assert(dnnLayoutCompare_F32(input_layout, output_layout)); 149 | 150 | dnnPrimitiveAttributes_t attributes{nullptr}; 151 | checkMKL(dnnPrimitiveAttributesCreate_F32(&attributes)); 152 | 153 | size_t convolution_strides[] = {/*width=*/1, /*height=*/1}; 154 | int convolution_offsets[] = {/*horizontal=*/(1 - kernel_size) / 2, 155 | /*vertical=*/(1 - kernel_size) / 2}; 156 | 157 | dnnPrimitive_t convolution_primitive{nullptr}; 158 | checkMKL(dnnConvolutionCreateForward_F32(&convolution_primitive, 159 | attributes, 160 | dnnAlgorithmConvolutionDirect, 161 | dimension, 162 | input_shape, 163 | output_shape, 164 | kernel_shape, 165 | convolution_strides, 166 | convolution_offsets, 167 | dnnBorderZeros)); 168 | 169 | dnnLayout_t conv_input_layout{nullptr}; 170 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_input_layout, 171 | convolution_primitive, 172 | dnnResourceSrc)); 173 | std::cerr << "Input size: " << dnnLayoutGetMemorySize_F32(conv_input_layout) 174 | << "B" << std::endl; 175 | 176 | 177 | dnnLayout_t conv_output_layout{nullptr}; 178 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_output_layout, 179 | convolution_primitive, 180 | dnnResourceDst)); 181 | std::cerr << "Output size: " << dnnLayoutGetMemorySize_F32(conv_output_layout) 182 | << "B" << std::endl; 183 | 184 | dnnLayout_t conv_kernel_layout{nullptr}; 185 | checkMKL(dnnLayoutCreateFromPrimitive_F32(&conv_kernel_layout, 186 | convolution_primitive, 187 | dnnResourceFilter)); 188 | std::cerr << "Kernel size: " << dnnLayoutGetMemorySize_F32(conv_kernel_layout) 189 | << "B" << std::endl; 190 | 191 | auto input_buffer = 192 | flip_channels(image.ptr(0), height, width, input_channels); 193 | float* output_buffer{nullptr}; 194 | float* conversion_buffer[dnnResourceNumber] = {nullptr}; 195 | 196 | // clang-format off 197 | float kernel_template[kernel_size][kernel_size] = { 198 | {-1, -1, -1, -1, -1}, 199 | {-1, -1, -1, -1, -1}, 200 | {-1, -1, 24, -1, -1}, 201 | {-1, -1, -1, -1, -1}, 202 | {-1, -1, -1, -1, -1}, 203 | }; 204 | // clang-format on 205 | 206 | float kernel_buffer[output_channels][input_channels][kernel_size] 207 | [kernel_size]; 208 | for (size_t output_channel = 0; output_channel < output_channels; 209 | ++output_channel) { 210 | for (size_t input_channel = 0; 
input_channel < input_channels; 211 | ++input_channel) { 212 | for (size_t column = 0; column < kernel_size; ++column) { 213 | for (size_t row = 0; row < kernel_size; ++row) { 214 | kernel_buffer[output_channel][input_channel][column][row] = 215 | kernel_template[column][row]; 216 | } 217 | } 218 | } 219 | } 220 | float* kernel_buffer_flat = &kernel_buffer[0][0][0][0]; 221 | 222 | dnnPrimitive_t input_conversion{nullptr}; 223 | setup_conversion(&input_conversion, 224 | input_layout, 225 | conv_input_layout, 226 | input_buffer.data(), 227 | &conversion_buffer[dnnResourceSrc]); 228 | 229 | dnnPrimitive_t kernel_conversion{nullptr}; 230 | setup_conversion(&kernel_conversion, 231 | kernel_layout, 232 | conv_kernel_layout, 233 | kernel_buffer_flat, 234 | &conversion_buffer[dnnResourceFilter]); 235 | 236 | checkMKL(dnnAllocateBuffer_F32(reinterpret_cast( 237 | &conversion_buffer[dnnResourceDst]), 238 | conv_output_layout)); 239 | 240 | dnnPrimitive_t output_conversion{nullptr}; 241 | setup_conversion(&output_conversion, 242 | conv_output_layout, 243 | output_layout, 244 | conversion_buffer[dnnResourceDst], 245 | &output_buffer); 246 | 247 | if (kernel_conversion) { 248 | std::cerr << "Performing kernel conversion" << std::endl; 249 | checkMKL(dnnConversionExecute_F32(kernel_conversion, 250 | kernel_buffer, 251 | conversion_buffer[dnnResourceFilter])); 252 | } else { 253 | std::cerr << "Skipping kernel conversion" << std::endl; 254 | } 255 | 256 | if (input_conversion) { 257 | std::cerr << "Performing input conversion" << std::endl; 258 | checkMKL(dnnConversionExecute_F32(input_conversion, 259 | input_buffer.data(), 260 | conversion_buffer[dnnResourceSrc])); 261 | } else { 262 | std::cerr << "Skipping input conversion" << std::endl; 263 | } 264 | 265 | std::cerr << "Executing convolution" << std::endl; 266 | checkMKL(dnnExecute_F32(convolution_primitive, 267 | reinterpret_cast(conversion_buffer))); 268 | 269 | 270 | if (output_conversion) { 271 | std::cerr << "Performing output conversion" << std::endl; 272 | checkMKL(dnnConversionExecute_F32(output_conversion, 273 | conversion_buffer[dnnResourceDst], 274 | output_buffer)); 275 | } else { 276 | std::cerr << "Skipping output conversion" << std::endl; 277 | } 278 | 279 | auto flipped_output = 280 | flip_channels(output_buffer, output_channels, height, width); 281 | save_image("mkl-out.png", flipped_output.data(), height, width, is_gray); 282 | 283 | // --------------------------------------------------------------------------- 284 | 285 | checkMKL(dnnPrimitiveAttributesDestroy_F32(attributes)); 286 | 287 | checkMKL(dnnLayoutDelete_F32(kernel_layout)); 288 | checkMKL(dnnLayoutDelete_F32(output_layout)); 289 | checkMKL(dnnLayoutDelete_F32(input_layout)); 290 | 291 | checkMKL(dnnLayoutDelete_F32(conv_kernel_layout)); 292 | checkMKL(dnnLayoutDelete_F32(conv_output_layout)); 293 | checkMKL(dnnLayoutDelete_F32(conv_input_layout)); 294 | 295 | if (conversion_buffer[dnnResourceSrc] != input_buffer.data()) { 296 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceSrc])); 297 | } 298 | if (conversion_buffer[dnnResourceFilter] != kernel_buffer_flat) { 299 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceFilter])); 300 | } 301 | checkMKL(dnnReleaseBuffer_F32(conversion_buffer[dnnResourceDst])); 302 | } 303 | -------------------------------------------------------------------------------- /code/mkl/cppcon-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mkl/cppcon-logo.png -------------------------------------------------------------------------------- /code/mxnet/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := lenet 2 | WARNINGS := -Wall -Wextra -pedantic 3 | HEADERS := -isystem $(MXNET_PATH)/cpp-package/include \ 4 | -isystem $(MXNET_PATH)/include \ 5 | -isystem $(MXNET_PATH)/nnvm/include \ 6 | -isystem $(MXNET_PATH)/dmlc-core/include \ 7 | -I ../ 8 | LIBS := -L $(MXNET_PATH)/lib -L /usr/local/lib 9 | CXXFLAGS := -std=c++11 10 | 11 | all: lenet 12 | 13 | lenet: $(TARGET).cpp 14 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 15 | $(TARGET).cpp ../common/socket.cpp -o $(TARGET) \ 16 | -lmxnet -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 17 | 18 | .phony: clean 19 | 20 | clean: 21 | rm $(TARGET) || echo -n "" 22 | -------------------------------------------------------------------------------- /code/mxnet/README.md: -------------------------------------------------------------------------------- 1 | # mxnet 2 | 3 | Demo of creating a deep neural network with MXNet's C++ api. 4 | 5 | ## Building 6 | 7 | ### Prerequisites 8 | 9 | 1. Clone MXNet: `git clone https://github.com/apache/incubator-mxnet`, 10 | 2. Build with CMake, using something like this: `make -j4 USE_BLAS=apple USE_OPENCV=1 USE_CPP_PACKAGE=1 USE_OPENMP=0` (the important part is `USE_CPP_PACKAGE`, the rest may differ for you), 11 | 5. OpenCV2 (consult your package manager). 12 | 13 | ### Build the Neural Network 14 | 15 | The `lenet.cpp` file contains code for a neural network using MXNet. You can 16 | build it with the Makefile in this folder. For this, you need to set the 17 | `MXNET_PATH` environment variable to point to your MXNet library path prefix, 18 | e.g. for me: 19 | 20 | ```sh 21 | MXNET_PATH=~/Documents/Libraries/mxnet make 22 | ``` 23 | 24 | ### Build the Demo 25 | 26 | The demo GUI uses Qt (5.7 or newer). You will need to download it. Then generate 27 | a Makefile using `qmake` and make: 28 | 29 | ```sh 30 | $ cd demo 31 | $ qmake 32 | $ make 33 | ``` 34 | 35 | ## Running 36 | 37 | First download the MNIST dataset using the `download_mnist.sh` script. Then run 38 | the binary produced from `lenet.cpp`, which will train the neural network. You 39 | can optionally pass a number of epochs to train as a command line argument. 40 | Anywhere between 1 and 10 is sensible. The default is two epochs, which gets you 41 | to around 98% accuracy for the task (handwritten digit classification). Once 42 | it's done training, it will start listening on a socket for prediction requests. 43 | At this point, launch the demo app binary, which will connect to the server, 44 | allowing you to request predictions. Like so: 45 | 46 | ```sh 47 | $ LD_LIBRARY_PATH=/path/to/mxnet/lib ./lenet MNIST_data 48 | $ demo/ 49 | ``` 50 | 51 | where `` is `demo.app/Contents/MacOS/demo` for example. Differs on 52 | Linux or Windows. 
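A note on paths: `download_mnist.sh` creates (and the repo already ships) a lower-case `mnist_data/` directory, so that is the directory name to pass to `lenet`.

The GUI is only a thin client: `lenet` listens on port 6666, the client sends the path of a PNG as plain text, and the server answers with the predicted digit, again as plain text (the Qt backend simply calls `toInt()` on the reply). For testing without Qt, the same request can be made from any socket client. The following is a minimal sketch (not part of the repo) that assumes the server ends up bound to an IPv4 localhost address and that `demo.png` is a small grayscale image of a digit:

```cpp
// Hypothetical stand-alone client for the prediction server in lenet.cpp.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstring>
#include <iostream>

int main() {
  const int fd = socket(AF_INET, SOCK_STREAM, 0);
  sockaddr_in server{};
  server.sin_family = AF_INET;
  server.sin_port = htons(6666);  // port hard-coded in lenet.cpp and backend.cpp
  inet_pton(AF_INET, "127.0.0.1", &server.sin_addr);

  if (connect(fd, reinterpret_cast<sockaddr*>(&server), sizeof server) == -1) {
    std::cerr << "Could not connect to the lenet server" << std::endl;
    return 1;
  }

  // The server reads at most 256 bytes and treats them as an image path.
  const char* image_path = "demo.png";
  send(fd, image_path, std::strlen(image_path), 0);

  char reply[16] = {0};
  recv(fd, reply, sizeof reply - 1, 0);
  std::cout << "Predicted digit: " << reply << std::endl;

  close(fd);
}
```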
53 | -------------------------------------------------------------------------------- /code/mxnet/demo/.qmake.stash: -------------------------------------------------------------------------------- 1 | QMAKE_MAC_SDK.macosx.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk 2 | QMAKE_MAC_SDK.macosx.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform 3 | QMAKE_MAC_SDK.macosx.SDKVersion = 10.12 4 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang 5 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 6 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \ 7 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ 8 | -id 9 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \ 10 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ 11 | cq 12 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \ 13 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ 14 | -s 15 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 16 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 17 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_ACTOOL = /Applications/Xcode.app/Contents/Developer/usr/bin/actool 18 | QMAKE_CXX.INCDIRS = \ 19 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 20 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 21 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 22 | /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk/usr/include 23 | QMAKE_CXX.LIBDIRS = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk/usr/lib 24 | QMAKE_CXX.QT_COMPILER_STDCXX = 199711L 25 | QMAKE_CXX.QMAKE_APPLE_CC = 6000 26 | QMAKE_CXX.QMAKE_APPLE_CLANG_MAJOR_VERSION = 8 27 | QMAKE_CXX.QMAKE_APPLE_CLANG_MINOR_VERSION = 1 28 | QMAKE_CXX.QMAKE_APPLE_CLANG_PATCH_VERSION = 0 29 | QMAKE_CXX.QMAKE_GCC_MAJOR_VERSION = 4 30 | QMAKE_CXX.QMAKE_GCC_MINOR_VERSION = 2 31 | QMAKE_CXX.QMAKE_GCC_PATCH_VERSION = 1 32 | QMAKE_CXX.COMPILER_MACROS = \ 33 | QT_COMPILER_STDCXX \ 34 | QMAKE_APPLE_CC \ 35 | QMAKE_APPLE_CLANG_MAJOR_VERSION \ 36 | QMAKE_APPLE_CLANG_MINOR_VERSION \ 37 | QMAKE_APPLE_CLANG_PATCH_VERSION \ 38 | QMAKE_GCC_MAJOR_VERSION \ 39 | QMAKE_GCC_MINOR_VERSION \ 40 | QMAKE_GCC_PATCH_VERSION 41 | QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer 42 | QMAKE_XCODE_VERSION = 8.3.3 43 | QMAKE_DEFAULT_INCDIRS = \ 44 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 45 | /usr/local/include \ 46 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 47 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 48 | /usr/include \ 49 | "/System/Library/Frameworks (framework directory)" \ 50 | "/Library/Frameworks (framework directory)" 51 | QMAKE_DEFAULT_LIBDIRS = \ 
52 | /lib \ 53 | /usr/lib 54 | -------------------------------------------------------------------------------- /code/mxnet/demo/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | const int kPort = 6666; 10 | 11 | BackEnd::BackEnd(QObject* parent) 12 | : QObject(parent), socket(new QTcpSocket(this)) { 13 | socket->connectToHost("localhost", kPort); 14 | 15 | QObject::connect(socket, &QTcpSocket::connected, [] { 16 | qDebug() << "Connected to localhost:" << kPort; 17 | }); 18 | 19 | QObject::connect(socket, &QTcpSocket::readyRead, [this] { 20 | emit prediction(socket->readAll().toInt()); 21 | }); 22 | } 23 | 24 | void BackEnd::predict(QString imageFilename) { 25 | if (socket->waitForConnected(3000)) { 26 | socket->write(imageFilename.toStdString().c_str()); 27 | } else { 28 | qDebug() << "Error connecting to server!"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/mxnet/demo/backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H 2 | #define BACKEND_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class BackEnd : public QObject { 9 | Q_OBJECT 10 | 11 | public: 12 | explicit BackEnd(QObject* parent = nullptr); 13 | 14 | Q_INVOKABLE void predict(QString imageFilename); 15 | 16 | signals: 17 | 18 | void prediction(int prediction); 19 | 20 | private: 21 | QTcpSocket* socket; 22 | }; 23 | 24 | #endif // BACKEND_H 25 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "backend.h" 5 | 6 | int main(int argc, char** argv) { 7 | QApplication app(argc, argv); 8 | QQmlApplicationEngine engine; 9 | 10 | qmlRegisterType("demo.backend", 1, 0, "BackEnd"); 11 | 12 | engine.load(QUrl("qrc:///main.qml")); 13 | 14 | return app.exec(); 15 | } 16 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.pro: -------------------------------------------------------------------------------- 1 | TARGET = demo 2 | TEMPLATE = app 3 | QT += widgets qml quick core network 4 | RESOURCES += qml.qrc 5 | SOURCES += main.cpp backend.cpp 6 | HEADERS += backend.h 7 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.qml: -------------------------------------------------------------------------------- 1 | import QtQuick 2.6 2 | import QtQuick.Controls 2.0 3 | import QtQuick.Layouts 1.3 4 | import QtQuick.Controls.Styles 1.4 5 | import demo.backend 1.0 6 | 7 | ApplicationWindow { 8 | id: root 9 | width: 500 10 | height: 500 11 | visible: true 12 | title: "Canvas" 13 | 14 | readonly property string imageFilename: "demo.png" 15 | 16 | Canvas { 17 | id: demo 18 | anchors.fill: parent 19 | property int posX; 20 | property int posY; 21 | property bool pressed; 22 | 23 | signal clear 24 | 25 | onPaint: { 26 | var ctx = getContext("2d"); 27 | if (pressed) { 28 | ctx.fillStyle = "white"; 29 | ctx.ellipse(posX, posY, 25, 25); 30 | ctx.fill(); 31 | } else { 32 | ctx.reset(); 33 | ctx.fillStyle = 'black'; 34 | ctx.fillRect(0, 0, width, height); 35 | } 36 | } 37 | 38 | onClear: { 39 | pressed = false; 40 | requestPaint(); 41 | } 42 | 43 | MouseArea { 44 | anchors.fill: parent 45 | onPressed: { 46 | 
parent.pressed = true; 47 | } 48 | onPositionChanged: { 49 | parent.posX = mouseX; 50 | parent.posY = mouseY; 51 | parent.requestPaint(); 52 | } 53 | } 54 | } 55 | 56 | RowLayout { 57 | anchors.bottom: parent.bottom 58 | anchors.horizontalCenter: parent.horizontalCenter 59 | Button { 60 | flat: true 61 | onClicked: { 62 | demo.save(root.imageFilename); 63 | backend.predict(root.imageFilename); 64 | } 65 | contentItem: Text { 66 | text: "Predict" 67 | color: "white" 68 | horizontalAlignment: Text.AlignHCenter 69 | verticalAlignment: Text.AlignVCenter 70 | } 71 | } 72 | 73 | Label { 74 | id: predicted 75 | text: "?" 76 | font.pixelSize: 72 77 | color: "white" 78 | } 79 | 80 | Button { 81 | flat: true 82 | onClicked: demo.clear(); 83 | contentItem: Text { 84 | text: "Reset" 85 | color: "white" 86 | horizontalAlignment: Text.AlignHCenter 87 | verticalAlignment: Text.AlignVCenter 88 | } 89 | } 90 | } 91 | 92 | BackEnd { 93 | id: backend 94 | onPrediction: predicted.text = prediction 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /code/mxnet/demo/main.qmlc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/demo/main.qmlc -------------------------------------------------------------------------------- /code/mxnet/demo/qml.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | main.qml 5 | 6 | 7 | -------------------------------------------------------------------------------- /code/mxnet/download_mnist.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "./MNIST_data" ]; then 2 | mkdir mnist_data 3 | cd mnist_data 4 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz \ 5 | http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz \ 6 | http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz \ 7 | http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 8 | gzip -d *.gz 9 | fi 10 | -------------------------------------------------------------------------------- /code/mxnet/lenet.cpp: -------------------------------------------------------------------------------- 1 | #include "common/socket.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace mx = mxnet::cpp; 14 | 15 | mx::Symbol LeNet() { 16 | auto images = mx::Symbol::Variable("images"); 17 | auto labels = mx::Symbol::Variable("labels"); 18 | 19 | // ------------------------------- CONV 1 ------------------------------- 20 | 21 | mx::Symbol conv1_weights("conv1_weights"); 22 | mx::Symbol conv1_bias("conv1_bias"); 23 | 24 | auto conv1 = mx::Convolution("conv1", 25 | images, 26 | conv1_weights, 27 | conv1_bias, 28 | /*kernel=*/mx::Shape(5, 5), 29 | /*filters=*/32); 30 | auto conv1_activation = 31 | mx::Activation("conv1_activation", conv1, mx::ActivationActType::kRelu); 32 | auto pool1 = mx::Pooling("pool1", 33 | conv1_activation, 34 | mx::Shape(2, 2), 35 | mx::PoolingPoolType::kMax, 36 | /*global_pool=*/false, 37 | /*use_cudnn=*/false, 38 | mx::PoolingPoolingConvention::kValid, 39 | mx::Shape(2, 2)); 40 | 41 | // ------------------------------- CONV 2 ------------------------------- 42 | 43 | mx::Symbol conv2_weights("conv2_weights"); 44 | mx::Symbol conv2_bias("conv2_bias"); 45 | 46 | auto conv2 = mx::Convolution("conv2", 47 | pool1, 48 | conv2_weights, 49 
| conv2_bias, 50 | /*kernel=*/mx::Shape(5, 5), 51 | /*filters=*/64); 52 | auto conv2_activation = 53 | mx::Activation("conv2_activation", conv2, mx::ActivationActType::kRelu); 54 | auto pool2 = mx::Pooling("pool2", 55 | conv2_activation, 56 | mx::Shape(2, 2), 57 | mx::PoolingPoolType::kMax, 58 | /*global_pool=*/false, 59 | /*use_cudnn=*/false, 60 | mx::PoolingPoolingConvention::kValid, 61 | mx::Shape(2, 2)); 62 | 63 | // ------------------------------- FC 1 ------------------------------- 64 | 65 | mx::Symbol fc1_weights("fc1_weights"); 66 | mx::Symbol fc1_bias("fc1_bias"); 67 | 68 | auto flatten = mx::Flatten("flatten", pool2); 69 | auto fc1 = mx::FullyConnected("fc1", 70 | flatten, 71 | fc1_weights, 72 | fc1_bias, 73 | /*units=*/1024); 74 | auto fc1_activation = 75 | mx::Activation("fc1_activation", fc1, mx::ActivationActType::kRelu); 76 | 77 | // ------------------------------- FC 2 ------------------------------- 78 | 79 | mx::Symbol fc2_weights("fc2_weights"); 80 | mx::Symbol fc2_bias("fc2_bias"); 81 | 82 | auto fc2 = mx::FullyConnected("fc2", 83 | fc1_activation, 84 | fc2_weights, 85 | fc2_bias, 86 | /*units=*/10); 87 | 88 | // ------------------------------- P ------------------------------- 89 | 90 | return mx::SoftmaxOutput("softmax", fc2, labels); 91 | } 92 | 93 | mx::NDArray load_image(const std::string& image_path, 94 | const mx::Shape& batch_shape, 95 | mx::Context& context) { 96 | cv::Mat image2 = cv::imread(image_path, CV_LOAD_IMAGE_GRAYSCALE); 97 | assert(image2.data != nullptr); 98 | image2.convertTo(image2, CV_32F); 99 | cv::normalize(image2, image2, 0, 1, cv::NORM_MINMAX); 100 | 101 | cv::Mat image(batch_shape[2], batch_shape[3], batch_shape[1]); 102 | resize(image2, image, image.size(), 0, 0); 103 | std::cerr << "Loaded image of shape: " << image.rows << " x " << image.cols 104 | << " x " << image.channels() << std::endl; 105 | 106 | // Make a batch and fill the first image 107 | const size_t batch_flat_size = batch_shape[0] * image.total(); 108 | std::vector flat(batch_flat_size, 0); 109 | const auto* pointer = image.ptr(0); 110 | std::copy(pointer, pointer + image.total(), flat.begin()); 111 | 112 | mx::NDArray ndarray(batch_shape, context); 113 | ndarray.SyncCopyFromCPU(flat); 114 | mx::NDArray::WaitAll(); 115 | 116 | return ndarray; 117 | } 118 | 119 | int predict(mx::NDArray image, 120 | mx::Executor& executor, 121 | std::map& symbols) { 122 | image.CopyTo(&symbols["images"]); 123 | symbols["labels"] = 0; 124 | mx::NDArray::WaitAll(); 125 | executor.Forward(/*training=*/false); 126 | 127 | std::vector predictions(image.GetShape()[0]); 128 | executor.outputs[0].ArgmaxChannel().SyncCopyToCPU(&predictions, 129 | predictions.size()); 130 | 131 | return predictions[0]; 132 | } 133 | 134 | int main(int argc, char const* argv[]) { 135 | if (argc < 2) { 136 | std::cerr << "usage: lenet [epochs=2]\n"; 137 | std::exit(EXIT_FAILURE); 138 | } 139 | 140 | std::string mnist_path(argv[1]); 141 | 142 | const size_t batch_size = 128; 143 | const size_t number_of_epochs = (argc == 3) ? 
std::atoi(argv[2]) : 2; 144 | const size_t image_width = 28; 145 | const size_t image_height = 28; 146 | const size_t image_channels = 1; 147 | 148 | auto context = mx::Context::cpu(); 149 | 150 | mx::Shape image_shape(batch_size, image_channels, image_width, image_height); 151 | 152 | auto lenet = LeNet(); 153 | 154 | // clang-format off 155 | std::map symbols = { 156 | {"images", mx::NDArray(image_shape, context)}, 157 | {"labels", mx::NDArray(mx::Shape(batch_size), context)}, 158 | }; 159 | // clang-format on 160 | 161 | lenet.InferArgsMap(context, &symbols, symbols); 162 | const auto symbol_names = lenet.ListArguments(); 163 | 164 | mx::Normal normal_initializer(/*mean=*/0.0, /*stddev=*/0.1); 165 | for (auto& symbol : symbols) { 166 | if (symbol.first == "images" || symbol.first == "labels") continue; 167 | normal_initializer(symbol.first, &symbol.second); 168 | } 169 | 170 | mx::Optimizer* optimizer = mx::OptimizerRegistry::Find("sgd"); 171 | assert(optimizer != nullptr); 172 | optimizer->SetParam("lr", 0.1)->SetParam("rescale_grad", 1.0 / batch_size); 173 | 174 | std::unique_ptr executor(lenet.SimpleBind(context, symbols)); 175 | 176 | auto training_iterator = 177 | mx::MXDataIter("MNISTIter") 178 | .SetParam("image", mnist_path + "/train-images-idx3-ubyte") 179 | .SetParam("label", mnist_path + "/train-labels-idx1-ubyte") 180 | .SetParam("batch_size", batch_size) 181 | .SetParam("shuffle", true) 182 | .SetParam("flat", 0) 183 | .CreateDataIter(); 184 | 185 | auto test_iterator = 186 | mx::MXDataIter("MNISTIter") 187 | .SetParam("image", mnist_path + "/t10k-images-idx3-ubyte") 188 | .SetParam("label", mnist_path + "/t10k-labels-idx1-ubyte") 189 | .SetParam("batch_size", batch_size) 190 | .SetParam("shuffle", true) 191 | .SetParam("flat", 0) 192 | .CreateDataIter(); 193 | 194 | size_t training_number_of_batches = 60000 / batch_size; 195 | for (size_t epoch = 1; epoch <= number_of_epochs; ++epoch) { 196 | training_iterator.Reset(); 197 | for (size_t batch_index = 0; training_iterator.Next(); ++batch_index) { 198 | auto batch = training_iterator.GetDataBatch(); 199 | batch.data.CopyTo(&symbols["images"]); 200 | batch.label.CopyTo(&symbols["labels"]); 201 | 202 | // Wait for symbols to be populated. 
203 | mx::NDArray::WaitAll(); 204 | 205 | executor->Forward(/*training=*/true); 206 | executor->Backward(); 207 | 208 | for (size_t symbol = 0; symbol < symbol_names.size(); ++symbol) { 209 | if (symbol_names[symbol] == "images") continue; 210 | if (symbol_names[symbol] == "labels") continue; 211 | optimizer->Update(symbol, 212 | executor->arg_arrays[symbol], 213 | executor->grad_arrays[symbol]); 214 | } 215 | 216 | std::cout << "\rBatch " << batch_index << "/" 217 | << training_number_of_batches << std::flush; 218 | } 219 | 220 | std::cout << std::endl; 221 | LOG(INFO) << "Evaluating ..."; 222 | 223 | mx::Accuracy accuracy; 224 | test_iterator.Reset(); 225 | while (test_iterator.Next()) { 226 | auto batch = test_iterator.GetDataBatch(); 227 | batch.data.CopyTo(&symbols["images"]); 228 | batch.label.CopyTo(&symbols["labels"]); 229 | mx::NDArray::WaitAll(); 230 | executor->Forward(/*training=*/false); 231 | accuracy.Update(batch.label, executor->outputs[0]); 232 | } 233 | 234 | std::cout << "Epoch: " << epoch << " | Accuracy: " << accuracy.Get() 235 | << std::endl; 236 | } 237 | 238 | 239 | Socket socket(6666); 240 | std::cerr << "Listening on port 6666" << std::endl; 241 | 242 | socket.accept(); 243 | std::cerr << "Connection established" << std::endl; 244 | 245 | while (true) { 246 | const std::string image_filename = socket.read(256); 247 | std::cout << "Prediction request for: \"" << image_filename << "\"" 248 | << std::endl; 249 | auto image = load_image(image_filename, image_shape, context); 250 | const int prediction = predict(image, *executor, symbols); 251 | socket.write(std::to_string(prediction)); 252 | std::cout << "Sending prediction: " << prediction << std::endl; 253 | } 254 | 255 | MXNotifyShutdown(); 256 | } 257 | -------------------------------------------------------------------------------- /code/mxnet/mnist_data/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /code/mxnet/mnist_data/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /code/mxnet/mnist_data/train-images-idx3-ubyte: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/train-images-idx3-ubyte -------------------------------------------------------------------------------- /code/mxnet/mnist_data/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/mxnet/mnist_data/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /code/requirements.txt: -------------------------------------------------------------------------------- 1 | bleach==1.5.0 2 | html5lib==0.9999999 3 | Keras==2.0.8 4 | Markdown==2.6.9 5 | numpy==1.13.1 6 | protobuf==3.4.0 7 | PyYAML==3.12 8 | scipy==0.19.1 9 | six==1.11.0 10 | tensorflow==1.3.0 11 | tensorflow-tensorboard==0.1.6 12 | Werkzeug==0.12.2 13 | -------------------------------------------------------------------------------- /code/tf-graph/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := load-graph 2 | WARNINGS := -Wall -Wextra -pedantic -Wno-vla-extension 3 | HEADERS := -isystem $(TF_PATH) \ 4 | -isystem $(TF_PATH)/bazel-genfiles \ 5 | -isystem $(TF_PATH)/third-party \ 6 | -isystem /usr/local/include/eigen3 \ 7 | -isystem $(PB_PATH)/src \ 8 | -I ../ 9 | LIBS := -L$(TF_PATH)/bazel-bin/tensorflow/ 10 | CXXFLAGS := -std=c++11 -O2 11 | 12 | all: graph 13 | 14 | graph: $(TARGET).cpp 15 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 16 | $(TARGET).cpp -o $(TARGET) -l tensorflow_cc \ 17 | -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 18 | 19 | server: $(TARGET)-server.cpp 20 | $(CXX) $(WARNINGS) $(CXXFLAGS) $(HEADERS) $(LIBS) \ 21 | $(TARGET)-server.cpp ../common/socket.cpp -o $(TARGET)-server \ 22 | -l tensorflow_cc -lopencv_imgcodecs -lopencv_imgproc -lopencv_core 23 | 24 | .phony: clean 25 | 26 | clean: 27 | rm $(TARGET) $(TARGET)-server || echo -n "" 28 | -------------------------------------------------------------------------------- /code/tf-graph/README.md: -------------------------------------------------------------------------------- 1 | # tf-graph 2 | 3 | Demo of loading and running a TensorFlow graph trained and exported in Python. 4 | 5 | ## Building 6 | 7 | ### Prerequisites 8 | 9 | 1. A clone of TensorFlow: `git clone https://github.com/tensorflow/tensorflow` checked out at release/branch r1.3 (that's what I use, may work with newer), 10 | 2. See `https://github.com/tensorflow/tensorflow/issues/2412` how to build a library from TensorFlow sources (you'll need Bazel), 11 | 3. A clone of Protocol Buffers at version 3.3.0 (exactly!): `https://github.com/google/protobuf/releases/tag/v3.3.0`, 12 | 4. Build that version of ProtoBuf from source using the Makefile inside, 13 | 5. OpenCV2 (consult your package manager). 14 | 15 | Note that TensorFlow also needs Eigen. 16 | 17 | ### Build the Graph Loader 18 | 19 | You can build using the Makefile found in this folder. You need to set `TF_PATH` and `PB_PATH` environment variables to your local folder location of TensorFlow and ProtoBuf, respectively, e.g. for me: 20 | 21 | ```sh 22 | TF_PATH=~/Documents/Libraries/tensorflow \ 23 | PB_PATH=~/Documents/Libraries/protobuf-3.3.0 make 24 | ``` 25 | 26 | This will build `load-graph.cpp`, which loads a graph and generates an image. 
27 | There is also `load-graph-server.cpp` under the `server` target of the Makefile 28 | which builds the version that listens on a socket for inference requests from 29 | the demo. 30 | 31 | ### Demo 32 | 33 | First make sure you've built the server version with the above instructions. 34 | Then, for the `demo` folder, you'll need Qt (5.7 or newer). Generate a Makefile 35 | and simply make: 36 | 37 | ```sh 38 | $ cd demo 39 | $ qmake 40 | $ make 41 | ``` 42 | 43 | ## Running 44 | 45 | Both the server and non-server version require two arguments: 46 | 47 | 1. The path to a model checkpoint, 48 | 2. The path prefix for a saved TensorFlow session. 49 | 50 | You can pass those two to the binary of `load-graph.cpp` and it will generate an 51 | image under `/tmp/out.png`. 52 | 53 | For the demo, the server version of `load-graph` will also start listening on a 54 | socket when you run it. You should start this binary first, then start the 55 | `demo` Qt app, which will connect to the server to request images. That is: 56 | 57 | ```sh 58 | $ LD_LIBRARY_PATH=/path/to/tensorflow_cc.so load-graph-server graph.pb checkpoint 59 | $ demo/ 60 | ``` 61 | 62 | where `` is `demo.app/Contents/MacOS/demo` for example. Differs on 63 | Linux or Windows. 64 | -------------------------------------------------------------------------------- /code/tf-graph/demo/.qmake.stash: -------------------------------------------------------------------------------- 1 | QMAKE_DEFAULT_INCDIRS = \ 2 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \ 3 | /usr/local/include \ 4 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.1.0/include \ 5 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \ 6 | /usr/include \ 7 | "/System/Library/Frameworks (framework directory)" \ 8 | "/Library/Frameworks (framework directory)" 9 | QMAKE_DEFAULT_LIBDIRS = \ 10 | /lib \ 11 | /usr/lib 12 | QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer 13 | QMAKE_XCODE_VERSION = 8.3.3 14 | QMAKE_MAC_SDK.macosx.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk 15 | QMAKE_MAC_SDK.macosx.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform 16 | QMAKE_MAC_SDK.macosx.SDKVersion = 10.12 17 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang 18 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 19 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \ 20 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \ 21 | -id 22 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \ 23 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \ 24 | cq 25 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \ 26 | /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \ 27 | -s 28 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 29 | QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++ 30 | 
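A quick way to sanity-check the TensorFlow C++ setup described in the tf-graph README above is a small stand-alone program that only touches the public session API. This is a hypothetical smoke test, not a file from the repository: it assumes the r1.3 tree ships `tensorflow/core/public/version.h` with `TF_VERSION_STRING`, and that it is compiled with the same `HEADERS` and `LIBS` flags as the Makefile above, linking `-ltensorflow_cc`.

```cpp
// tf-smoke-test.cpp -- hypothetical helper, not part of the repository.
// Confirms that the headers under TF_PATH and libtensorflow_cc link cleanly.
#include <iostream>
#include <memory>

#include <tensorflow/core/public/session.h>
#include <tensorflow/core/public/version.h>

int main() {
  // TF_VERSION_STRING comes from the public version header in r1.3.
  std::cout << "TensorFlow version: " << TF_VERSION_STRING << std::endl;

  // Creating and closing an empty session exercises the runtime linkage
  // without needing a graph or a checkpoint.
  std::unique_ptr<tensorflow::Session> session(
      tensorflow::NewSession(tensorflow::SessionOptions()));
  const tensorflow::Status status = session->Close();
  std::cout << "Session close status: " << status.ToString() << std::endl;
}
```

If this compiles, links, and runs, the more involved `load-graph` and `load-graph-server` targets should at least find their headers and libraries.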
-------------------------------------------------------------------------------- /code/tf-graph/demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | ## CMAKE SETUP 3 | ########################################################### 4 | 5 | cmake_minimum_required(VERSION 3.2) 6 | project(canvas) 7 | 8 | ########################################################### 9 | ## INCLUDES 10 | ########################################################### 11 | 12 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 13 | 14 | ########################################################### 15 | ## SOURCES 16 | ########################################################### 17 | 18 | set(CANVAS_SOURCES main.cpp) 19 | 20 | ########################################################### 21 | ## TARGETS 22 | ########################################################### 23 | 24 | # The following variables are for Qt support 25 | set(CMAKE_AUTOMOC ON) 26 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 27 | 28 | # Find the actual Qt5 packages 29 | find_package(Qt5Widgets REQUIRED) 30 | find_package(Qt5Qml REQUIRED) 31 | find_package(Qt5Quick REQUIRED) 32 | find_package(Qt5Svg REQUIRED) 33 | 34 | add_executable(canvas ${CANVAS_SOURCES}) 35 | 36 | ########################################################### 37 | ## COMPILER FLAGS 38 | ########################################################### 39 | 40 | target_compile_options(canvas PUBLIC -std=c++14) 41 | -------------------------------------------------------------------------------- /code/tf-graph/demo/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | const int kPort = 6666; 9 | 10 | BackEnd::BackEnd(QObject* parent) 11 | : QObject(parent), socket(new QTcpSocket(this)) { 12 | socket->connectToHost("localhost", kPort); 13 | 14 | QObject::connect(socket, &QTcpSocket::connected, [] { 15 | qDebug() << "Connected to localhost:" << kPort; 16 | }); 17 | 18 | QObject::connect(socket, &QTcpSocket::readyRead, [this] { 19 | emit imageReady(socket->readAll()); 20 | }); 21 | } 22 | 23 | void BackEnd::generateImage(int digit, double a, double b) { 24 | if (socket->waitForConnected(3000)) { 25 | const auto string = QString("%1 %2 %3").arg(digit).arg(a).arg(b); 26 | socket->write(string.toStdString().c_str()); 27 | } else { 28 | qDebug() << "Error connecting to server!"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /code/tf-graph/demo/backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H 2 | #define BACKEND_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class BackEnd : public QObject { 9 | Q_OBJECT 10 | 11 | public: 12 | explicit BackEnd(QObject* parent = nullptr); 13 | 14 | Q_INVOKABLE void generateImage(int digit, double a, double b); 15 | 16 | signals: 17 | 18 | void imageReady(QString imagePath); 19 | 20 | private: 21 | QTcpSocket* socket; 22 | }; 23 | 24 | #endif // BACKEND_H 25 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "backend.h" 5 | 6 | int main(int argc, char** argv) { 7 | QApplication app(argc, argv); 8 | QQmlApplicationEngine engine; 9 
| 10 | qmlRegisterType("demo.backend", 1, 0, "BackEnd"); 11 | 12 | engine.load(QUrl("qrc:///main.qml")); 13 | 14 | return app.exec(); 15 | } 16 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.pro: -------------------------------------------------------------------------------- 1 | TARGET = demo 2 | TEMPLATE = app 3 | QT += widgets qml quick core 4 | RESOURCES += qml.qrc 5 | SOURCES += main.cpp backend.cpp 6 | HEADERS += backend.h 7 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.qml: -------------------------------------------------------------------------------- 1 | import QtQuick 2.6 2 | import QtQuick.Controls 2.0 3 | import QtQuick.Layouts 1.3 4 | import QtQuick.Controls.Styles 1.4 5 | import demo.backend 1.0 6 | 7 | ApplicationWindow { 8 | id: root 9 | width: 400 10 | height: 450 11 | visible: true 12 | title: "Demo" 13 | color: "black" 14 | 15 | Image { 16 | id: image 17 | width: 400 18 | height: 280 19 | fillMode: Image.PreserveAspectFit 20 | source: "file:/tmp/gan-out.png" 21 | cache: false 22 | } 23 | 24 | BackEnd { 25 | id: backend 26 | function generate() { 27 | backend.generateImage(digit.value, a.value, b.value); 28 | } 29 | onImageReady: image.source = "file:" + imagePath 30 | } 31 | 32 | ColumnLayout { 33 | anchors.top: image.bottom 34 | anchors.bottom: parent.bottom 35 | anchors.bottomMargin: 10 36 | anchors.horizontalCenter: parent.horizontalCenter 37 | Slider { 38 | id: digit 39 | from: 0 40 | to: 9 41 | value: 0 42 | stepSize: 1 43 | snapMode: Slider.SnapAlways 44 | onMoved: backend.generate() 45 | 46 | Text { 47 | anchors.right: parent.left 48 | anchors.rightMargin: 10 49 | anchors.verticalCenter: parent.verticalCenter 50 | text: Math.ceil(digit.value) 51 | color: "white" 52 | font.pixelSize: 20 53 | } 54 | } 55 | 56 | Slider { 57 | id: a 58 | from: -3 59 | to: +3 60 | value: 0 61 | stepSize: 0.1 62 | snapMode: Slider.SnapAlways 63 | onMoved: backend.generate() 64 | 65 | Text { 66 | anchors.right: parent.left 67 | anchors.rightMargin: 10 68 | anchors.verticalCenter: parent.verticalCenter 69 | text: a.value.toPrecision(1) 70 | color: "white" 71 | font.pixelSize: 20 72 | } 73 | } 74 | 75 | Slider { 76 | id: b 77 | from: -3 78 | to: +3 79 | value: 0 80 | stepSize: 0.1 81 | snapMode: Slider.SnapAlways 82 | onMoved: backend.generate() 83 | 84 | Text { 85 | anchors.right: parent.left 86 | anchors.rightMargin: 10 87 | anchors.verticalCenter: parent.verticalCenter 88 | text: b.value.toPrecision(1) 89 | color: "white" 90 | font.pixelSize: 20 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /code/tf-graph/demo/main.qmlc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peter-can-talk/cppcon-2017/b3c5ccd2ca6c927a2cf1c0a1d927550f1ac930d3/code/tf-graph/demo/main.qmlc -------------------------------------------------------------------------------- /code/tf-graph/demo/qml.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | main.qml 5 | 6 | 7 | -------------------------------------------------------------------------------- /code/tf-graph/infogan.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | import matplotlib.pyplot as plot 3 | import numpy as np 4 | import tensorflow as tf 5 | from keras.layers import (Activation, 
BatchNormalization, Concatenate, Conv2D, 6 | Dense, Flatten, Input, LeakyReLU, Reshape, Lambda, 7 | UpSampling2D) 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from tensorflow.examples.tutorials.mnist import input_data 11 | 12 | gpu_options = tf.GPUOptions(allow_growth=True) 13 | session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 14 | K.set_session(session) 15 | 16 | # Supress warnings about wrong compilation of TensorFlow. 17 | tf.logging.set_verbosity(tf.logging.ERROR) 18 | tf.set_random_seed(42) 19 | 20 | noise_size = 100 21 | 22 | latent_discrete = 10 23 | latent_continuous = 2 24 | latent_size = latent_discrete + latent_continuous 25 | 26 | ## G 27 | 28 | z = Input(shape=[noise_size], name='z') 29 | c = Input(shape=[latent_size], name='c') 30 | G = Concatenate()([z, c]) 31 | 32 | G = Dense(7 * 7 * 256)(G) 33 | G = BatchNormalization(momentum=0.9)(G) 34 | G = LeakyReLU(alpha=0.2)(G) 35 | G = Reshape((7, 7, 256))(G) 36 | 37 | G = UpSampling2D()(G) 38 | G = Conv2D(128, (5, 5), padding='same')(G) 39 | G = BatchNormalization(momentum=0.9)(G) 40 | G = LeakyReLU(alpha=0.2)(G) 41 | 42 | G = UpSampling2D()(G) 43 | G = Conv2D(64, (5, 5), padding='same')(G) 44 | G = BatchNormalization(momentum=0.9)(G) 45 | G = LeakyReLU(alpha=0.2)(G) 46 | 47 | G = Conv2D(32, (5, 5), padding='same')(G) 48 | G = BatchNormalization(momentum=0.9)(G) 49 | G = LeakyReLU(alpha=0.2)(G) 50 | 51 | G = Conv2D(1, (5, 5), padding='same')(G) 52 | G = Activation('tanh', name='G_final')(G) 53 | 54 | ## D 55 | 56 | x = Input(shape=(28, 28, 1)) 57 | D = Conv2D(32, (5, 5), strides=(2, 2), padding='same')(x) 58 | D = LeakyReLU(alpha=0.2)(D) 59 | 60 | D = Conv2D(64, (5, 5), strides=(2, 2), padding='same')(D) 61 | D = LeakyReLU(alpha=0.2)(D) 62 | 63 | D = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(D) 64 | D = LeakyReLU(alpha=0.2)(D) 65 | 66 | D = Conv2D(256, (5, 5), padding='same')(D) 67 | D = LeakyReLU(alpha=0.2)(D) 68 | D = Flatten(name='D_final')(D) 69 | 70 | 71 | def latent_activations(Q): 72 | Q_discrete = Activation('softmax')(Q[:, :latent_discrete]) 73 | Q_continuous = Activation('sigmoid')(Q[:, -latent_continuous:]) 74 | return Concatenate(axis=1)([Q_discrete, Q_continuous]) 75 | 76 | 77 | Q = Dense(latent_discrete + 2 * latent_continuous)(D) 78 | Q = Lambda(latent_activations)(Q) 79 | 80 | P = Dense(1, activation='sigmoid')(D) 81 | 82 | 83 | def mutual_information(prior_c, c_given_x): 84 | h_c = K.categorical_crossentropy(prior_c, prior_c) 85 | h_c_given_x = K.categorical_crossentropy(prior_c, c_given_x) 86 | return K.mean(h_c_given_x - h_c) 87 | 88 | 89 | def joint_mutual_information(prior_c, c_given_x): 90 | discrete = mutual_information(prior_c[:, :latent_discrete], 91 | c_given_x[:, :latent_discrete], 92 | K.categorical_crossentropy) 93 | continuous_1 = mutual_information(prior_c[:, -2], c_given_x[:, -2]) 94 | continuous_2 = mutual_information(prior_c[:, -1], c_given_x[:, -1]) 95 | return discrete + continuous_1 + continuous_2 96 | 97 | 98 | generator = Model([z, c], G, name='G') 99 | 100 | discriminator = Model(x, P, name='D') 101 | discriminator.compile( 102 | loss='binary_crossentropy', 103 | optimizer=Adam(lr=5e-4, beta_1=0.5, decay=2e-7)) 104 | 105 | # x = G(z, c) 106 | q = Model(x, Q, name='Q') 107 | q.compile( 108 | loss=joint_mutual_information, 109 | optimizer=Adam(lr=2e-4, beta_1=0.5, decay=2e-7)) 110 | 111 | discriminator.trainable = False 112 | q.trainable = False 113 | infogan = Model([z, c], [discriminator(G), q(G)], name='InfoGAN') 114 | 
infogan.compile( 115 | loss=['binary_crossentropy', joint_mutual_information], 116 | optimizer=Adam(lr=2e-4, beta_1=0.5, decay=1e-7)) 117 | 118 | generator.summary() 119 | discriminator.summary() 120 | 121 | data = input_data.read_data_sets('MNIST_data').train.images 122 | data = data.reshape(-1, 28, 28, 1) * 2 - 1 123 | 124 | number_of_epochs = 30 125 | batch_size = 256 126 | 127 | print(generator.outputs[0]) 128 | 129 | 130 | def sample_noise(size): 131 | return np.random.randn(size, noise_size) 132 | 133 | 134 | def sample_prior(size): 135 | discrete = np.random.multinomial(1, [0.1] * 10, size=size) 136 | continuous_1 = np.random.uniform(-1, +1, size).reshape(-1, 1) 137 | continuous_2 = np.random.uniform(-1, +1, size).reshape(-1, 1) 138 | return np.concatenate([discrete, continuous_1, continuous_2], axis=1) 139 | 140 | 141 | def smooth_labels(size): 142 | return np.random.uniform(low=0.8, high=1.0, size=size) 143 | 144 | 145 | saver = tf.train.Saver(max_to_keep=1) 146 | saver_def = saver.as_saver_def() 147 | 148 | print(saver_def.filename_tensor_name) 149 | print(saver_def.restore_op_name) 150 | 151 | try: 152 | for epoch in range(number_of_epochs): 153 | print('Epoch: {0}/{1}'.format(epoch + 1, number_of_epochs)) 154 | for batch_start in range(0, len(data) - batch_size + 1, batch_size): 155 | noise = sample_noise(batch_size) 156 | latent_code = sample_prior(batch_size) 157 | generated_images = generator.predict([noise, latent_code]) 158 | 159 | real_images = data[batch_start:batch_start + batch_size] 160 | assert len(generated_images) == len(real_images) 161 | all_images = np.concatenate( 162 | [generated_images, real_images], axis=0) 163 | all_images += np.random.normal(0, 0.1, all_images.shape) 164 | 165 | labels = np.zeros(len(all_images)) 166 | labels[batch_size:] = smooth_labels(batch_size) 167 | d_loss = discriminator.train_on_batch(all_images, labels) 168 | 169 | q_loss = q.train_on_batch(generated_images, latent_code) 170 | 171 | labels = np.ones(batch_size) 172 | noise = sample_noise(batch_size) 173 | latent_code = sample_prior(batch_size) 174 | g_loss, _, _ = infogan.train_on_batch([noise, latent_code], 175 | [labels, latent_code]) 176 | 177 | batch_index = batch_start // batch_size + 1 178 | message = '\rBatch: {0} | D: {1:.10f} | G: {2:.10f} | Q: {3:.10f}' 179 | print(message.format(batch_index, d_loss, g_loss, q_loss), end='') 180 | print() 181 | np.random.shuffle(data) 182 | tf.train.write_graph( 183 | session.graph_def, 'graphs', 'graph.pb', as_text=False) 184 | saver.save(session, 'checkpoints/chkp') 185 | 186 | except KeyboardInterrupt: 187 | print() 188 | 189 | print('Training complete!') 190 | -------------------------------------------------------------------------------- /code/tf-graph/load-graph-server.cpp: -------------------------------------------------------------------------------- 1 | #include "common/socket.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | const size_t kNoiseSize = 100; 28 | const size_t kDiscreteCodeSize = 10; 29 | const size_t kContinuousCodeSize = 2; 30 | const size_t kCodeSize = kDiscreteCodeSize + kContinuousCodeSize; 31 | const char* const kLearningPhase = "batch_normalization_1/keras_learning_phase"; 32 | 33 | void load_graph(const std::string& graph_path, 34 | 
std::unique_ptr& session) { 35 | tensorflow::GraphDef graph_def; 36 | TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), 37 | graph_path, 38 | &graph_def)); 39 | 40 | session.reset(tensorflow::NewSession(tensorflow::SessionOptions())); 41 | TF_CHECK_OK(session->Create(graph_def)); 42 | } 43 | 44 | tensorflow::Tensor sample_noise() { 45 | using RandomNormal = Eigen::internal::NormalRandomGenerator; 46 | tensorflow::Tensor noise(tensorflow::DT_FLOAT, 47 | tensorflow::TensorShape({1, kNoiseSize})); 48 | noise.matrix().setRandom(); 49 | return noise; 50 | } 51 | 52 | tensorflow::Tensor create_code(int digit, double a, double b) { 53 | tensorflow::Tensor tensor(tensorflow::DT_FLOAT, 54 | tensorflow::TensorShape({1, kCodeSize})); 55 | auto code = tensor.flat(); 56 | code.setZero(); 57 | 58 | assert(digit >= 0 && digit <= 9); 59 | code(digit) = 1; 60 | code(kDiscreteCodeSize) = a; 61 | code(kDiscreteCodeSize + 1) = b; 62 | 63 | return tensor; 64 | } 65 | 66 | tensorflow::Tensor generate(std::unique_ptr& session, 67 | tensorflow::Tensor& noise, 68 | tensorflow::Tensor& code) { 69 | tensorflow::Tensor zero(tensorflow::DT_BOOL, tensorflow::TensorShape()); 70 | zero.scalar()(0) = false; 71 | std::vector> feeds = 72 | {{"z:0", noise}, {"c:0", code}, {kLearningPhase, zero}}; 73 | 74 | std::vector outputs; 75 | TF_CHECK_OK(session->Run(feeds, {"G_final/Tanh:0"}, {}, &outputs)); 76 | 77 | assert(!outputs.empty()); 78 | return outputs.front(); 79 | } 80 | 81 | void restore_session(const std::string& checkpoint_path, 82 | std::unique_ptr& session) { 83 | tensorflow::Tensor checkpoint_tensor(tensorflow::DT_STRING, 84 | tensorflow::TensorShape()); 85 | checkpoint_tensor.flat()(0) = checkpoint_path; 86 | TF_CHECK_OK(session->Run({{"save/Const:0", checkpoint_tensor}}, 87 | {}, 88 | {"save/restore_all"}, 89 | nullptr)); 90 | LOG(INFO) << "Restored session from " << checkpoint_path; 91 | } 92 | 93 | void save_image(const std::string& filename, float* buffer) { 94 | cv::Mat image(28, 28, CV_32F, buffer); 95 | cv::normalize(image, image, 0.0, 255.0, cv::NORM_MINMAX); 96 | image.convertTo(image, CV_8UC3); 97 | cv::imwrite(filename, image); 98 | LOG(INFO) << "Wrote " << filename; 99 | } 100 | 101 | int main(int argc, char* argv[]) { 102 | if (argc < 3) { 103 | std::cerr << "usage: load-graph \n"; 104 | std::exit(EXIT_FAILURE); 105 | } 106 | 107 | tensorflow::port::InitMain(argv[0], &argc, &argv); 108 | 109 | std::unique_ptr session; 110 | load_graph(argv[1], session); 111 | restore_session(argv[2], session); 112 | 113 | Socket socket(6666); 114 | std::cerr << "Listening on port 6666" << std::endl; 115 | 116 | socket.accept(); 117 | std::cerr << "Connection established" << std::endl; 118 | 119 | for (size_t count = 0; true; ++count) { 120 | std::istringstream stream(socket.read(256)); 121 | int digit = 0; 122 | double a = 0, b = 0; 123 | stream >> digit >> a >> b; 124 | 125 | LOG(INFO) << "Prediction request for code: \"" << digit << " " << a << " " 126 | << b << "\""; 127 | 128 | auto noise = sample_noise(); 129 | auto code = create_code(digit, a, b); 130 | auto tensor = generate(session, noise, code); 131 | auto image = tensor.flat(); 132 | image = (image + 1.0f) / 2.0f; 133 | const std::string image_path = 134 | "/tmp/gan-out-" + std::to_string(count % 2) + ".png"; 135 | save_image(image_path, image.data()); 136 | 137 | socket.write(image_path); 138 | 139 | LOG(INFO) << "Wrote " << image_path; 140 | } 141 | 142 | TF_CHECK_OK(session->Close()); 143 | } 144 | 
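The wire protocol between the Qt demo and this server is a single space-separated `digit a b` request on port 6666, answered with the path of the generated PNG. For testing the server without building the Qt demo, a minimal stand-alone client over raw POSIX sockets might look like the sketch below; the file name and the hard-coded request values are invented for illustration, and the one-message-per-request framing is assumed to match what `backend.cpp` and the server loop above do.

```cpp
// gan-client-sketch.cpp -- hypothetical, not part of the repository.
// Sends one "digit a b" request to the load-graph server and prints the
// image path it replies with.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstdio>
#include <iostream>
#include <string>

int main() {
  const int fd = ::socket(AF_INET, SOCK_STREAM, 0);
  if (fd < 0) {
    std::perror("socket");
    return 1;
  }

  sockaddr_in address{};
  address.sin_family = AF_INET;
  address.sin_port = htons(6666);  // Port the server listens on.
  ::inet_pton(AF_INET, "127.0.0.1", &address.sin_addr);

  if (::connect(fd, reinterpret_cast<sockaddr*>(&address), sizeof(address)) < 0) {
    std::perror("connect");
    return 1;
  }

  // Same format the Qt BackEnd sends: "<digit> <a> <b>".
  const std::string request = "3 0.5 -0.25";
  ::write(fd, request.data(), request.size());

  // The server answers with the path of the PNG it wrote, at most 256 bytes.
  char buffer[256] = {};
  const ssize_t received = ::read(fd, buffer, sizeof(buffer) - 1);
  if (received > 0) {
    std::cout << "Generated image: " << std::string(buffer, received) << std::endl;
  }

  ::close(fd);
}
```

Start the server first, then run this client; per the server loop above, successive requests alternate between `/tmp/gan-out-0.png` and `/tmp/gan-out-1.png`.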
-------------------------------------------------------------------------------- /code/tf-graph/load-graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | const size_t kNoiseSize = 100; 26 | const size_t kDiscreteCodeSize = 10; 27 | const size_t kContinuousCodeSize = 2; 28 | const size_t kCodeSize = kDiscreteCodeSize + kContinuousCodeSize; 29 | const char* const kLearningPhase = "batch_normalization_1/keras_learning_phase"; 30 | 31 | void load_graph(const std::string& graph_path, 32 | std::unique_ptr& session) { 33 | tensorflow::GraphDef graph_def; 34 | TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), 35 | graph_path, 36 | &graph_def)); 37 | 38 | session.reset(tensorflow::NewSession(tensorflow::SessionOptions())); 39 | TF_CHECK_OK(session->Create(graph_def)); 40 | } 41 | 42 | tensorflow::Tensor sample_noise() { 43 | using RandomNormal = Eigen::internal::NormalRandomGenerator; 44 | tensorflow::Tensor noise(tensorflow::DT_FLOAT, 45 | tensorflow::TensorShape({1, kNoiseSize})); 46 | noise.matrix().setRandom(); 47 | return noise; 48 | } 49 | 50 | tensorflow::Tensor sample_code() { 51 | using RandomUniform = Eigen::internal::UniformRandomGenerator; 52 | 53 | static std::random_device seed; 54 | static std::mt19937 rng(seed()); 55 | std::uniform_int_distribution indices(0, kDiscreteCodeSize - 1); 56 | 57 | tensorflow::Tensor code(tensorflow::DT_FLOAT, 58 | tensorflow::TensorShape({1, kCodeSize})); 59 | code.flat().setZero(); 60 | 61 | const auto one_hot_index = indices(rng); 62 | code.flat()(one_hot_index) = 1; 63 | 64 | Eigen::array offsets = {{kDiscreteCodeSize}}; 65 | Eigen::array extents = {{kContinuousCodeSize}}; 66 | auto continuous = code.flat().slice(offsets, extents); 67 | continuous.setRandom(); 68 | continuous = (continuous * 2.0f) - 1.0f; 69 | 70 | return code; 71 | } 72 | 73 | void restore_session(const std::string& checkpoint_path, 74 | std::unique_ptr& session) { 75 | tensorflow::Tensor checkpoint_tensor(tensorflow::DT_STRING, 76 | tensorflow::TensorShape()); 77 | checkpoint_tensor.flat()(0) = checkpoint_path; 78 | TF_CHECK_OK(session->Run({{"save/Const:0", checkpoint_tensor}}, 79 | {}, 80 | {"save/restore_all"}, 81 | nullptr)); 82 | LOG(INFO) << "Restored session from " << checkpoint_path; 83 | } 84 | 85 | void save_image(const char* filename, float* buffer) { 86 | cv::Mat image(28, 28, CV_32F, buffer); 87 | cv::normalize(image, image, 0.0, 255.0, cv::NORM_MINMAX); 88 | image.convertTo(image, CV_8UC3); 89 | cv::imwrite(filename, image); 90 | LOG(INFO) << "Wrote " << filename; 91 | } 92 | 93 | int main(int argc, char* argv[]) { 94 | if (argc < 3) { 95 | std::cerr << "usage: load-graph \n"; 96 | std::exit(EXIT_FAILURE); 97 | } 98 | 99 | tensorflow::port::InitMain(argv[0], &argc, &argv); 100 | 101 | std::unique_ptr session; 102 | load_graph(argv[1], session); 103 | restore_session(argv[2], session); 104 | 105 | auto noise = sample_noise(); 106 | auto code = sample_code(); 107 | 108 | std::cout << noise.flat() << std::endl; 109 | std::cout << "------------------" << std::endl; 110 | std::cout << code.flat() << std::endl; 111 | 112 | tensorflow::Tensor zero(tensorflow::DT_BOOL, tensorflow::TensorShape()); 113 | zero.scalar()(0) = 
false; 114 | std::vector> feeds = 115 | {{"z:0", noise}, {"c:0", code}, {kLearningPhase, zero}}; 116 | 117 | std::vector outputs; 118 | TF_CHECK_OK(session->Run(feeds, {"G_final/Tanh"}, {}, &outputs)); 119 | 120 | assert(!outputs.empty()); 121 | auto image = outputs.front().flat(); 122 | image = (image + 1.0f) / 2.0f; 123 | save_image("out.png", image.data()); 124 | 125 | TF_CHECK_OK(session->Close()); 126 | } 127 | -------------------------------------------------------------------------------- /code/tf-kernel/.bash_history: -------------------------------------------------------------------------------- 1 | ls 2 | make 3 | clang 4 | clang++-3.8 5 | exit 6 | ls 7 | make 8 | exit 9 | -------------------------------------------------------------------------------- /code/tf-kernel/README.md: -------------------------------------------------------------------------------- 1 | # tf-kernel 2 | 3 | Example of creating a custom TensorFlow operator. 4 | 5 | ## Building 6 | 7 | Prerequisites: 8 | 9 | 1. Install Python (preferrably 3) and `pip install -r requirements.txt` found in the `code/` root folder, 10 | 11 | Then `make`. 12 | 13 | ## Running 14 | 15 | The `test.py` script gives an example of loading the custom op in Python. 16 | 17 | The `cpu/` folder can be run exclusively on a CPU, the `cpu+gpu` stuff requires 18 | a GPU to run (at least to run `test.py`, the kernel itself is available on 19 | both). 20 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := kernel 2 | HEADERS := -I `python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())'` 3 | CXXFLAGS := -std=c++11 -fPIC -O2 -D GOOGLE_CUDA=1 4 | GPUFLAGS := -arch=sm_35 -std=c++11 -O2 -D GOOGLE_CUDA=1 \ 5 | -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 6 | 7 | ifeq ($(shell uname), Darwin) 8 | CXXFLAGS := $(CXXFLAGS) -undefined dynamic_lookup 9 | endif 10 | 11 | all: cpu+gpu 12 | 13 | gpu: $(TARGET).cu 14 | nvcc $(HEADERS) $(GPUFLAGS) -c $(TARGET).cu -o $(TARGET).cu.o 15 | 16 | cpu: $(TARGET).cpp 17 | $(CXX) $(HEADERS) $(CXXFLAGS) -c $(TARGET).cpp -o $(TARGET).o 18 | 19 | cpu+gpu: cpu gpu 20 | $(CXX) $(HEADERS) $(CXXFLAGS) -L /usr/local/cuda/lib64 \ 21 | $(TARGET).o $(TARGET).cu.o -shared -o $(TARGET).so -fPIC -lcudart 22 | 23 | .phony: clean 24 | 25 | clean: 26 | rm $(TARGET).so $(TARGET).o $(TARGET).cu.o || echo -n "" 27 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/cpp_con_sigmoid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | _kernel_path = os.environ.get('CPP_CON_KERNEL_PATH', './kernel.so') 5 | _module = tf.load_op_library(_kernel_path) 6 | 7 | cpp_con_sigmoid = _module.cpp_con_sigmoid 8 | -------------------------------------------------------------------------------- /code/tf-kernel/cpu+gpu/kernel.cpp: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if GOOGLE_CUDA 8 | #define EIGEN_USE_GPU 9 | #include "kernel.cuh" 10 | #endif // GOOGLE_CUDA 11 | 12 | template 13 | struct CPUKernel { 14 | void operator()(tensorflow::OpKernelContext*, 15 | const tensorflow::Tensor& input_tensor, 16 | tensorflow::Tensor& output_tensor) { 17 | auto input = input_tensor.flat(); 18 | auto output = output_tensor.flat(); 19 | 
output = (1 + (-input).exp()).inverse(); 20 | } 21 | }; 22 | 23 | namespace tensorflow { 24 | 25 | REGISTER_OP("CppConSigmoid") 26 | .Attr("T: {float, double}") 27 | .Input("tensor: T") 28 | .Output("output: T") 29 | .SetShapeFn([](shape_inference::InferenceContext* context) { 30 | context->set_output(0, context->input(0)); 31 | return Status::OK(); 32 | }); 33 | 34 | template