├── Dockerfile
├── Dockerfile.gpu
├── Makefile
├── README.md
├── downloads
│   └── cudnn-7.5-linux-x64-v5.0-ga.tgz
├── frameworks
│   ├── caffe
│   │   ├── Makefile
│   │   ├── config
│   │   │   ├── Makefile.config.cpu
│   │   │   └── Makefile.config.gpu
│   │   └── test
│   │       └── load.py
│   ├── chainer
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── keras
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── mxnet
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── neon
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── openface
│   │   └── Makefile
│   ├── tensorflow
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── torch
│   │   └── Makefile
│   └── transferflow
│       ├── Makefile
│       └── test
│           └── load.py
└── utils
    └── cuda_device_query
        ├── Makefile
        ├── NsightEclipse.xml
        ├── deviceQuery
        ├── deviceQuery.cpp
        ├── deviceQuery.o
        └── readme.txt
/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:14.04 2 | 3 | ENV GPU_SUPPORT=0 4 | ENV PYTHONPATH="/workdir/frameworks/mxnet/src/python:/workdir/frameworks/caffe/src/python:" 5 | ENV PATH="/workdir/frameworks/torch/src/install/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda/bin" 6 | ENV DYLD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:" 7 | ENV LD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:/usr/local/cuda/lib64" 8 | ENV LUA_CPATH="/workdir/frameworks/torch/src/install/lib/?.so;/root/.luarocks/lib/lua/5.1/?.so;/workdir/frameworks/torch/src/install/lib/lua/5.1/?.so;./?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/lib/lua/5.1/loadall.so" 9 | ENV LUA_PATH="/root/.luarocks/share/lua/5.1/?.lua;/root/.luarocks/share/lua/5.1/?/init.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?/init.lua;./?.lua;/workdir/frameworks/torch/src/install/share/luajit-2.1.0-beta1/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua" 10 | 11 | RUN apt-get update --fix-missing 12 | RUN apt-get -y install curl wget python python-numpy python-scipy python-dev python-pip git-core vim 13 | 14 | WORKDIR /workdir 15 | ADD .
/workdir 16 | 17 | RUN make global_dependencies 18 | 19 | # Caffe 20 | WORKDIR /workdir/frameworks/caffe 21 | RUN make dependencies 22 | RUN make src 23 | RUN make build 24 | RUN make install 25 | RUN make load_test 26 | 27 | # Tensorflow 28 | WORKDIR /workdir/frameworks/tensorflow 29 | RUN make dependencies 30 | RUN make src 31 | RUN make build 32 | RUN make install 33 | RUN make load_test 34 | 35 | # Keras 36 | WORKDIR /workdir/frameworks/keras 37 | RUN make dependencies 38 | RUN make src 39 | RUN make build 40 | RUN make install 41 | RUN make load_test 42 | 43 | # Neon 44 | WORKDIR /workdir/frameworks/neon 45 | RUN make dependencies 46 | RUN make src 47 | RUN make build 48 | RUN make install 49 | RUN make load_test 50 | 51 | # Chainer 52 | WORKDIR /workdir/frameworks/chainer 53 | RUN make dependencies 54 | RUN make src 55 | RUN make build 56 | RUN make install 57 | RUN make load_test 58 | 59 | # MXNet 60 | WORKDIR /workdir/frameworks/mxnet 61 | RUN make dependencies 62 | RUN make src 63 | RUN make build 64 | RUN make install 65 | RUN make load_test 66 | 67 | # Torch 68 | WORKDIR /workdir/frameworks/torch 69 | RUN make dependencies 70 | RUN make src 71 | RUN make build 72 | RUN make install 73 | RUN make load_test 74 | 75 | # Openface 76 | WORKDIR /workdir/frameworks/openface 77 | RUN make dependencies 78 | RUN make src 79 | RUN make build 80 | RUN make install 81 | RUN make load_test 82 | 83 | # Transferflow 84 | WORKDIR /workdir/frameworks/transferflow 85 | RUN make dependencies 86 | RUN make src 87 | RUN make build 88 | RUN make install 89 | RUN make load_test 90 | 91 | # Cleanup to make container smaller 92 | WORKDIR /workdir 93 | RUN make clean_global_dependencies 94 | RUN ln -s /dev/null /dev/raw1394 95 | RUN ln -s /usr/local/cuda-8.0 /usr/local/cuda 96 | -------------------------------------------------------------------------------- /Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 2 | 3 | ENV GPU_SUPPORT=1 4 | ENV PYTHONPATH="/workdir/frameworks/mxnet/src/python:/workdir/frameworks/caffe/src/python:/workdir/frameworks/caffe/src/python:" 5 | ENV PATH="/workdir/frameworks/torch/src/install/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda/bin" 6 | ENV DYLD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:" 7 | ENV LD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib::/usr/local/cuda/lib64" 8 | ENV LUA_CPATH="/workdir/frameworks/torch/src/install/lib/?.so;/root/.luarocks/lib/lua/5.1/?.so;/workdir/frameworks/torch/src/install/lib/lua/5.1/?.so;./?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/lib/lua/5.1/loadall.so" 9 | ENV LUA_PATH="/root/.luarocks/share/lua/5.1/?.lua;/root/.luarocks/share/lua/5.1/?/init.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?/init.lua;./?.lua;/workdir/frameworks/torch/src/install/share/luajit-2.1.0-beta1/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua" 10 | 11 | RUN apt-get update --fix-missing 12 | RUN apt-get -y install curl wget python python-numpy python-scipy python-dev python-pip git-core vim 13 | 14 | WORKDIR /workdir 15 | ADD . 
/workdir 16 | 17 | RUN make global_dependencies 18 | 19 | # Caffe 20 | WORKDIR /workdir/frameworks/caffe 21 | RUN make dependencies 22 | RUN make src 23 | RUN make build 24 | RUN make install 25 | RUN make load_test 26 | 27 | # Tensorflow 28 | WORKDIR /workdir/frameworks/tensorflow 29 | RUN make dependencies 30 | RUN make src 31 | RUN make build 32 | RUN make install 33 | RUN make load_test 34 | 35 | # Keras 36 | WORKDIR /workdir/frameworks/keras 37 | RUN make dependencies 38 | RUN make src 39 | RUN make build 40 | RUN make install 41 | RUN make load_test 42 | 43 | # Neon 44 | WORKDIR /workdir/frameworks/neon 45 | RUN make dependencies 46 | RUN make src 47 | RUN make build 48 | RUN make install 49 | RUN make load_test 50 | 51 | # Chainer 52 | WORKDIR /workdir/frameworks/chainer 53 | RUN make dependencies 54 | RUN make src 55 | RUN make build 56 | RUN make install 57 | RUN make load_test 58 | 59 | # MXNet 60 | WORKDIR /workdir/frameworks/mxnet 61 | RUN make dependencies 62 | RUN make src 63 | RUN make build 64 | RUN make install 65 | RUN make load_test 66 | 67 | # Torch 68 | WORKDIR /workdir/frameworks/torch 69 | RUN make dependencies 70 | RUN make src 71 | RUN make build 72 | RUN make install 73 | RUN make load_test 74 | 75 | # Openface 76 | WORKDIR /workdir/frameworks/openface 77 | RUN make dependencies 78 | RUN make src 79 | RUN make build 80 | RUN make install 81 | RUN make load_test 82 | 83 | # Transferflow 84 | WORKDIR /workdir/frameworks/transferflow 85 | RUN make dependencies 86 | RUN make src 87 | RUN make build 88 | RUN make install 89 | RUN make load_test 90 | 91 | # Cleanup to make container smaller 92 | WORKDIR /workdir 93 | RUN make clean_global_dependencies 94 | RUN ln -s /dev/null /dev/raw1394 95 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | DEEP_BASE_VERSION = 1.2 3 | GPU_SUPPORT ?= 0 4 | 5 | .PHONY: docker.build.gpu 6 | docker.build.gpu: 7 | -@docker stop dominiek/deep-base 8 | -@docker rm dominiek/deep-base 9 | docker build -t dominiek/deep-base-gpu -f Dockerfile.gpu . 10 | 11 | .PHONY: docker.build 12 | docker.build: 13 | -@docker stop dominiek/deep-base 14 | -@docker rm dominiek/deep-base 15 | docker build -t dominiek/deep-base . 
16 | 17 | .PHONY: docker.push.gpu 18 | docker.push.gpu: 19 | docker push dominiek/deep-base-gpu:latest 20 | docker push dominiek/deep-base-gpu:v$(DEEP_BASE_VERSION) 21 | 22 | .PHONY: docker.tag.gpu 23 | docker.tag.gpu: 24 | docker tag dominiek/deep-base-gpu dominiek/deep-base-gpu:v$(DEEP_BASE_VERSION) 25 | 26 | .PHONY: docker.push 27 | docker.push: 28 | docker push dominiek/deep-base:latest 29 | docker push dominiek/deep-base:v$(DEEP_BASE_VERSION) 30 | 31 | .PHONY: docker.tag 32 | docker.tag: 33 | docker tag dominiek/deep-base dominiek/deep-base:v$(DEEP_BASE_VERSION) 34 | 35 | .PHONY: docker.clean 36 | docker.clean: 37 | docker rm $(shell docker ps -a -q) 38 | docker rmi $(shell docker images -q) 39 | 40 | .PHONY: global_dependencies 41 | global_dependencies: 42 | echo $(DEEP_BASE_VERSION) > /etc/deep_base_version 43 | ln -s /dev/null /dev/raw1394 44 | pip install cython 45 | pip install scikit-learn 46 | pip install bhtsne 47 | ifeq ($(GPU_SUPPORT),1) 48 | @echo "Building with GPU support" 49 | #echo 'export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH' >> ~/.bashrc 50 | #echo 'export LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:/usr/local/cuda/lib64' >> ~/.bashrc 51 | cd utils/cuda_device_query; make; cp deviceQuery /usr/local/bin/cuda_device_query 52 | else 53 | @echo "Building CPU-only" 54 | endif 55 | 56 | .PHONY: clean_global_dependencies 57 | clean_global_dependencies: 58 | apt-get remove -y git-core curl 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Deep Learning Base Image 3 | 4 | Today's deep learning frameworks require an extraordinary amount of work to install and run. This Docker image bundles all popular deep learning frameworks into a single container. Ubuntu Linux is the base OS of choice for this container: CUDA requires it, and all DL frameworks play nicely with it. 5 | 6 | Supported DL frameworks: 7 | 8 | - [TensorFlow](https://www.tensorflow.org/) (v1.0.1) 9 | - [Caffe](http://caffe.berkeleyvision.org/) (RC5) 10 | - [Theano](http://deeplearning.net/software/theano/) 11 | - [Keras](http://keras.io/) (v1.2.2) 12 | - [MXNet](http://mxnet.readthedocs.io/en/latest/) (v0.9.3) 13 | - [Torch](http://torch.ch/) 14 | - [Chainer](http://chainer.org/) 15 | - [Neon](http://neon.nervanasys.com/docs/latest/index.html) (v1.8.2) 16 | - [Transferflow](http://github.com/dominiek/transferflow) (v0.1.4) 17 | 18 | Other ML frameworks: 19 | 20 | - Python / SciPy / NumPy / DLib 21 | - [Scikit-Learn](http://scikit-learn.org/stable/) 22 | - [Scikit-Image](http://scikit-image.org/) 23 | - [OpenFace](https://cmusatyalab.github.io/openface/) (v0.2.1) 24 | 25 | ### Usage 26 | 27 | _For GPU usage, see below._ 28 | 29 | Run the latest version.
All DL frameworks are available at your fingertips; the `import` lines below are typed at the Python prompt that the command opens: 30 | 31 | ``` 32 | docker run -it dominiek/deep-base:latest python 33 | import tensorflow 34 | import matplotlib 35 | matplotlib.use('Agg') 36 | import caffe 37 | import openface 38 | ``` 39 | 40 | Or pull a specific version tag: 41 | 42 | ``` 43 | docker pull dominiek/deep-base:v1.3 44 | ``` 45 | 46 | To use `deep-base` as the base for your deployment's Docker container, specify the right `FROM` directive in your `Dockerfile`: 47 | 48 | ``` 49 | FROM dominiek/deep-base:v1.3 50 | ``` 51 | 52 | To run code from the host OS, simply mount the source code directory: 53 | 54 | ``` 55 | mkdir code 56 | echo 'import tensorflow' > code/app.py 57 | docker run --volume `pwd`/code:/code -it dominiek/deep-base:latest python /code/app.py 58 | ``` 59 | 60 | ### GPU Usage 61 | 62 | GPU support requires many additional libraries, such as NVIDIA CUDA and cuDNN. There is a separate Docker repository for the GPU version: 63 | 64 | ``` 65 | FROM dominiek/deep-base-gpu:v1.3 66 | ``` 67 | 68 | Running the GPU image requires you to bind the host OS's CUDA libraries and devices into the container. The host OS must run the same CUDA version as deep-base itself (CUDA 8.0). 69 | 70 | The most reliable way to do this is to use [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker): 71 | 72 | ```bash 73 | nvidia-docker run -it dominiek/deep-base-gpu /bin/bash 74 | ``` 75 | 76 | Alternatively, you can use vanilla Docker and bind the devices yourself: 77 | 78 | ```bash 79 | export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') 80 | export CUDA_DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') 81 | docker run --privileged $CUDA_SO $CUDA_DEVICES -it dominiek/deep-base-gpu /bin/bash 82 | ``` 83 | 84 | Now, to make sure the GPU hardware is working correctly, use the `cuda_device_query` command inside the container: 85 | 86 | ```bash 87 | root@37a895460633:/workdir# cuda_device_query 88 | ... 89 | Result = PASS 90 | ``` 91 | 92 | ### Build a customized Docker image 93 | 94 | This is optional. To start the build process, execute: 95 | 96 | ``` 97 | make docker.build 98 | ``` 99 | 100 | During the build, small tests are run to make sure the compiled Python bindings load properly. 101 | 102 | For GPU support (requires CUDA-compatible host hardware and a Linux host OS): 103 | 104 | ``` 105 | make docker.build.gpu 106 | ``` 107 | 108 | ### Performance 109 | 110 | There is a CPU and a GPU version of this Docker container. The latter requires [CUDA-compatible hardware](https://developer.nvidia.com/cuda-gpus), which includes AWS GPU instances. When running Docker on a Linux host OS, no virtual machine is used and the CUDA hardware can be fully utilized. 111 | 112 | Note, however, that on Windows and Mac OS X Docker runs inside a virtual machine such as VirtualBox, which does not support GPU passthrough. This means no GPU can be used on these host OSes. The recommended pattern is to use virtualization in a Windows/Mac-based local development environment, but use Linux for staging and production environments.
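
As an extra sanity check beyond `cuda_device_query`, you can ask TensorFlow itself which devices it sees. A minimal sketch, assuming the TensorFlow 1.x API bundled in this image (run it inside the GPU container):

```python
import tensorflow as tf

# With log_device_placement=True, TensorFlow logs the device chosen for each
# op (e.g. /gpu:0); on a working GPU setup the ops below land on the GPU.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    a = tf.constant([1.0, 2.0], name='a')
    b = tf.constant([3.0, 4.0], name='b')
    print(sess.run(a + b))
```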
113 | 114 | ### TODO 115 | 116 | - Add an MNIST example that can be run easily 117 | - Create a benchmark utility that shows the performance of each framework in a running instance 118 | - Use OpenBLAS for frameworks that support it 119 | - Reduce the size footprint of the container image 120 | -------------------------------------------------------------------------------- /downloads/cudnn-7.5-linux-x64-v5.0-ga.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/downloads/cudnn-7.5-linux-x64-v5.0-ga.tgz -------------------------------------------------------------------------------- /frameworks/caffe/Makefile: -------------------------------------------------------------------------------- 1 | 2 | GPU_SUPPORT ?= 0 3 | 4 | .PHONY: dependencies 5 | dependencies: 6 | apt-get -y install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler 7 | apt-get -y install --no-install-recommends libboost-all-dev 8 | apt-get -y install libatlas-base-dev 9 | apt-get -y install libgflags-dev libgoogle-glog-dev liblmdb-dev 10 | apt-get -y install libdc1394-22-dev libdc1394-22 libdc1394-utils python-opencv python-pip gfortran 11 | pip install cython 12 | pip install scikit-image 13 | pip install protobuf 14 | 15 | .PHONY: src 16 | src: 17 | git clone --recursive https://github.com/BVLC/caffe.git src 18 | cd src; git checkout rc5 19 | 20 | .PHONY: build 21 | build: 22 | ifeq ($(GPU_SUPPORT),1) 23 | cp config/Makefile.config.gpu src/Makefile.config 24 | else 25 | cp config/Makefile.config.cpu src/Makefile.config 26 | endif 27 | cd src; make all 28 | cd src; make pycaffe 29 | 30 | .PHONY: install 31 | install: 32 | cd src; make distribute 33 | export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH 34 | echo 'export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH' >> ~/.bashrc 35 | 36 | .PHONY: load_test 37 | load_test: 38 | ifeq ($(GPU_SUPPORT),1) 39 | LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):/usr/local/cuda/lib64 PYTHONPATH=/workdir/frameworks/caffe/src/python:$(PYTHONPATH) python test/load.py 40 | else 41 | PYTHONPATH=/workdir/frameworks/caffe/src/python:$(PYTHONPATH) python test/load.py 42 | endif 43 | -------------------------------------------------------------------------------- /frameworks/caffe/config/Makefile.config.cpu: -------------------------------------------------------------------------------- 1 | ## Refer to http://caffe.berkeleyvision.org/installation.html 2 | # Contributions simplifying and improving our build system are welcome! 3 | 4 | # cuDNN acceleration switch (uncomment to build with cuDNN). 5 | # USE_CUDNN := 1 6 | 7 | # CPU-only switch (uncomment to build without GPU support). 8 | CPU_ONLY := 1 9 | 10 | # uncomment to disable IO dependencies and corresponding data layers 11 | USE_OPENCV := 1 12 | # USE_LEVELDB := 0 13 | # USE_LMDB := 0 14 | 15 | # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) 16 | # You should not set this flag if you will be reading LMDBs with any 17 | # possibility of simultaneous read and write 18 | # ALLOW_LMDB_NOLOCK := 1 19 | 20 | # Uncomment if you're using OpenCV 3 21 | # OPENCV_VERSION := 3 22 | 23 | # To customize your choice of compiler, uncomment and set the following. 24 | # N.B. the default for Linux is g++ and the default for OSX is clang++ 25 | # CUSTOM_CXX := g++ 26 | 27 | # CUDA directory contains bin/ and lib/ directories that we need.
28 | CUDA_DIR := /usr/local/cuda 29 | # On Ubuntu 14.04, if cuda tools are installed via 30 | # "sudo apt-get install nvidia-cuda-toolkit" then use this instead: 31 | # CUDA_DIR := /usr 32 | 33 | # CUDA architecture setting: going with all of them. 34 | # For CUDA < 6.0, comment the *_50 lines for compatibility. 35 | CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ 36 | -gencode arch=compute_20,code=sm_21 \ 37 | -gencode arch=compute_30,code=sm_30 \ 38 | -gencode arch=compute_35,code=sm_35 \ 39 | -gencode arch=compute_50,code=sm_50 \ 40 | -gencode arch=compute_50,code=compute_50 41 | 42 | # BLAS choice: 43 | # atlas for ATLAS (default) 44 | # mkl for MKL 45 | # open for OpenBlas 46 | BLAS := atlas 47 | # Custom (MKL/ATLAS/OpenBLAS) include and lib directories. 48 | # Leave commented to accept the defaults for your choice of BLAS 49 | # (which should work)! 50 | # BLAS_INCLUDE := /path/to/your/blas 51 | # BLAS_LIB := /path/to/your/blas 52 | 53 | # Homebrew puts openblas in a directory that is not on the standard search path 54 | # BLAS_INCLUDE := $(shell brew --prefix openblas)/include 55 | # BLAS_LIB := $(shell brew --prefix openblas)/lib 56 | 57 | # This is required only if you will compile the matlab interface. 58 | # MATLAB directory should contain the mex binary in /bin. 59 | # MATLAB_DIR := /usr/local 60 | # MATLAB_DIR := /Applications/MATLAB_R2012b.app 61 | 62 | # NOTE: this is required only if you will compile the python interface. 63 | # We need to be able to find Python.h and numpy/arrayobject.h. 64 | PYTHON_INCLUDE := /usr/include/python2.7 \ 65 | /usr/lib/python2.7/dist-packages/numpy/core/include 66 | # Anaconda Python distribution is quite popular. Include path: 67 | # Verify anaconda location, sometimes it's in root. 68 | # ANACONDA_HOME := $(HOME)/anaconda 69 | # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ 70 | # $(ANACONDA_HOME)/include/python2.7 \ 71 | # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \ 72 | 73 | # Uncomment to use Python 3 (default is Python 2) 74 | # PYTHON_LIBRARIES := boost_python3 python3.5m 75 | # PYTHON_INCLUDE := /usr/include/python3.5m \ 76 | # /usr/lib/python3.5/dist-packages/numpy/core/include 77 | 78 | # We need to be able to find libpythonX.X.so or .dylib. 79 | PYTHON_LIB := /usr/lib 80 | # PYTHON_LIB := $(ANACONDA_HOME)/lib 81 | 82 | # Homebrew installs numpy in a non standard path (keg only) 83 | # PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include 84 | # PYTHON_LIB += $(shell brew --prefix numpy)/lib 85 | 86 | # Uncomment to support layers written in Python (will link against Python libs) 87 | # WITH_PYTHON_LAYER := 1 88 | 89 | # Whatever else you find you need goes here. 90 | INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include 91 | LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib 92 | 93 | # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies 94 | # INCLUDE_DIRS += $(shell brew --prefix)/include 95 | # LIBRARY_DIRS += $(shell brew --prefix)/lib 96 | 97 | # Uncomment to use `pkg-config` to specify OpenCV library paths. 98 | # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) 99 | # USE_PKG_CONFIG := 1 100 | 101 | # N.B. both build and distribute dirs are cleared on `make clean` 102 | BUILD_DIR := build 103 | DISTRIBUTE_DIR := distribute 104 | 105 | # Uncomment for debugging. 
Does not work on OSX due to https://github.com/BVLC/caffe/issues/171 106 | # DEBUG := 1 107 | 108 | # The ID of the GPU that 'make runtest' will use to run unit tests. 109 | TEST_GPUID := 0 110 | 111 | # enable pretty build (comment to see full commands) 112 | Q ?= @ 113 | -------------------------------------------------------------------------------- /frameworks/caffe/config/Makefile.config.gpu: -------------------------------------------------------------------------------- 1 | ## Refer to http://caffe.berkeleyvision.org/installation.html 2 | # Contributions simplifying and improving our build system are welcome! 3 | 4 | # cuDNN acceleration switch (uncomment to build with cuDNN). 5 | USE_CUDNN := 1 6 | 7 | # CPU-only switch (uncomment to build without GPU support). 8 | # CPU_ONLY := 1 9 | 10 | # uncomment to disable IO dependencies and corresponding data layers 11 | USE_OPENCV := 1 12 | # USE_LEVELDB := 0 13 | # USE_LMDB := 0 14 | 15 | # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) 16 | # You should not set this flag if you will be reading LMDBs with any 17 | # possibility of simultaneous read and write 18 | # ALLOW_LMDB_NOLOCK := 1 19 | 20 | # Uncomment if you're using OpenCV 3 21 | # OPENCV_VERSION := 3 22 | 23 | # To customize your choice of compiler, uncomment and set the following. 24 | # N.B. the default for Linux is g++ and the default for OSX is clang++ 25 | # CUSTOM_CXX := g++ 26 | 27 | # CUDA directory contains bin/ and lib/ directories that we need. 28 | CUDA_DIR := /usr/local/cuda 29 | # On Ubuntu 14.04, if cuda tools are installed via 30 | # "sudo apt-get install nvidia-cuda-toolkit" then use this instead: 31 | # CUDA_DIR := /usr 32 | 33 | # CUDA architecture setting: going with all of them. 34 | # For CUDA < 6.0, comment the *_50 lines for compatibility. 35 | CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ 36 | -gencode arch=compute_20,code=sm_21 \ 37 | -gencode arch=compute_30,code=sm_30 \ 38 | -gencode arch=compute_35,code=sm_35 \ 39 | -gencode arch=compute_50,code=sm_50 \ 40 | -gencode arch=compute_50,code=compute_50 41 | 42 | # BLAS choice: 43 | # atlas for ATLAS (default) 44 | # mkl for MKL 45 | # open for OpenBlas 46 | BLAS := atlas 47 | # Custom (MKL/ATLAS/OpenBLAS) include and lib directories. 48 | # Leave commented to accept the defaults for your choice of BLAS 49 | # (which should work)! 50 | # BLAS_INCLUDE := /path/to/your/blas 51 | # BLAS_LIB := /path/to/your/blas 52 | 53 | # Homebrew puts openblas in a directory that is not on the standard search path 54 | # BLAS_INCLUDE := $(shell brew --prefix openblas)/include 55 | # BLAS_LIB := $(shell brew --prefix openblas)/lib 56 | 57 | # This is required only if you will compile the matlab interface. 58 | # MATLAB directory should contain the mex binary in /bin. 59 | # MATLAB_DIR := /usr/local 60 | # MATLAB_DIR := /Applications/MATLAB_R2012b.app 61 | 62 | # NOTE: this is required only if you will compile the python interface. 63 | # We need to be able to find Python.h and numpy/arrayobject.h. 64 | PYTHON_INCLUDE := /usr/include/python2.7 \ 65 | /usr/lib/python2.7/dist-packages/numpy/core/include 66 | # Anaconda Python distribution is quite popular. Include path: 67 | # Verify anaconda location, sometimes it's in root. 
68 | # ANACONDA_HOME := $(HOME)/anaconda 69 | # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ 70 | # $(ANACONDA_HOME)/include/python2.7 \ 71 | # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \ 72 | 73 | # Uncomment to use Python 3 (default is Python 2) 74 | # PYTHON_LIBRARIES := boost_python3 python3.5m 75 | # PYTHON_INCLUDE := /usr/include/python3.5m \ 76 | # /usr/lib/python3.5/dist-packages/numpy/core/include 77 | 78 | # We need to be able to find libpythonX.X.so or .dylib. 79 | PYTHON_LIB := /usr/lib 80 | # PYTHON_LIB := $(ANACONDA_HOME)/lib 81 | 82 | # Homebrew installs numpy in a non standard path (keg only) 83 | # PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include 84 | # PYTHON_LIB += $(shell brew --prefix numpy)/lib 85 | 86 | # Uncomment to support layers written in Python (will link against Python libs) 87 | # WITH_PYTHON_LAYER := 1 88 | 89 | # Whatever else you find you need goes here. 90 | INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include 91 | LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib 92 | 93 | # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies 94 | # INCLUDE_DIRS += $(shell brew --prefix)/include 95 | # LIBRARY_DIRS += $(shell brew --prefix)/lib 96 | 97 | # Uncomment to use `pkg-config` to specify OpenCV library paths. 98 | # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) 99 | # USE_PKG_CONFIG := 1 100 | 101 | # N.B. both build and distribute dirs are cleared on `make clean` 102 | BUILD_DIR := build 103 | DISTRIBUTE_DIR := distribute 104 | 105 | # Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171 106 | # DEBUG := 1 107 | 108 | # The ID of the GPU that 'make runtest' will use to run unit tests. 
109 | TEST_GPUID := 0 110 | 111 | # enable pretty build (comment to see full commands) 112 | Q ?= @ 113 | -------------------------------------------------------------------------------- /frameworks/caffe/test/load.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import caffe 4 | 5 | print('Python-Caffe loaded OK') 6 | -------------------------------------------------------------------------------- /frameworks/chainer/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | pip install -U setuptools 5 | 6 | .PHONY: src 7 | src: 8 | echo "OK" 9 | 10 | .PHONY: build 11 | build: 12 | echo "OK" 13 | 14 | .PHONY: install 15 | install: 16 | pip install chainer 17 | 18 | .PHONY: load_test 19 | load_test: 20 | python test/load.py 21 | -------------------------------------------------------------------------------- /frameworks/chainer/test/load.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | -------------------------------------------------------------------------------- /frameworks/keras/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | apt-get -y install python-h5py python-yaml 5 | 6 | .PHONY: src 7 | src: 8 | git clone --recursive https://github.com/fchollet/keras.git src 9 | cd src; git checkout 1.2.2 10 | 11 | .PHONY: build 12 | build: 13 | cd src; python setup.py install 14 | 15 | .PHONY: install 16 | install: 17 | echo "OK" 18 | 19 | .PHONY: load_test 20 | load_test: 21 | python test/load.py 22 | -------------------------------------------------------------------------------- /frameworks/keras/test/load.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import keras 3 | -------------------------------------------------------------------------------- /frameworks/mxnet/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | echo "OK" 5 | 6 | .PHONY: src 7 | src: 8 | git clone --recursive https://github.com/dmlc/mxnet src 9 | cd src; git checkout v0.9.3 10 | 11 | .PHONY: build 12 | build: 13 | cd src; make -j4 14 | 15 | .PHONY: install 16 | install: 17 | export PYTHONPATH=/workdir/frameworks/mxnet/src/python:$$PYTHONPATH 18 | echo 'export PYTHONPATH=/workdir/frameworks/mxnet/src/python:$$PYTHONPATH' >> ~/.bashrc 19 | 20 | .PHONY: load_test 21 | load_test: 22 | PYTHONPATH=/workdir/frameworks/mxnet/src/python:$(PYTHONPATH) python test/load.py 23 | -------------------------------------------------------------------------------- /frameworks/mxnet/test/load.py: -------------------------------------------------------------------------------- 1 | 2 | import mxnet 3 | 4 | print('Python-MXNet loaded OK') 5 | -------------------------------------------------------------------------------- /frameworks/neon/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | apt-get -y install python-virtualenv 5 | 6 | .PHONY: src 7 | src: 8 | git clone --recursive https://github.com/NervanaSystems/neon.git src 9 | cd src; git checkout v1.8.2 10 | 11 | .PHONY: build 12 | build: 13 | cd src; make sysinstall 14 | 15 | .PHONY: install 16 | install: 17 | echo "OK" 18 | 19 | 
.PHONY: load_test 20 | load_test: 21 | python test/load.py 22 | -------------------------------------------------------------------------------- /frameworks/neon/test/load.py: -------------------------------------------------------------------------------- 1 | import neon 2 | -------------------------------------------------------------------------------- /frameworks/openface/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | apt-get -y install libboost-python-dev cmake 5 | pip install dlib 6 | . ~/.bashrc; for lib in dpnn nn optim optnet csvigo cutorch cunn fblualib torchx tds; do \ 7 | luarocks install $$lib ; \ 8 | done 9 | 10 | .PHONY: src 11 | src: 12 | git clone --recursive https://github.com/cmusatyalab/openface.git src 13 | cd src; git checkout 0.2.1 14 | 15 | .PHONY: build 16 | build: 17 | echo "OK" 18 | 19 | .PHONY: install 20 | install: 21 | cd src; python setup.py install 22 | 23 | .PHONY: load_test 24 | load_test: 25 | echo "Skip" 26 | -------------------------------------------------------------------------------- /frameworks/tensorflow/Makefile: -------------------------------------------------------------------------------- 1 | 2 | GPU_SUPPORT ?= 0 3 | 4 | .PHONY: dependencies 5 | dependencies: 6 | apt-get -y install software-properties-common 7 | add-apt-repository -y ppa:webupd8team/java 8 | apt-get update 9 | echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | sudo debconf-set-selections 10 | apt-get -y install oracle-java8-installer unzip 11 | wget "https://github.com/bazelbuild/bazel/releases/download/0.4.4/bazel-0.4.4-installer-linux-x86_64.sh" 12 | chmod +x bazel-0.4.4-installer-linux-x86_64.sh 13 | ./bazel-0.4.4-installer-linux-x86_64.sh 14 | rm -f bazel-0.4.4-installer-linux-x86_64.sh 15 | bash /usr/local/lib/bazel/bin/bazel-complete.bash 16 | apt-get -y install swig 17 | apt-get -y install libcurl3-dev 18 | pip install protobuf==3.2.0 19 | pip install six --upgrade --target="/usr/lib/python2.7/dist-packages" 20 | 21 | .PHONY: src 22 | src: 23 | git clone --recurse-submodules https://github.com/tensorflow/tensorflow src 24 | cd src; git checkout v1.0.1 25 | 26 | .PHONY: build 27 | build: 28 | ifeq ($(GPU_SUPPORT),1) 29 | cd src; echo "/usr/local/lib/python2.7/dist-packages" > configure_params.txt 30 | mkdir /usr/lib/x86_64-linux-gnu/include 31 | ln -s /usr/include/cudnn.h /usr/lib/x86_64-linux-gnu/include/cudnn.h 32 | cd src; TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2 TF_NEED_CUDA=1 LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$(LD_LIBRARY_PATH) tensorflow/tools/ci_build/builds/configured GPU 33 | #cd src; TF_NEED_OPENCL=0 PYTHON_LIB_PATH=/usr/lib/python2.7/dist-packages TF_ENABLE_XLA=0 TF_NEED_HDFS=0 TF_NEED_JEMALLOC=1 CC_OPT_FLAGS="-march=native" GCC_HOST_COMPILER_PATH=/usr/bin/gcc PYTHON_BIN_PATH=/usr/bin/python TF_NEED_GCP=0 CUDA_TOOLKIT_PATH="/usr/local/cuda" CUDNN_INSTALL_PATH="/usr/local/cuda" TF_NEED_CUDA=1 TF_CUDA_COMPUTE_CAPABILITIES="3.0" TF_CUDNN_VERSION="5" TF_CUDA_VERSION="8.0" TF_CUDA_VERSION_TOOLKIT=8.0 TF_NEED_HDFS=0 ./configure < configure_params.txt 34 | cd src; bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package 35 | else 36 | cd src; TF_NEED_OPENCL=0 PYTHON_LIB_PATH=/usr/lib/python2.7/dist-packages TF_ENABLE_XLA=0 TF_NEED_HDFS=0 PYTHON_BIN_PATH=/usr/bin/python TF_NEED_JEMALLOC=1 CC_OPT_FLAGS="-march=native" TF_NEED_GCP=0 TF_NEED_CUDA=0 ./configure 37 | cd src; bazel build -c opt 
//tensorflow/tools/pip_package:build_pip_package 38 | endif 39 | 40 | .PHONY: install 41 | install: 42 | cd src; bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg 43 | pip install /tmp/tensorflow_pkg/* 44 | 45 | .PHONY: load_test 46 | load_test: 47 | ifeq ($(GPU_SUPPORT),1) 48 | LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):/usr/local/cuda/lib64 python test/load.py 49 | else 50 | python test/load.py 51 | endif 52 | -------------------------------------------------------------------------------- /frameworks/tensorflow/test/load.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow 3 | 4 | print('Python-Tensorflow loaded OK') 5 | -------------------------------------------------------------------------------- /frameworks/torch/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | echo "OK" 5 | 6 | .PHONY: src 7 | src: 8 | git clone --recursive https://github.com/torch/distro.git src 9 | 10 | .PHONY: build 11 | build: 12 | cd src; bash install-deps 13 | 14 | .PHONY: install 15 | install: 16 | cd src; ./install.sh 17 | 18 | .PHONY: load_test 19 | load_test: 20 | . ~/.bashrc; th -h 21 | -------------------------------------------------------------------------------- /frameworks/transferflow/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: dependencies 3 | dependencies: 4 | echo "OK" 5 | 6 | .PHONY: src 7 | src: 8 | git clone --recursive https://github.com/dominiek/transferflow.git src 9 | cd src; git checkout v0.1.4 10 | 11 | .PHONY: build 12 | build: 13 | cd src; make 14 | 15 | .PHONY: install 16 | install: 17 | cd src; python setup.py install 18 | 19 | .PHONY: load_test 20 | load_test: 21 | python test/load.py 22 | -------------------------------------------------------------------------------- /frameworks/transferflow/test/load.py: -------------------------------------------------------------------------------- 1 | 2 | import transferflow 3 | 4 | print('Python-Transferflow loaded OK') 5 | -------------------------------------------------------------------------------- /utils/cuda_device_query/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO USER: 6 | # 7 | # This source code is subject to NVIDIA ownership rights under U.S. and 8 | # international Copyright laws. 9 | # 10 | # NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 11 | # CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 12 | # IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH 13 | # REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 14 | # MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 15 | # IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 16 | # OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 17 | # OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 18 | # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 19 | # OR PERFORMANCE OF THIS SOURCE CODE. 20 | # 21 | # U.S. Government End Users. This source code is a "commercial item" as 22 | # that term is defined at 48 C.F.R. 
2.101 (OCT 1995), consisting of 23 | # "commercial computer software" and "commercial computer software 24 | # documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) 25 | # and is provided to the U.S. Government only as a commercial end item. 26 | # Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 27 | # 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 28 | # source code with only those rights set forth herein. 29 | # 30 | ################################################################################ 31 | # 32 | # Makefile project only supported on Mac OS X and Linux Platforms) 33 | # 34 | ################################################################################ 35 | 36 | # Location of the CUDA Toolkit 37 | CUDA_PATH ?= /usr/local/cuda-8.0 38 | 39 | ############################## 40 | # start deprecated interface # 41 | ############################## 42 | ifeq ($(x86_64),1) 43 | $(info WARNING - x86_64 variable has been deprecated) 44 | $(info WARNING - please use TARGET_ARCH=x86_64 instead) 45 | TARGET_ARCH ?= x86_64 46 | endif 47 | ifeq ($(ARMv7),1) 48 | $(info WARNING - ARMv7 variable has been deprecated) 49 | $(info WARNING - please use TARGET_ARCH=armv7l instead) 50 | TARGET_ARCH ?= armv7l 51 | endif 52 | ifeq ($(aarch64),1) 53 | $(info WARNING - aarch64 variable has been deprecated) 54 | $(info WARNING - please use TARGET_ARCH=aarch64 instead) 55 | TARGET_ARCH ?= aarch64 56 | endif 57 | ifeq ($(ppc64le),1) 58 | $(info WARNING - ppc64le variable has been deprecated) 59 | $(info WARNING - please use TARGET_ARCH=ppc64le instead) 60 | TARGET_ARCH ?= ppc64le 61 | endif 62 | ifneq ($(GCC),) 63 | $(info WARNING - GCC variable has been deprecated) 64 | $(info WARNING - please use HOST_COMPILER=$(GCC) instead) 65 | HOST_COMPILER ?= $(GCC) 66 | endif 67 | ifneq ($(abi),) 68 | $(error ERROR - abi variable has been removed) 69 | endif 70 | ############################ 71 | # end deprecated interface # 72 | ############################ 73 | 74 | # architecture 75 | HOST_ARCH := $(shell uname -m) 76 | TARGET_ARCH ?= $(HOST_ARCH) 77 | ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l)) 78 | ifneq ($(TARGET_ARCH),$(HOST_ARCH)) 79 | ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le)) 80 | TARGET_SIZE := 64 81 | else ifneq (,$(filter $(TARGET_ARCH),armv7l)) 82 | TARGET_SIZE := 32 83 | endif 84 | else 85 | TARGET_SIZE := $(shell getconf LONG_BIT) 86 | endif 87 | else 88 | $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) 89 | endif 90 | ifneq ($(TARGET_ARCH),$(HOST_ARCH)) 91 | ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le)) 92 | $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) 93 | endif 94 | endif 95 | 96 | # When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l 97 | ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) 98 | TARGET_ARCH = armv7l 99 | endif 100 | 101 | # operating system 102 | HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") 103 | TARGET_OS ?= $(HOST_OS) 104 | ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) 105 | $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) 106 | endif 107 | 108 | # host compiler 109 | ifeq ($(TARGET_OS),darwin) 110 | ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' 
-f1` \>= 5),1) 111 | HOST_COMPILER ?= clang++ 112 | endif 113 | else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) 114 | ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) 115 | ifeq ($(TARGET_OS),linux) 116 | HOST_COMPILER ?= arm-linux-gnueabihf-g++ 117 | else ifeq ($(TARGET_OS),qnx) 118 | ifeq ($(QNX_HOST),) 119 | $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) 120 | endif 121 | ifeq ($(QNX_TARGET),) 122 | $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) 123 | endif 124 | export QNX_HOST 125 | export QNX_TARGET 126 | HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ 127 | else ifeq ($(TARGET_OS),android) 128 | HOST_COMPILER ?= arm-linux-androideabi-g++ 129 | endif 130 | else ifeq ($(TARGET_ARCH),aarch64) 131 | ifeq ($(TARGET_OS), linux) 132 | HOST_COMPILER ?= aarch64-linux-gnu-g++ 133 | else ifeq ($(TARGET_OS),qnx) 134 | ifeq ($(QNX_HOST),) 135 | $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) 136 | endif 137 | ifeq ($(QNX_TARGET),) 138 | $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) 139 | endif 140 | export QNX_HOST 141 | export QNX_TARGET 142 | HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++ 143 | else ifeq ($(TARGET_OS), android) 144 | HOST_COMPILER ?= aarch64-linux-android-g++ 145 | endif 146 | else ifeq ($(TARGET_ARCH),ppc64le) 147 | HOST_COMPILER ?= powerpc64le-linux-gnu-g++ 148 | endif 149 | endif 150 | HOST_COMPILER ?= g++ 151 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 152 | 153 | # internal flags 154 | NVCCFLAGS := -m${TARGET_SIZE} 155 | CCFLAGS := 156 | LDFLAGS := 157 | 158 | # build flags 159 | ifeq ($(TARGET_OS),darwin) 160 | LDFLAGS += -rpath $(CUDA_PATH)/lib 161 | CCFLAGS += -arch $(HOST_ARCH) 162 | else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) 163 | LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 164 | CCFLAGS += -mfloat-abi=hard 165 | else ifeq ($(TARGET_OS),android) 166 | LDFLAGS += -pie 167 | CCFLAGS += -fpie -fpic -fexceptions 168 | endif 169 | 170 | ifneq ($(TARGET_ARCH),$(HOST_ARCH)) 171 | ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) 172 | ifneq ($(TARGET_FS),) 173 | GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) 174 | ifeq ($(GCCVERSIONLTEQ46),1) 175 | CCFLAGS += --sysroot=$(TARGET_FS) 176 | endif 177 | LDFLAGS += --sysroot=$(TARGET_FS) 178 | LDFLAGS += -rpath-link=$(TARGET_FS)/lib 179 | LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib 180 | LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf 181 | endif 182 | endif 183 | endif 184 | 185 | # Debug build flags 186 | ifeq ($(dbg),1) 187 | NVCCFLAGS += -g -G 188 | BUILD_TYPE := debug 189 | else 190 | BUILD_TYPE := release 191 | endif 192 | 193 | ALL_CCFLAGS := 194 | ALL_CCFLAGS += $(NVCCFLAGS) 195 | ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) 196 | ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) 197 | ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) 198 | 199 | SAMPLE_ENABLED := 1 200 | 201 | ALL_LDFLAGS := 202 | ALL_LDFLAGS += $(ALL_CCFLAGS) 203 | ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) 204 | ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) 205 | 206 | # Common includes and paths for CUDA 207 | INCLUDES := -I../../common/inc 208 | LIBRARIES := 209 | 210 | ################################################################################ 211 | 212 | # Gencode arguments 213 | SMS ?= 20 30 35 37 50 52 60 214 | 215 | ifeq ($(SMS),) 216 | $(info >>> WARNING - no SM architectures have been specified - waiving sample 
<<<) 217 | SAMPLE_ENABLED := 0 218 | endif 219 | 220 | ifeq ($(GENCODE_FLAGS),) 221 | # Generate SASS code for each SM architecture listed in $(SMS) 222 | $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) 223 | 224 | # Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility 225 | HIGHEST_SM := $(lastword $(sort $(SMS))) 226 | ifneq ($(HIGHEST_SM),) 227 | GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) 228 | endif 229 | endif 230 | 231 | ifeq ($(SAMPLE_ENABLED),0) 232 | EXEC ?= @echo "[@]" 233 | endif 234 | 235 | ################################################################################ 236 | 237 | # Target rules 238 | all: build 239 | 240 | build: deviceQuery 241 | 242 | check.deps: 243 | ifeq ($(SAMPLE_ENABLED),0) 244 | @echo "Sample will be waived due to the above missing dependencies" 245 | else 246 | @echo "Sample is ready - all dependencies have been met" 247 | endif 248 | 249 | deviceQuery.o:deviceQuery.cpp 250 | $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< 251 | 252 | deviceQuery: deviceQuery.o 253 | $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) 254 | $(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) 255 | $(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) 256 | 257 | run: build 258 | $(EXEC) ./deviceQuery 259 | 260 | clean: 261 | rm -f deviceQuery deviceQuery.o 262 | rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/deviceQuery 263 | 264 | clobber: clean 265 | -------------------------------------------------------------------------------- /utils/cuda_device_query/NsightEclipse.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | deviceQuery 5 | 6 | cudaSetDevice 7 | cudaGetDeviceCount 8 | cudaGetDeviceProperties 9 | cudaDriverGetVersion 10 | cudaRuntimeGetVersion 11 | 12 | 13 | whole 14 | 15 | ./ 16 | ../ 17 | ../../common/inc 18 | 19 | 20 | CUDA Runtime API 21 | Device Query 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | true 30 | deviceQuery.cpp 31 | 32 | 1:CUDA Basic Topics 33 | 34 | sm20 35 | sm30 36 | sm35 37 | sm37 38 | sm50 39 | sm52 40 | sm60 41 | 42 | 43 | x86_64 44 | linux 45 | 46 | 47 | windows7 48 | 49 | 50 | x86_64 51 | macosx 52 | 53 | 54 | arm 55 | 56 | 57 | ppc64le 58 | linux 59 | 60 | 61 | 62 | all 63 | 64 | Device Query 65 | exe 66 | 67 | -------------------------------------------------------------------------------- /utils/cuda_device_query/deviceQuery: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/utils/cuda_device_query/deviceQuery -------------------------------------------------------------------------------- /utils/cuda_device_query/deviceQuery.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | /* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. 
*/ 12 | 13 | // Shared Utilities (QA Testing) 14 | 15 | // std::system includes 16 | #include <memory> 17 | #include <iostream> 18 | 19 | #include <cuda_runtime.h> 20 | #include <helper_cuda.h> 21 | 22 | 23 | 24 | int *pArgc = NULL; 25 | char **pArgv = NULL; 26 | 27 | #if CUDART_VERSION < 5000 28 | 29 | // CUDA-C includes 30 | #include <cuda.h> 31 | 32 | // This function wraps the CUDA Driver API into a template function 33 | template <class T> 34 | inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) 35 | { 36 | CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device); 37 | 38 | if (CUDA_SUCCESS != error) 39 | { 40 | fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n", 41 | error, __FILE__, __LINE__); 42 | 43 | exit(EXIT_FAILURE); 44 | } 45 | } 46 | 47 | #endif /* CUDART_VERSION < 5000 */ 48 | 49 | //////////////////////////////////////////////////////////////////////////////// 50 | // Program main 51 | //////////////////////////////////////////////////////////////////////////////// 52 | int 53 | main(int argc, char **argv) 54 | { 55 | pArgc = &argc; 56 | pArgv = argv; 57 | 58 | printf("%s Starting...\n\n", argv[0]); 59 | printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n"); 60 | 61 | int deviceCount = 0; 62 | cudaError_t error_id = cudaGetDeviceCount(&deviceCount); 63 | 64 | if (error_id != cudaSuccess) 65 | { 66 | printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id, cudaGetErrorString(error_id)); 67 | printf("Result = FAIL\n"); 68 | exit(EXIT_FAILURE); 69 | } 70 | 71 | // This function call returns 0 if there are no CUDA capable devices. 72 | if (deviceCount == 0) 73 | { 74 | printf("There are no available device(s) that support CUDA\n"); 75 | } 76 | else 77 | { 78 | printf("Detected %d CUDA Capable device(s)\n", deviceCount); 79 | } 80 | 81 | int dev, driverVersion = 0, runtimeVersion = 0; 82 | 83 | for (dev = 0; dev < deviceCount; ++dev) 84 | { 85 | cudaSetDevice(dev); 86 | cudaDeviceProp deviceProp; 87 | cudaGetDeviceProperties(&deviceProp, dev); 88 | 89 | printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); 90 | 91 | // Console log 92 | cudaDriverGetVersion(&driverVersion); 93 | cudaRuntimeGetVersion(&runtimeVersion); 94 | printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10); 95 | printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor); 96 | 97 | char msg[256]; 98 | SPRINTF(msg, " Total amount of global memory: %.0f MBytes (%llu bytes)\n", 99 | (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem); 100 | printf("%s", msg); 101 | 102 | printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n", 103 | deviceProp.multiProcessorCount, 104 | _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), 105 | _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); 106 | printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); 107 | 108 | 109 | #if CUDART_VERSION >= 5000 110 | // This is supported in CUDA 5.0 (runtime API device properties) 111 | printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); 112 | printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); 113 | 114 | if (deviceProp.l2CacheSize) 115 | { 116 | printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize); 117 | } 118 | 119 | #else 120 | // This is only
available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API) 121 | int memoryClock; 122 | getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev); 123 | printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f); 124 | int memBusWidth; 125 | getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev); 126 | printf(" Memory Bus Width: %d-bit\n", memBusWidth); 127 | int L2CacheSize; 128 | getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev); 129 | 130 | if (L2CacheSize) 131 | { 132 | printf(" L2 Cache Size: %d bytes\n", L2CacheSize); 133 | } 134 | 135 | #endif 136 | 137 | printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n", 138 | deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1], 139 | deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); 140 | printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n", 141 | deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); 142 | printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n", 143 | deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]); 144 | 145 | 146 | printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem); 147 | printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock); 148 | printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock); 149 | printf(" Warp size: %d\n", deviceProp.warpSize); 150 | printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor); 151 | printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock); 152 | printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", 153 | deviceProp.maxThreadsDim[0], 154 | deviceProp.maxThreadsDim[1], 155 | deviceProp.maxThreadsDim[2]); 156 | printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", 157 | deviceProp.maxGridSize[0], 158 | deviceProp.maxGridSize[1], 159 | deviceProp.maxGridSize[2]); 160 | printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch); 161 | printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment); 162 | printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); 163 | printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); 164 | printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); 165 | printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); 166 | printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); 167 | printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled"); 168 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 169 | printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)"); 170 | #endif 171 | printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ?
"Yes" : "No"); 172 | printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); 173 | 174 | const char *sComputeMode[] = 175 | { 176 | "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", 177 | "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", 178 | "Prohibited (no host thread can use ::cudaSetDevice() with this device)", 179 | "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", 180 | "Unknown", 181 | NULL 182 | }; 183 | printf(" Compute Mode:\n"); 184 | printf(" < %s >\n", sComputeMode[deviceProp.computeMode]); 185 | } 186 | 187 | // If there are 2 or more GPUs, query to determine whether RDMA is supported 188 | if (deviceCount >= 2) 189 | { 190 | cudaDeviceProp prop[64]; 191 | int gpuid[64]; // we want to find the first two GPUs that can support P2P 192 | int gpu_p2p_count = 0; 193 | 194 | for (int i=0; i < deviceCount; i++) 195 | { 196 | checkCudaErrors(cudaGetDeviceProperties(&prop[i], i)); 197 | 198 | // Only boards based on Fermi or later can support P2P 199 | if ((prop[i].major >= 2) 200 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 201 | // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to support this 202 | && prop[i].tccDriver 203 | #endif 204 | ) 205 | { 206 | // This is an array of P2P capable GPUs 207 | gpuid[gpu_p2p_count++] = i; 208 | } 209 | } 210 | 211 | // Show all the combinations of support P2P GPUs 212 | int can_access_peer; 213 | 214 | if (gpu_p2p_count >= 2) 215 | { 216 | for (int i = 0; i < gpu_p2p_count; i++) 217 | { 218 | for (int j = 0; j < gpu_p2p_count; j++) 219 | { 220 | if (gpuid[i] == gpuid[j]) 221 | { 222 | continue; 223 | } 224 | checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j])); 225 | printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i], 226 | prop[gpuid[j]].name, gpuid[j] , 227 | can_access_peer ? 
"Yes" : "No"); 228 | } 229 | } 230 | } 231 | } 232 | 233 | // csv masterlog info 234 | // ***************************** 235 | // exe and CUDA driver name 236 | printf("\n"); 237 | std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; 238 | char cTemp[16]; 239 | 240 | // driver version 241 | sProfileString += ", CUDA Driver Version = "; 242 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 243 | sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10); 244 | #else 245 | sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10); 246 | #endif 247 | sProfileString += cTemp; 248 | 249 | // Runtime version 250 | sProfileString += ", CUDA Runtime Version = "; 251 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 252 | sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); 253 | #else 254 | sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); 255 | #endif 256 | sProfileString += cTemp; 257 | 258 | // Device count 259 | sProfileString += ", NumDevs = "; 260 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 261 | sprintf_s(cTemp, 10, "%d", deviceCount); 262 | #else 263 | sprintf(cTemp, "%d", deviceCount); 264 | #endif 265 | sProfileString += cTemp; 266 | 267 | // Print Out all device Names 268 | for (dev = 0; dev < deviceCount; ++dev) 269 | { 270 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 271 | sprintf_s(cTemp, 13, ", Device%d = ", dev); 272 | #else 273 | sprintf(cTemp, ", Device%d = ", dev); 274 | #endif 275 | cudaDeviceProp deviceProp; 276 | cudaGetDeviceProperties(&deviceProp, dev); 277 | sProfileString += cTemp; 278 | sProfileString += deviceProp.name; 279 | } 280 | 281 | sProfileString += "\n"; 282 | printf("%s", sProfileString.c_str()); 283 | 284 | printf("Result = PASS\n"); 285 | 286 | // finish 287 | exit(EXIT_SUCCESS); 288 | } 289 | -------------------------------------------------------------------------------- /utils/cuda_device_query/deviceQuery.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/utils/cuda_device_query/deviceQuery.o -------------------------------------------------------------------------------- /utils/cuda_device_query/readme.txt: -------------------------------------------------------------------------------- 1 | Sample: deviceQuery 2 | Minimum spec: SM 2.0 3 | 4 | This sample enumerates the properties of the CUDA devices present in the system. 5 | 6 | Key concepts: 7 | CUDA Runtime API 8 | Device Query 9 | --------------------------------------------------------------------------------