├── Dockerfile
├── Dockerfile.gpu
├── Makefile
├── README.md
├── downloads
│   └── cudnn-7.5-linux-x64-v5.0-ga.tgz
├── frameworks
│   ├── caffe
│   │   ├── Makefile
│   │   ├── config
│   │   │   ├── Makefile.config.cpu
│   │   │   └── Makefile.config.gpu
│   │   └── test
│   │       └── load.py
│   ├── chainer
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── keras
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── mxnet
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── neon
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── openface
│   │   └── Makefile
│   ├── tensorflow
│   │   ├── Makefile
│   │   └── test
│   │       └── load.py
│   ├── torch
│   │   └── Makefile
│   └── transferflow
│       ├── Makefile
│       └── test
│           └── load.py
└── utils
    └── cuda_device_query
        ├── Makefile
        ├── NsightEclipse.xml
        ├── deviceQuery
        ├── deviceQuery.cpp
        ├── deviceQuery.o
        └── readme.txt
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:14.04
2 |
3 | ENV GPU_SUPPORT=0
4 | ENV PYTHONPATH="/workdir/frameworks/mxnet/src/python:/workdir/frameworks/caffe/src/python:"
5 | ENV PATH="/workdir/frameworks/torch/src/install/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda/bin"
6 | ENV DYLD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:"
7 | ENV LD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:/usr/local/cuda/lib64"
8 | ENV LUA_CPATH="/workdir/frameworks/torch/src/install/lib/?.so;/root/.luarocks/lib/lua/5.1/?.so;/workdir/frameworks/torch/src/install/lib/lua/5.1/?.so;./?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/lib/lua/5.1/loadall.so"
9 | ENV LUA_PATH="/root/.luarocks/share/lua/5.1/?.lua;/root/.luarocks/share/lua/5.1/?/init.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?/init.lua;./?.lua;/workdir/frameworks/torch/src/install/share/luajit-2.1.0-beta1/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua"
10 |
11 | RUN apt-get update --fix-missing
12 | RUN apt-get -y install curl wget python python-numpy python-scipy python-dev python-pip git-core vim
13 |
14 | WORKDIR /workdir
15 | ADD . /workdir
16 |
17 | RUN make global_dependencies
18 |
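# Each framework below follows the same five-step Makefile convention:
# dependencies -> src -> build -> install -> load_test (a quick import check).
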
19 | # Caffe
20 | WORKDIR /workdir/frameworks/caffe
21 | RUN make dependencies
22 | RUN make src
23 | RUN make build
24 | RUN make install
25 | RUN make load_test
26 |
27 | # Tensorflow
28 | WORKDIR /workdir/frameworks/tensorflow
29 | RUN make dependencies
30 | RUN make src
31 | RUN make build
32 | RUN make install
33 | RUN make load_test
34 |
35 | # Keras
36 | WORKDIR /workdir/frameworks/keras
37 | RUN make dependencies
38 | RUN make src
39 | RUN make build
40 | RUN make install
41 | RUN make load_test
42 |
43 | # Neon
44 | WORKDIR /workdir/frameworks/neon
45 | RUN make dependencies
46 | RUN make src
47 | RUN make build
48 | RUN make install
49 | RUN make load_test
50 |
51 | # Chainer
52 | WORKDIR /workdir/frameworks/chainer
53 | RUN make dependencies
54 | RUN make src
55 | RUN make build
56 | RUN make install
57 | RUN make load_test
58 |
59 | # MXNet
60 | WORKDIR /workdir/frameworks/mxnet
61 | RUN make dependencies
62 | RUN make src
63 | RUN make build
64 | RUN make install
65 | RUN make load_test
66 |
67 | # Torch
68 | WORKDIR /workdir/frameworks/torch
69 | RUN make dependencies
70 | RUN make src
71 | RUN make build
72 | RUN make install
73 | RUN make load_test
74 |
75 | # Openface
76 | WORKDIR /workdir/frameworks/openface
77 | RUN make dependencies
78 | RUN make src
79 | RUN make build
80 | RUN make install
81 | RUN make load_test
82 |
83 | # Transferflow
84 | WORKDIR /workdir/frameworks/transferflow
85 | RUN make dependencies
86 | RUN make src
87 | RUN make build
88 | RUN make install
89 | RUN make load_test
90 |
91 | # Cleanup to make container smaller
92 | WORKDIR /workdir
93 | RUN make clean_global_dependencies
94 | RUN ln -s /dev/null /dev/raw1394
95 |
--------------------------------------------------------------------------------
/Dockerfile.gpu:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
2 |
3 | ENV GPU_SUPPORT=1
4 | ENV PYTHONPATH="/workdir/frameworks/mxnet/src/python:/workdir/frameworks/caffe/src/python:"
5 | ENV PATH="/workdir/frameworks/torch/src/install/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda/bin"
6 | ENV DYLD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:"
7 | ENV LD_LIBRARY_PATH="/workdir/frameworks/torch/src/install/lib:/usr/local/cuda/lib64"
8 | ENV LUA_CPATH="/workdir/frameworks/torch/src/install/lib/?.so;/root/.luarocks/lib/lua/5.1/?.so;/workdir/frameworks/torch/src/install/lib/lua/5.1/?.so;./?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/lib/lua/5.1/loadall.so"
9 | ENV LUA_PATH="/root/.luarocks/share/lua/5.1/?.lua;/root/.luarocks/share/lua/5.1/?/init.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?.lua;/workdir/frameworks/torch/src/install/share/lua/5.1/?/init.lua;./?.lua;/workdir/frameworks/torch/src/install/share/luajit-2.1.0-beta1/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua"
10 |
11 | RUN apt-get update --fix-missing
12 | RUN apt-get -y install curl wget python python-numpy python-scipy python-dev python-pip git-core vim
13 |
14 | WORKDIR /workdir
15 | ADD . /workdir
16 |
17 | RUN make global_dependencies
18 |
19 | # Caffe
20 | WORKDIR /workdir/frameworks/caffe
21 | RUN make dependencies
22 | RUN make src
23 | RUN make build
24 | RUN make install
25 | RUN make load_test
26 |
27 | # Tensorflow
28 | WORKDIR /workdir/frameworks/tensorflow
29 | RUN make dependencies
30 | RUN make src
31 | RUN make build
32 | RUN make install
33 | RUN make load_test
34 |
35 | # Keras
36 | WORKDIR /workdir/frameworks/keras
37 | RUN make dependencies
38 | RUN make src
39 | RUN make build
40 | RUN make install
41 | RUN make load_test
42 |
43 | # Neon
44 | WORKDIR /workdir/frameworks/neon
45 | RUN make dependencies
46 | RUN make src
47 | RUN make build
48 | RUN make install
49 | RUN make load_test
50 |
51 | # Chainer
52 | WORKDIR /workdir/frameworks/chainer
53 | RUN make dependencies
54 | RUN make src
55 | RUN make build
56 | RUN make install
57 | RUN make load_test
58 |
59 | # MXNet
60 | WORKDIR /workdir/frameworks/mxnet
61 | RUN make dependencies
62 | RUN make src
63 | RUN make build
64 | RUN make install
65 | RUN make load_test
66 |
67 | # Torch
68 | WORKDIR /workdir/frameworks/torch
69 | RUN make dependencies
70 | RUN make src
71 | RUN make build
72 | RUN make install
73 | RUN make load_test
74 |
75 | # Openface
76 | WORKDIR /workdir/frameworks/openface
77 | RUN make dependencies
78 | RUN make src
79 | RUN make build
80 | RUN make install
81 | RUN make load_test
82 |
83 | # Transferflow
84 | WORKDIR /workdir/frameworks/transferflow
85 | RUN make dependencies
86 | RUN make src
87 | RUN make build
88 | RUN make install
89 | RUN make load_test
90 |
91 | # Cleanup to make container smaller
92 | WORKDIR /workdir
93 | RUN make clean_global_dependencies
94 | RUN ln -s /dev/null /dev/raw1394
95 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | DEEP_BASE_VERSION = 1.2
3 | GPU_SUPPORT ?= 0
4 |
5 | .PHONY: docker.build.gpu
6 | docker.build.gpu:
7 | -@docker stop dominiek/deep-base
8 | -@docker rm dominiek/deep-base
9 | docker build -t dominiek/deep-base-gpu -f Dockerfile.gpu .
10 |
11 | .PHONY: docker.build
12 | docker.build:
13 | -@docker stop dominiek/deep-base
14 | -@docker rm dominiek/deep-base
15 | docker build -t dominiek/deep-base .
16 |
17 | .PHONY: docker.push.gpu
18 | docker.push.gpu:
19 | docker push dominiek/deep-base-gpu:latest
20 | docker push dominiek/deep-base-gpu:v$(DEEP_BASE_VERSION)
21 |
22 | .PHONY: docker.tag.gpu
23 | docker.tag.gpu:
24 | docker tag dominiek/deep-base-gpu dominiek/deep-base-gpu:v$(DEEP_BASE_VERSION)
25 |
26 | .PHONY: docker.push
27 | docker.push:
28 | docker push dominiek/deep-base:latest
29 | docker push dominiek/deep-base:v$(DEEP_BASE_VERSION)
30 |
31 | .PHONY: docker.tag
32 | docker.tag:
33 | docker tag dominiek/deep-base dominiek/deep-base:v$(DEEP_BASE_VERSION)
34 |
35 | .PHONY: docker.clean
36 | docker.clean:
37 | docker rm $(shell docker ps -a -q)
38 | docker rmi $(shell docker images -q)
39 |
40 | .PHONY: global_dependencies
41 | global_dependencies:
42 | echo $(DEEP_BASE_VERSION) > /etc/deep_base_version
43 | ln -s /dev/null /dev/raw1394
44 | pip install cython
45 | pip install scikit-learn
46 | pip install bhtsne
47 | ifeq ($(GPU_SUPPORT),1)
48 | @echo "Building with GPU support"
49 | #echo 'export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH' >> ~/.bashrc
50 | #echo 'export LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:/usr/local/cuda/lib64' >> ~/.bashrc
51 | cd utils/cuda_device_query; make; cp deviceQuery /usr/local/bin/cuda_device_query
52 | else
53 | @echo "Building CPU-only"
54 | endif
55 |
56 | .PHONY: clean_global_dependencies
57 | clean_global_dependencies:
58 | apt-get remove -y git-core curl
59 |
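# Usage sketch: GPU_SUPPORT is normally injected by the Dockerfiles via
# ENV GPU_SUPPORT=0/1; to exercise the GPU branch of global_dependencies by
# hand, override it on the command line:
#
#   make global_dependencies GPU_SUPPORT=1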
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Deep Learning Base Image
3 |
4 | Today's deep learning frameworks require an extraordinary amount of work to install and run. This Docker container bundles all popular deep learning frameworks into a single Docker image. Ubuntu Linux is the base OS of choice for this container (CUDA requires it, and all of the DL frameworks play nicely with it).
5 |
6 | Supported DL frameworks:
7 |
8 | - [Tensorflow](https://www.tensorflow.org/) (v1.0.1)
9 | - [Caffe](http://caffe.berkeleyvision.org/) (RC5)
10 | - [Theano](http://deeplearning.net/software/theano/)
11 | - [Keras](http://keras.io/) (v1.2.2)
12 | - [MXNet](http://mxnet.readthedocs.io/en/latest/) (v0.9.3)
13 | - [Torch](http://torch.ch/)
14 | - [Chainer](http://chainer.org/)
15 | - [Neon](http://neon.nervanasys.com/docs/latest/index.html) (v1.8.2)
16 | - [Transferflow](http://github.com/dominiek/transferflow) (v0.1.4)
17 |
18 | Other ML frameworks:
19 |
20 | - Python / SciPy / NumPy / dlib
21 | - [Scikit-Learn](http://scikit-learn.org/stable/)
22 | - [Scikit-Image](http://scikit-image.org/)
23 | - [OpenFace](https://cmusatyalab.github.io/openface/) (v0.2.1)
24 |
25 | ### Usage
26 |
27 | _For GPU usage see below_
28 |
29 | Run the latest version. All DL frameworks are available at your fingertips (the `import` lines below are entered at the Python prompt that opens):
30 |
31 | ```
32 | docker run -it dominiek/deep-base:latest python
33 | import tensorflow
34 | import matplotlib
35 | matplotlib.use('Agg')
36 | import caffe
37 | import openface
38 | ```
39 |
40 | Or a specific version tag:
41 |
42 | ```
43 | docker pull dominiek/deep-base:v1.3
44 | ```
45 |
46 | To use `deep-base` as the base for your deployment's Docker container, specify the right `FROM` directive in your `Dockerfile`:
47 |
48 | ```
49 | FROM dominiek/deep-base:v1.3
50 | ```
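
For example, a minimal downstream `Dockerfile` could look like this (a sketch; `requirements.txt` and `app.py` stand in for your own project files):

```
FROM dominiek/deep-base:v1.3

# Extra Python dependencies on top of the bundled frameworks (hypothetical file).
COPY requirements.txt /app/requirements.txt
RUN pip install -r /app/requirements.txt

# Your application code, run with the image's Python.
COPY app.py /app/app.py
CMD ["python", "/app/app.py"]
```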
51 |
52 | To run code from the Host OS simply mount the source code dir:
53 |
54 | ```
55 | mkdir code
56 | echo 'import tensorflow' > code/app.py
57 | docker run --volume `pwd`/code:/code -it dominiek/deep-base:latest python /code/app.py
58 | ```
59 |
60 | ### GPU Usage
61 |
62 | GPU support requires many additional libraries like NVIDIA CUDA and cuDNN. There is a separate Docker repository for the GPU version:
63 |
64 | ```
65 | FROM dominiek/deep-base-gpu:v1.3
66 | ```
67 |
68 | Running the GPU image requires you to bind the host OS's CUDA libraries and devices. This requires the same CUDA version on the host OS as inside deep-base (CUDA 8.0).
69 |
70 | The most reliable way to do this is to use [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker):
71 |
72 | ```bash
73 | nvidia-docker run -it dominiek/deep-base-gpu /bin/bash
74 | ```
75 |
76 | Alternatively, you can use vanilla docker and bind the devices:
77 |
78 | ```bash
79 | export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
80 | export CUDA_DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
81 | docker run --privileged $CUDA_SO $CUDA_DEVICES -it dominiek/deep-base-gpu /bin/bash
82 | ```
83 |
84 | Now, to make sure that the GPU hardware is working correctly, use the `cuda_device_query` command inside the container:
85 |
86 | ```bash
87 | root@37a895460633:/workdir# cuda_device_query
88 | ...
89 | Result = PASS
90 | ```
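
You can also ask a framework directly whether it sees the GPU. For instance, with the bundled Tensorflow (a sketch; assumes `device_lib` is available in this Tensorflow version):

```
from tensorflow.python.client import device_lib

# List the devices Tensorflow can see; a working GPU setup shows a /gpu:0 entry.
print([d.name for d in device_lib.list_local_devices()])
```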
91 |
92 | ### Build a customized Docker image
93 |
94 | This is optional. To start the build process, execute:
95 |
96 | ```
97 | make docker.build
98 | ```
99 |
100 | During the build process, small load tests are run to make sure the compiled Python bindings load properly.
101 |
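Each framework ships exactly such a test under `frameworks/<name>/test/load.py`; the Caffe one, for instance, is just:

```
import matplotlib
matplotlib.use('Agg')
import caffe

print('Python-Caffe loaded OK')
```
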
102 | For GPU support (requires CUDA-compatible host hardware and Linux host OS):
103 |
104 | ```
105 | make docker.build.gpu
106 | ```
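
Once an image builds, the root `Makefile` also provides targets to tag and push it, versioned by its `DEEP_BASE_VERSION` variable:

```bash
make docker.tag     # tag dominiek/deep-base with v$DEEP_BASE_VERSION
make docker.push    # push :latest plus the version tag
```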
107 |
108 | ### Performance
109 |
110 | There is a CPU and a GPU version of this Docker container. The latter requires [CUDA compatible hardware](https://developer.nvidia.com/cuda-gpus), which includes AWS GPU instances. When running Docker on a Linux host OS, no virtual machine is used and all CUDA hardware can be fully utilized.
111 |
112 | Note however that on Windows and Mac OS X a virtual machine like VirtualBox is used, which does not support GPU passthrough; no GPU can be used on these host OSes. The recommended pattern is to develop under virtualization on a Windows/Mac machine, but use Linux for staging and production environments.
113 |
114 | ### TODO
115 |
116 | - Add an MNIST example that can be run easily
117 | - Create a benchmark utility that shows the performance of each framework in a running instance
118 | - Use OpenBLAS for the frameworks that support it
119 | - Reduce the image's size footprint
120 |
--------------------------------------------------------------------------------
/downloads/cudnn-7.5-linux-x64-v5.0-ga.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/downloads/cudnn-7.5-linux-x64-v5.0-ga.tgz
--------------------------------------------------------------------------------
/frameworks/caffe/Makefile:
--------------------------------------------------------------------------------
1 |
2 | GPU_SUPPORT ?= 0
3 |
4 | .PHONY: dependencies
5 | dependencies:
6 | apt-get -y install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
7 | apt-get -y install --no-install-recommends libboost-all-dev
8 | apt-get -y install libatlas-base-dev
9 | apt-get -y install libgflags-dev libgoogle-glog-dev liblmdb-dev
10 | apt-get -y install libdc1394-22-dev libdc1394-22 libdc1394-utils python-opencv python-pip gfortran
11 | pip install cython
12 | pip install scikit-image
13 | pip install protobuf
14 |
15 | .PHONY: src
16 | src:
17 | git clone --recursive https://github.com/BVLC/caffe.git src
18 | cd src; git checkout rc5
19 |
20 | .PHONY: build
21 | build:
22 | ifeq ($(GPU_SUPPORT),1)
23 | cp config/Makefile.config.gpu src/Makefile.config
24 | else
25 | cp config/Makefile.config.cpu src/Makefile.config
26 | endif
27 | cd src; make all
28 | cd src; make pycaffe
29 |
30 | .PHONY: install
31 | install:
32 | cd src; make distribute
33 | export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH
34 | echo 'export PYTHONPATH=/workdir/frameworks/caffe/src/python:$$PYTHONPATH' >> ~/.bashrc
35 |
36 | .PHONY: load_test
37 | load_test:
38 | ifeq ($(GPU_SUPPORT),1)
39 | LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):/usr/local/cuda/lib64 PYTHONPATH=/workdir/frameworks/caffe/src/python:$(PYTHONPATH) python test/load.py
40 | else
41 | PYTHONPATH=/workdir/frameworks/caffe/src/python:$(PYTHONPATH) python test/load.py
42 | endif
43 |
--------------------------------------------------------------------------------
/frameworks/caffe/config/Makefile.config.cpu:
--------------------------------------------------------------------------------
1 | ## Refer to http://caffe.berkeleyvision.org/installation.html
2 | # Contributions simplifying and improving our build system are welcome!
3 |
4 | # cuDNN acceleration switch (uncomment to build with cuDNN).
5 | # USE_CUDNN := 1
6 |
7 | # CPU-only switch (uncomment to build without GPU support).
8 | CPU_ONLY := 1
9 |
10 | # uncomment to disable IO dependencies and corresponding data layers
11 | USE_OPENCV := 1
12 | # USE_LEVELDB := 0
13 | # USE_LMDB := 0
14 |
15 | # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
16 | # You should not set this flag if you will be reading LMDBs with any
17 | # possibility of simultaneous read and write
18 | # ALLOW_LMDB_NOLOCK := 1
19 |
20 | # Uncomment if you're using OpenCV 3
21 | # OPENCV_VERSION := 3
22 |
23 | # To customize your choice of compiler, uncomment and set the following.
24 | # N.B. the default for Linux is g++ and the default for OSX is clang++
25 | # CUSTOM_CXX := g++
26 |
27 | # CUDA directory contains bin/ and lib/ directories that we need.
28 | CUDA_DIR := /usr/local/cuda
29 | # On Ubuntu 14.04, if cuda tools are installed via
30 | # "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
31 | # CUDA_DIR := /usr
32 |
33 | # CUDA architecture setting: going with all of them.
34 | # For CUDA < 6.0, comment the *_50 lines for compatibility.
35 | CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
36 | -gencode arch=compute_20,code=sm_21 \
37 | -gencode arch=compute_30,code=sm_30 \
38 | -gencode arch=compute_35,code=sm_35 \
39 | -gencode arch=compute_50,code=sm_50 \
40 | -gencode arch=compute_50,code=compute_50
41 |
42 | # BLAS choice:
43 | # atlas for ATLAS (default)
44 | # mkl for MKL
45 | # open for OpenBlas
46 | BLAS := atlas
47 | # Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
48 | # Leave commented to accept the defaults for your choice of BLAS
49 | # (which should work)!
50 | # BLAS_INCLUDE := /path/to/your/blas
51 | # BLAS_LIB := /path/to/your/blas
52 |
53 | # Homebrew puts openblas in a directory that is not on the standard search path
54 | # BLAS_INCLUDE := $(shell brew --prefix openblas)/include
55 | # BLAS_LIB := $(shell brew --prefix openblas)/lib
56 |
57 | # This is required only if you will compile the matlab interface.
58 | # MATLAB directory should contain the mex binary in /bin.
59 | # MATLAB_DIR := /usr/local
60 | # MATLAB_DIR := /Applications/MATLAB_R2012b.app
61 |
62 | # NOTE: this is required only if you will compile the python interface.
63 | # We need to be able to find Python.h and numpy/arrayobject.h.
64 | PYTHON_INCLUDE := /usr/include/python2.7 \
65 | /usr/lib/python2.7/dist-packages/numpy/core/include
66 | # Anaconda Python distribution is quite popular. Include path:
67 | # Verify anaconda location, sometimes it's in root.
68 | # ANACONDA_HOME := $(HOME)/anaconda
69 | # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
70 | # $(ANACONDA_HOME)/include/python2.7 \
71 | # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \
72 |
73 | # Uncomment to use Python 3 (default is Python 2)
74 | # PYTHON_LIBRARIES := boost_python3 python3.5m
75 | # PYTHON_INCLUDE := /usr/include/python3.5m \
76 | # /usr/lib/python3.5/dist-packages/numpy/core/include
77 |
78 | # We need to be able to find libpythonX.X.so or .dylib.
79 | PYTHON_LIB := /usr/lib
80 | # PYTHON_LIB := $(ANACONDA_HOME)/lib
81 |
82 | # Homebrew installs numpy in a non standard path (keg only)
83 | # PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
84 | # PYTHON_LIB += $(shell brew --prefix numpy)/lib
85 |
86 | # Uncomment to support layers written in Python (will link against Python libs)
87 | # WITH_PYTHON_LAYER := 1
88 |
89 | # Whatever else you find you need goes here.
90 | INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
91 | LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
92 |
93 | # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
94 | # INCLUDE_DIRS += $(shell brew --prefix)/include
95 | # LIBRARY_DIRS += $(shell brew --prefix)/lib
96 |
97 | # Uncomment to use `pkg-config` to specify OpenCV library paths.
98 | # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
99 | # USE_PKG_CONFIG := 1
100 |
101 | # N.B. both build and distribute dirs are cleared on `make clean`
102 | BUILD_DIR := build
103 | DISTRIBUTE_DIR := distribute
104 |
105 | # Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
106 | # DEBUG := 1
107 |
108 | # The ID of the GPU that 'make runtest' will use to run unit tests.
109 | TEST_GPUID := 0
110 |
111 | # enable pretty build (comment to see full commands)
112 | Q ?= @
113 |
--------------------------------------------------------------------------------
/frameworks/caffe/config/Makefile.config.gpu:
--------------------------------------------------------------------------------
1 | ## Refer to http://caffe.berkeleyvision.org/installation.html
2 | # Contributions simplifying and improving our build system are welcome!
3 |
4 | # cuDNN acceleration switch (uncomment to build with cuDNN).
5 | USE_CUDNN := 1
6 |
7 | # CPU-only switch (uncomment to build without GPU support).
8 | # CPU_ONLY := 1
9 |
10 | # uncomment to disable IO dependencies and corresponding data layers
11 | USE_OPENCV := 1
12 | # USE_LEVELDB := 0
13 | # USE_LMDB := 0
14 |
15 | # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
16 | # You should not set this flag if you will be reading LMDBs with any
17 | # possibility of simultaneous read and write
18 | # ALLOW_LMDB_NOLOCK := 1
19 |
20 | # Uncomment if you're using OpenCV 3
21 | # OPENCV_VERSION := 3
22 |
23 | # To customize your choice of compiler, uncomment and set the following.
24 | # N.B. the default for Linux is g++ and the default for OSX is clang++
25 | # CUSTOM_CXX := g++
26 |
27 | # CUDA directory contains bin/ and lib/ directories that we need.
28 | CUDA_DIR := /usr/local/cuda
29 | # On Ubuntu 14.04, if cuda tools are installed via
30 | # "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
31 | # CUDA_DIR := /usr
32 |
33 | # CUDA architecture setting: going with all of them.
34 | # For CUDA < 6.0, comment the *_50 lines for compatibility.
35 | CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
36 | -gencode arch=compute_20,code=sm_21 \
37 | -gencode arch=compute_30,code=sm_30 \
38 | -gencode arch=compute_35,code=sm_35 \
39 | -gencode arch=compute_50,code=sm_50 \
40 | -gencode arch=compute_50,code=compute_50
41 |
42 | # BLAS choice:
43 | # atlas for ATLAS (default)
44 | # mkl for MKL
45 | # open for OpenBlas
46 | BLAS := atlas
47 | # Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
48 | # Leave commented to accept the defaults for your choice of BLAS
49 | # (which should work)!
50 | # BLAS_INCLUDE := /path/to/your/blas
51 | # BLAS_LIB := /path/to/your/blas
52 |
53 | # Homebrew puts openblas in a directory that is not on the standard search path
54 | # BLAS_INCLUDE := $(shell brew --prefix openblas)/include
55 | # BLAS_LIB := $(shell brew --prefix openblas)/lib
56 |
57 | # This is required only if you will compile the matlab interface.
58 | # MATLAB directory should contain the mex binary in /bin.
59 | # MATLAB_DIR := /usr/local
60 | # MATLAB_DIR := /Applications/MATLAB_R2012b.app
61 |
62 | # NOTE: this is required only if you will compile the python interface.
63 | # We need to be able to find Python.h and numpy/arrayobject.h.
64 | PYTHON_INCLUDE := /usr/include/python2.7 \
65 | /usr/lib/python2.7/dist-packages/numpy/core/include
66 | # Anaconda Python distribution is quite popular. Include path:
67 | # Verify anaconda location, sometimes it's in root.
68 | # ANACONDA_HOME := $(HOME)/anaconda
69 | # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
70 | # $(ANACONDA_HOME)/include/python2.7 \
71 | # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \
72 |
73 | # Uncomment to use Python 3 (default is Python 2)
74 | # PYTHON_LIBRARIES := boost_python3 python3.5m
75 | # PYTHON_INCLUDE := /usr/include/python3.5m \
76 | # /usr/lib/python3.5/dist-packages/numpy/core/include
77 |
78 | # We need to be able to find libpythonX.X.so or .dylib.
79 | PYTHON_LIB := /usr/lib
80 | # PYTHON_LIB := $(ANACONDA_HOME)/lib
81 |
82 | # Homebrew installs numpy in a non standard path (keg only)
83 | # PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
84 | # PYTHON_LIB += $(shell brew --prefix numpy)/lib
85 |
86 | # Uncomment to support layers written in Python (will link against Python libs)
87 | # WITH_PYTHON_LAYER := 1
88 |
89 | # Whatever else you find you need goes here.
90 | INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
91 | LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
92 |
93 | # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
94 | # INCLUDE_DIRS += $(shell brew --prefix)/include
95 | # LIBRARY_DIRS += $(shell brew --prefix)/lib
96 |
97 | # Uncomment to use `pkg-config` to specify OpenCV library paths.
98 | # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
99 | # USE_PKG_CONFIG := 1
100 |
101 | # N.B. both build and distribute dirs are cleared on `make clean`
102 | BUILD_DIR := build
103 | DISTRIBUTE_DIR := distribute
104 |
105 | # Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
106 | # DEBUG := 1
107 |
108 | # The ID of the GPU that 'make runtest' will use to run unit tests.
109 | TEST_GPUID := 0
110 |
111 | # enable pretty build (comment to see full commands)
112 | Q ?= @
113 |
--------------------------------------------------------------------------------
/frameworks/caffe/test/load.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 | import caffe
4 |
5 | print('Python-Caffe loaded OK')
6 |
--------------------------------------------------------------------------------
/frameworks/chainer/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | pip install -U setuptools
5 |
6 | .PHONY: src
7 | src:
8 | echo "OK"
9 |
10 | .PHONY: build
11 | build:
12 | echo "OK"
13 |
14 | .PHONY: install
15 | install:
16 | pip install chainer
17 |
18 | .PHONY: load_test
19 | load_test:
20 | python test/load.py
21 |
--------------------------------------------------------------------------------
/frameworks/chainer/test/load.py:
--------------------------------------------------------------------------------
1 | import chainer
2 |
3 | print('Python-Chainer loaded OK')
--------------------------------------------------------------------------------
/frameworks/keras/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | apt-get -y install python-h5py python-yaml
5 |
6 | .PHONY: src
7 | src:
8 | git clone --recursive https://github.com/fchollet/keras.git src
9 | cd src; git checkout 1.2.2
10 |
11 | .PHONY: build
12 | build:
13 | cd src; python setup.py install
14 |
15 | .PHONY: install
16 | install:
17 | echo "OK"
18 |
19 | .PHONY: load_test
20 | load_test:
21 | python test/load.py
22 |
--------------------------------------------------------------------------------
/frameworks/keras/test/load.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import keras
3 |
4 | print('Python-Keras loaded OK')
3 |
--------------------------------------------------------------------------------
/frameworks/mxnet/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | echo "OK"
5 |
6 | .PHONY: src
7 | src:
8 | git clone --recursive https://github.com/dmlc/mxnet src
9 | cd src; git checkout v0.9.3
10 |
11 | .PHONY: build
12 | build:
13 | cd src; make -j4
14 |
15 | .PHONY: install
16 | install:
17 | export PYTHONPATH=/workdir/frameworks/mxnet/src/python:$$PYTHONPATH
18 | echo 'export PYTHONPATH=/workdir/frameworks/mxnet/src/python:$$PYTHONPATH' >> ~/.bashrc
19 |
20 | .PHONY: load_test
21 | load_test:
22 | PYTHONPATH=/workdir/frameworks/mxnet/src/python:$(PYTHONPATH) python test/load.py
23 |
--------------------------------------------------------------------------------
/frameworks/mxnet/test/load.py:
--------------------------------------------------------------------------------
1 |
2 | import mxnet
3 |
4 | print('Python-MXNet loaded OK')
5 |
--------------------------------------------------------------------------------
/frameworks/neon/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | apt-get -y install python-virtualenv
5 |
6 | .PHONY: src
7 | src:
8 | git clone --recursive https://github.com/NervanaSystems/neon.git src
9 | cd src; git checkout v1.8.2
10 |
11 | .PHONY: build
12 | build:
13 | cd src; make sysinstall
14 |
15 | .PHONY: install
16 | install:
17 | echo "OK"
18 |
19 | .PHONY: load_test
20 | load_test:
21 | python test/load.py
22 |
--------------------------------------------------------------------------------
/frameworks/neon/test/load.py:
--------------------------------------------------------------------------------
1 | import neon
2 |
3 | print('Python-Neon loaded OK')
--------------------------------------------------------------------------------
/frameworks/openface/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | apt-get -y install libboost-python-dev cmake
5 | pip install dlib
6 | . ~/.bashrc; for lib in dpnn nn optim optnet csvigo cutorch cunn fblualib torchx tds; do \
7 | luarocks install $$lib ; \
8 | done
9 |
10 | .PHONY: src
11 | src:
12 | git clone --recursive https://github.com/cmusatyalab/openface.git src
13 | cd src; git checkout 0.2.1
14 |
15 | .PHONY: build
16 | build:
17 | echo "OK"
18 |
19 | .PHONY: install
20 | install:
21 | cd src; python setup.py install
22 |
23 | .PHONY: load_test
24 | load_test:
25 | echo "Skip"
26 |
--------------------------------------------------------------------------------
/frameworks/tensorflow/Makefile:
--------------------------------------------------------------------------------
1 |
2 | GPU_SUPPORT ?= 0
3 |
4 | .PHONY: dependencies
5 | dependencies:
6 | apt-get -y install software-properties-common
7 | add-apt-repository -y ppa:webupd8team/java
8 | apt-get update
9 | echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | sudo debconf-set-selections
10 | apt-get -y install oracle-java8-installer unzip
11 | wget "https://github.com/bazelbuild/bazel/releases/download/0.4.4/bazel-0.4.4-installer-linux-x86_64.sh"
12 | chmod +x bazel-0.4.4-installer-linux-x86_64.sh
13 | ./bazel-0.4.4-installer-linux-x86_64.sh
14 | rm -f bazel-0.4.4-installer-linux-x86_64.sh
15 | bash /usr/local/lib/bazel/bin/bazel-complete.bash
16 | apt-get -y install swig
17 | apt-get -y install libcurl3-dev
18 | pip install protobuf==3.2.0
19 | pip install six --upgrade --target="/usr/lib/python2.7/dist-packages"
20 |
21 | .PHONY: src
22 | src:
23 | git clone --recurse-submodules https://github.com/tensorflow/tensorflow src
24 | cd src; git checkout v1.0.1
25 |
26 | .PHONY: build
27 | build:
28 | ifeq ($(GPU_SUPPORT),1)
29 | cd src; echo "/usr/local/lib/python2.7/dist-packages" > configure_params.txt
30 | mkdir /usr/lib/x86_64-linux-gnu/include
31 | ln -s /usr/include/cudnn.h /usr/lib/x86_64-linux-gnu/include/cudnn.h
32 | cd src; TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2 TF_NEED_CUDA=1 LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$(LD_LIBRARY_PATH) tensorflow/tools/ci_build/builds/configured GPU
33 | #cd src; TF_NEED_OPENCL=0 PYTHON_LIB_PATH=/usr/lib/python2.7/dist-packages TF_ENABLE_XLA=0 TF_NEED_HDFS=0 TF_NEED_JEMALLOC=1 CC_OPT_FLAGS="-march=native" GCC_HOST_COMPILER_PATH=/usr/bin/gcc PYTHON_BIN_PATH=/usr/bin/python TF_NEED_GCP=0 CUDA_TOOLKIT_PATH="/usr/local/cuda" CUDNN_INSTALL_PATH="/usr/local/cuda" TF_NEED_CUDA=1 TF_CUDA_COMPUTE_CAPABILITIES="3.0" TF_CUDNN_VERSION="5" TF_CUDA_VERSION="8.0" TF_CUDA_VERSION_TOOLKIT=8.0 TF_NEED_HDFS=0 ./configure < configure_params.txt
34 | cd src; bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
35 | else
36 | cd src; TF_NEED_OPENCL=0 PYTHON_LIB_PATH=/usr/lib/python2.7/dist-packages TF_ENABLE_XLA=0 TF_NEED_HDFS=0 PYTHON_BIN_PATH=/usr/bin/python TF_NEED_JEMALLOC=1 CC_OPT_FLAGS="-march=native" TF_NEED_GCP=0 TF_NEED_CUDA=0 ./configure
37 | cd src; bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
38 | endif
39 |
40 | .PHONY: install
41 | install:
42 | cd src; bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
43 | pip install /tmp/tensorflow_pkg/*
44 |
45 | .PHONY: load_test
46 | load_test:
47 | ifeq ($(GPU_SUPPORT),1)
48 | LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):/usr/local/cuda/lib64 python test/load.py
49 | else
50 | python test/load.py
51 | endif
52 |
--------------------------------------------------------------------------------
/frameworks/tensorflow/test/load.py:
--------------------------------------------------------------------------------
1 |
2 | import tensorflow
3 |
4 | print('Python-Tensorflow loaded OK')
5 |
--------------------------------------------------------------------------------
/frameworks/torch/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | echo "OK"
5 |
6 | .PHONY: src
7 | src:
8 | git clone --recursive https://github.com/torch/distro.git src
9 |
10 | .PHONY: build
11 | build:
12 | cd src; bash install-deps
13 |
14 | .PHONY: install
15 | install:
16 | cd src; ./install.sh
17 |
18 | .PHONY: load_test
19 | load_test:
20 | . ~/.bashrc; th -h
21 |
--------------------------------------------------------------------------------
/frameworks/transferflow/Makefile:
--------------------------------------------------------------------------------
1 |
2 | .PHONY: dependencies
3 | dependencies:
4 | echo "OK"
5 |
6 | .PHONY: src
7 | src:
8 | git clone --recursive https://github.com/dominiek/transferflow.git src
9 | cd src; git checkout v0.1.4
10 |
11 | .PHONY: build
12 | build:
13 | cd src; make
14 |
15 | .PHONY: install
16 | install:
17 | cd src; python setup.py install
18 |
19 | .PHONY: load_test
20 | load_test:
21 | python test/load.py
22 |
--------------------------------------------------------------------------------
/frameworks/transferflow/test/load.py:
--------------------------------------------------------------------------------
1 |
2 | import transferflow
3 |
4 | print('Python-Transferflow loaded OK')
5 |
--------------------------------------------------------------------------------
/utils/cuda_device_query/Makefile:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | #
3 | # Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
4 | #
5 | # NOTICE TO USER:
6 | #
7 | # This source code is subject to NVIDIA ownership rights under U.S. and
8 | # international Copyright laws.
9 | #
10 | # NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
11 | # CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
12 | # IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
13 | # REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
14 | # MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
15 | # IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
16 | # OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
17 | # OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
18 | # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
19 | # OR PERFORMANCE OF THIS SOURCE CODE.
20 | #
21 | # U.S. Government End Users. This source code is a "commercial item" as
22 | # that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
23 | # "commercial computer software" and "commercial computer software
24 | # documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
25 | # and is provided to the U.S. Government only as a commercial end item.
26 | # Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
27 | # 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
28 | # source code with only those rights set forth herein.
29 | #
30 | ################################################################################
31 | #
32 | # Makefile project only supported on Mac OS X and Linux Platforms
33 | #
34 | ################################################################################
35 |
36 | # Location of the CUDA Toolkit
37 | CUDA_PATH ?= /usr/local/cuda-8.0
38 |
39 | ##############################
40 | # start deprecated interface #
41 | ##############################
42 | ifeq ($(x86_64),1)
43 | $(info WARNING - x86_64 variable has been deprecated)
44 | $(info WARNING - please use TARGET_ARCH=x86_64 instead)
45 | TARGET_ARCH ?= x86_64
46 | endif
47 | ifeq ($(ARMv7),1)
48 | $(info WARNING - ARMv7 variable has been deprecated)
49 | $(info WARNING - please use TARGET_ARCH=armv7l instead)
50 | TARGET_ARCH ?= armv7l
51 | endif
52 | ifeq ($(aarch64),1)
53 | $(info WARNING - aarch64 variable has been deprecated)
54 | $(info WARNING - please use TARGET_ARCH=aarch64 instead)
55 | TARGET_ARCH ?= aarch64
56 | endif
57 | ifeq ($(ppc64le),1)
58 | $(info WARNING - ppc64le variable has been deprecated)
59 | $(info WARNING - please use TARGET_ARCH=ppc64le instead)
60 | TARGET_ARCH ?= ppc64le
61 | endif
62 | ifneq ($(GCC),)
63 | $(info WARNING - GCC variable has been deprecated)
64 | $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
65 | HOST_COMPILER ?= $(GCC)
66 | endif
67 | ifneq ($(abi),)
68 | $(error ERROR - abi variable has been removed)
69 | endif
70 | ############################
71 | # end deprecated interface #
72 | ############################
73 |
74 | # architecture
75 | HOST_ARCH := $(shell uname -m)
76 | TARGET_ARCH ?= $(HOST_ARCH)
77 | ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
78 | ifneq ($(TARGET_ARCH),$(HOST_ARCH))
79 | ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
80 | TARGET_SIZE := 64
81 | else ifneq (,$(filter $(TARGET_ARCH),armv7l))
82 | TARGET_SIZE := 32
83 | endif
84 | else
85 | TARGET_SIZE := $(shell getconf LONG_BIT)
86 | endif
87 | else
88 | $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
89 | endif
90 | ifneq ($(TARGET_ARCH),$(HOST_ARCH))
91 | ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
92 | $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
93 | endif
94 | endif
95 |
96 | # When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
97 | ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
98 | TARGET_ARCH = armv7l
99 | endif
100 |
101 | # operating system
102 | HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
103 | TARGET_OS ?= $(HOST_OS)
104 | ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
105 | $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
106 | endif
107 |
108 | # host compiler
109 | ifeq ($(TARGET_OS),darwin)
110 | ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
111 | HOST_COMPILER ?= clang++
112 | endif
113 | else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
114 | ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
115 | ifeq ($(TARGET_OS),linux)
116 | HOST_COMPILER ?= arm-linux-gnueabihf-g++
117 | else ifeq ($(TARGET_OS),qnx)
118 | ifeq ($(QNX_HOST),)
119 | $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
120 | endif
121 | ifeq ($(QNX_TARGET),)
122 | $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
123 | endif
124 | export QNX_HOST
125 | export QNX_TARGET
126 | HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
127 | else ifeq ($(TARGET_OS),android)
128 | HOST_COMPILER ?= arm-linux-androideabi-g++
129 | endif
130 | else ifeq ($(TARGET_ARCH),aarch64)
131 | ifeq ($(TARGET_OS), linux)
132 | HOST_COMPILER ?= aarch64-linux-gnu-g++
133 | else ifeq ($(TARGET_OS),qnx)
134 | ifeq ($(QNX_HOST),)
135 | $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
136 | endif
137 | ifeq ($(QNX_TARGET),)
138 | $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
139 | endif
140 | export QNX_HOST
141 | export QNX_TARGET
142 | HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
143 | else ifeq ($(TARGET_OS), android)
144 | HOST_COMPILER ?= aarch64-linux-android-g++
145 | endif
146 | else ifeq ($(TARGET_ARCH),ppc64le)
147 | HOST_COMPILER ?= powerpc64le-linux-gnu-g++
148 | endif
149 | endif
150 | HOST_COMPILER ?= g++
151 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
152 |
153 | # internal flags
154 | NVCCFLAGS := -m${TARGET_SIZE}
155 | CCFLAGS :=
156 | LDFLAGS :=
157 |
158 | # build flags
159 | ifeq ($(TARGET_OS),darwin)
160 | LDFLAGS += -rpath $(CUDA_PATH)/lib
161 | CCFLAGS += -arch $(HOST_ARCH)
162 | else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
163 | LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
164 | CCFLAGS += -mfloat-abi=hard
165 | else ifeq ($(TARGET_OS),android)
166 | LDFLAGS += -pie
167 | CCFLAGS += -fpie -fpic -fexceptions
168 | endif
169 |
170 | ifneq ($(TARGET_ARCH),$(HOST_ARCH))
171 | ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
172 | ifneq ($(TARGET_FS),)
173 | GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
174 | ifeq ($(GCCVERSIONLTEQ46),1)
175 | CCFLAGS += --sysroot=$(TARGET_FS)
176 | endif
177 | LDFLAGS += --sysroot=$(TARGET_FS)
178 | LDFLAGS += -rpath-link=$(TARGET_FS)/lib
179 | LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
180 | LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
181 | endif
182 | endif
183 | endif
184 |
185 | # Debug build flags
186 | ifeq ($(dbg),1)
187 | NVCCFLAGS += -g -G
188 | BUILD_TYPE := debug
189 | else
190 | BUILD_TYPE := release
191 | endif
192 |
193 | ALL_CCFLAGS :=
194 | ALL_CCFLAGS += $(NVCCFLAGS)
195 | ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
196 | ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
197 | ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
198 |
199 | SAMPLE_ENABLED := 1
200 |
201 | ALL_LDFLAGS :=
202 | ALL_LDFLAGS += $(ALL_CCFLAGS)
203 | ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
204 | ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
205 |
206 | # Common includes and paths for CUDA
207 | INCLUDES := -I../../common/inc
208 | LIBRARIES :=
209 |
210 | ################################################################################
211 |
212 | # Gencode arguments
213 | SMS ?= 20 30 35 37 50 52 60
214 |
215 | ifeq ($(SMS),)
216 | $(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
217 | SAMPLE_ENABLED := 0
218 | endif
219 |
220 | ifeq ($(GENCODE_FLAGS),)
221 | # Generate SASS code for each SM architecture listed in $(SMS)
222 | $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
223 |
224 | # Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
225 | HIGHEST_SM := $(lastword $(sort $(SMS)))
226 | ifneq ($(HIGHEST_SM),)
227 | GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
228 | endif
229 | endif
230 |
231 | ifeq ($(SAMPLE_ENABLED),0)
232 | EXEC ?= @echo "[@]"
233 | endif
234 |
235 | ################################################################################
236 |
237 | # Target rules
238 | all: build
239 |
240 | build: deviceQuery
241 |
242 | check.deps:
243 | ifeq ($(SAMPLE_ENABLED),0)
244 | @echo "Sample will be waived due to the above missing dependencies"
245 | else
246 | @echo "Sample is ready - all dependencies have been met"
247 | endif
248 |
249 | deviceQuery.o:deviceQuery.cpp
250 | $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
251 |
252 | deviceQuery: deviceQuery.o
253 | $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
254 | $(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
255 | $(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
256 |
257 | run: build
258 | $(EXEC) ./deviceQuery
259 |
260 | clean:
261 | rm -f deviceQuery deviceQuery.o
262 | rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/deviceQuery
263 |
264 | clobber: clean
265 |
--------------------------------------------------------------------------------
/utils/cuda_device_query/NsightEclipse.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | deviceQuery
5 |
6 | cudaSetDevice
7 | cudaGetDeviceCount
8 | cudaGetDeviceProperties
9 | cudaDriverGetVersion
10 | cudaRuntimeGetVersion
11 |
12 |
13 | whole
14 |
15 | ./
16 | ../
17 | ../../common/inc
18 |
19 |
20 | CUDA Runtime API
21 | Device Query
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | true
30 | deviceQuery.cpp
31 |
32 | 1:CUDA Basic Topics
33 |
34 | sm20
35 | sm30
36 | sm35
37 | sm37
38 | sm50
39 | sm52
40 | sm60
41 |
42 |
43 | x86_64
44 | linux
45 |
46 |
47 | windows7
48 |
49 |
50 | x86_64
51 | macosx
52 |
53 |
54 | arm
55 |
56 |
57 | ppc64le
58 | linux
59 |
60 |
61 |
62 | all
63 |
64 | Device Query
65 | exe
66 |
67 |
--------------------------------------------------------------------------------
/utils/cuda_device_query/deviceQuery:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/utils/cuda_device_query/deviceQuery
--------------------------------------------------------------------------------
/utils/cuda_device_query/deviceQuery.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 | /* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
12 |
13 | // Shared Utilities (QA Testing)
14 |
15 | // std::system includes
16 | #include <memory>
17 | #include <iostream>
18 |
19 | #include <cuda_runtime.h>
20 | #include <helper_cuda.h>
21 |
22 |
23 |
24 | int *pArgc = NULL;
25 | char **pArgv = NULL;
26 |
27 | #if CUDART_VERSION < 5000
28 |
29 | // CUDA-C includes
30 | #include <cuda.h>
31 |
32 | // This function wraps the CUDA Driver API into a template function
33 | template <class T>
34 | inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
35 | {
36 | CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
37 |
38 | if (CUDA_SUCCESS != error)
39 | {
40 | fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
41 | error, __FILE__, __LINE__);
42 |
43 | exit(EXIT_FAILURE);
44 | }
45 | }
46 |
47 | #endif /* CUDART_VERSION < 5000 */
48 |
49 | ////////////////////////////////////////////////////////////////////////////////
50 | // Program main
51 | ////////////////////////////////////////////////////////////////////////////////
52 | int
53 | main(int argc, char **argv)
54 | {
55 | pArgc = &argc;
56 | pArgv = argv;
57 |
58 | printf("%s Starting...\n\n", argv[0]);
59 | printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
60 |
61 | int deviceCount = 0;
62 | cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
63 |
64 | if (error_id != cudaSuccess)
65 | {
66 | printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id, cudaGetErrorString(error_id));
67 | printf("Result = FAIL\n");
68 | exit(EXIT_FAILURE);
69 | }
70 |
71 | // This function call returns 0 if there are no CUDA capable devices.
72 | if (deviceCount == 0)
73 | {
74 | printf("There are no available device(s) that support CUDA\n");
75 | }
76 | else
77 | {
78 | printf("Detected %d CUDA Capable device(s)\n", deviceCount);
79 | }
80 |
81 | int dev, driverVersion = 0, runtimeVersion = 0;
82 |
83 | for (dev = 0; dev < deviceCount; ++dev)
84 | {
85 | cudaSetDevice(dev);
86 | cudaDeviceProp deviceProp;
87 | cudaGetDeviceProperties(&deviceProp, dev);
88 |
89 | printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
90 |
91 | // Console log
92 | cudaDriverGetVersion(&driverVersion);
93 | cudaRuntimeGetVersion(&runtimeVersion);
94 | printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
95 | printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
96 |
97 | char msg[256];
98 | SPRINTF(msg, " Total amount of global memory: %.0f MBytes (%llu bytes)\n",
99 | (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
100 | printf("%s", msg);
101 |
102 | printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
103 | deviceProp.multiProcessorCount,
104 | _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
105 | _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
106 | printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
107 |
108 |
109 | #if CUDART_VERSION >= 5000
110 | // This is supported in CUDA 5.0 (runtime API device properties)
111 | printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
112 | printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
113 |
114 | if (deviceProp.l2CacheSize)
115 | {
116 | printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
117 | }
118 |
119 | #else
120 | // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
121 | int memoryClock;
122 | getCudaAttribute(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
123 | printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
124 | int memBusWidth;
125 | getCudaAttribute(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
126 | printf(" Memory Bus Width: %d-bit\n", memBusWidth);
127 | int L2CacheSize;
128 | getCudaAttribute(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
129 |
130 | if (L2CacheSize)
131 | {
132 | printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
133 | }
134 |
135 | #endif
136 |
137 | printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
138 | deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
139 | deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
140 | printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
141 | deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
142 | printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
143 | deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
144 |
145 |
146 | printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
147 | printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
148 | printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
149 | printf(" Warp size: %d\n", deviceProp.warpSize);
150 | printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
151 | printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
152 | printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
153 | deviceProp.maxThreadsDim[0],
154 | deviceProp.maxThreadsDim[1],
155 | deviceProp.maxThreadsDim[2]);
156 | printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
157 | deviceProp.maxGridSize[0],
158 | deviceProp.maxGridSize[1],
159 | deviceProp.maxGridSize[2]);
160 | printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
161 | printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
162 | printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
163 | printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
164 | printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
165 | printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
166 | printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
167 | printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
168 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
169 | printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
170 | #endif
171 | printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
172 | printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
173 |
174 | const char *sComputeMode[] =
175 | {
176 | "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
177 | "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
178 | "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
179 | "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
180 | "Unknown",
181 | NULL
182 | };
183 | printf(" Compute Mode:\n");
184 | printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
185 | }
186 |
187 | // If there are 2 or more GPUs, query to determine whether RDMA is supported
188 | if (deviceCount >= 2)
189 | {
190 | cudaDeviceProp prop[64];
191 | int gpuid[64]; // we want to find the first two GPUs that can support P2P
192 | int gpu_p2p_count = 0;
193 |
194 | for (int i=0; i < deviceCount; i++)
195 | {
196 | checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
197 |
198 | // Only boards based on Fermi or later can support P2P
199 | if ((prop[i].major >= 2)
200 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
201 | // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to support this
202 | && prop[i].tccDriver
203 | #endif
204 | )
205 | {
206 | // This is an array of P2P capable GPUs
207 | gpuid[gpu_p2p_count++] = i;
208 | }
209 | }
210 |
211 | // Show all the combinations of support P2P GPUs
212 | int can_access_peer;
213 |
214 | if (gpu_p2p_count >= 2)
215 | {
216 | for (int i = 0; i < gpu_p2p_count; i++)
217 | {
218 | for (int j = 0; j < gpu_p2p_count; j++)
219 | {
220 | if (gpuid[i] == gpuid[j])
221 | {
222 | continue;
223 | }
224 | checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j]));
225 | printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
226 | prop[gpuid[j]].name, gpuid[j] ,
227 | can_access_peer ? "Yes" : "No");
228 | }
229 | }
230 | }
231 | }
232 |
233 | // csv masterlog info
234 | // *****************************
235 | // exe and CUDA driver name
236 | printf("\n");
237 | std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
238 | char cTemp[16];
239 |
240 | // driver version
241 | sProfileString += ", CUDA Driver Version = ";
242 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
243 | sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
244 | #else
245 | sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
246 | #endif
247 | sProfileString += cTemp;
248 |
249 | // Runtime version
250 | sProfileString += ", CUDA Runtime Version = ";
251 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
252 | sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
253 | #else
254 | sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
255 | #endif
256 | sProfileString += cTemp;
257 |
258 | // Device count
259 | sProfileString += ", NumDevs = ";
260 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
261 | sprintf_s(cTemp, 10, "%d", deviceCount);
262 | #else
263 | sprintf(cTemp, "%d", deviceCount);
264 | #endif
265 | sProfileString += cTemp;
266 |
267 | // Print Out all device Names
268 | for (dev = 0; dev < deviceCount; ++dev)
269 | {
270 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
271 | sprintf_s(cTemp, 13, ", Device%d = ", dev);
272 | #else
273 | sprintf(cTemp, ", Device%d = ", dev);
274 | #endif
275 | cudaDeviceProp deviceProp;
276 | cudaGetDeviceProperties(&deviceProp, dev);
277 | sProfileString += cTemp;
278 | sProfileString += deviceProp.name;
279 | }
280 |
281 | sProfileString += "\n";
282 | printf("%s", sProfileString.c_str());
283 |
284 | printf("Result = PASS\n");
285 |
286 | // finish
287 | exit(EXIT_SUCCESS);
288 | }
289 |
--------------------------------------------------------------------------------
/utils/cuda_device_query/deviceQuery.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dominiek/deep-base/e3adfca44620b2b54ad82d92fdaf9c5bc80beea2/utils/cuda_device_query/deviceQuery.o
--------------------------------------------------------------------------------
/utils/cuda_device_query/readme.txt:
--------------------------------------------------------------------------------
1 | Sample: deviceQuery
2 | Minimum spec: SM 2.0
3 |
4 | This sample enumerates the properties of the CUDA devices present in the system.
5 |
6 | Key concepts:
7 | CUDA Runtime API
8 | Device Query
9 |
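How to run inside deep-base (a sketch based on this repo's Makefiles): the root
Makefile builds this sample during the GPU image build and installs it as
/usr/local/bin/cuda_device_query. To rebuild and run it by hand:

    cd /workdir/utils/cuda_device_query
    make run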
--------------------------------------------------------------------------------