├── .gitignore
├── CMakeLists.txt
├── README.rst
├── cmake
    ├── findMKL.cmake
    └── findThreads.cmake
├── convdata.py
├── convnet.py
├── data.py
├── example-layers
    ├── layer-params-18pct.cfg
    ├── layer-params-19pct.cfg
    ├── layer-params-80sec.cfg
    ├── layer-params-conv-local-11pct.cfg
    ├── layer-params-conv-local-13pct.cfg
    ├── layer-params-example.cfg
    ├── layer-params.gc.cfg
    ├── layers-18pct.cfg
    ├── layers-19pct.cfg
    ├── layers-80sec.cfg
    ├── layers-conv-local-11pct.cfg
    ├── layers-conv-local-13pct.cfg
    ├── layers-example.cfg
    └── layers.gc.cfg
├── gpumodel.py
├── include
    ├── common
    │   ├── matrix.h
    │   ├── matrix_funcs.h
    │   ├── queue.h
    │   └── thread.h
    ├── convnet.cuh
    ├── cost.cuh
    ├── cudaconv2
    │   ├── conv_util.cuh
    │   └── cudaconv2.cuh
    ├── data.cuh
    ├── layer.cuh
    ├── layer_kernels.cuh
    ├── neuron.cuh
    ├── nvmatrix
    │   ├── nvmatrix.cuh
    │   ├── nvmatrix_kernels.cuh
    │   └── nvmatrix_operators.cuh
    ├── pyconvnet.cuh
    ├── util.cuh
    ├── weights.cuh
    └── worker.cuh
├── layer.py
├── options.py
├── ordereddict.py
├── shownet.py
├── src
    ├── common
    │   ├── CMakeLists.txt
    │   └── matrix.cpp
    ├── convnet.cu
    ├── cost.cu
    ├── cudaconv2
    │   ├── CMakeLists.txt
    │   ├── conv_util.cu
    │   ├── filter_acts.cu
    │   ├── img_acts.cu
    │   └── weight_acts.cu
    ├── data.cu
    ├── layer.cu
    ├── layer_kernels.cu
    ├── neuron.cu
    ├── nvmatrix
    │   ├── CMakeLists.txt
    │   ├── nvmatrix.cu
    │   └── nvmatrix_kernels.cu
    ├── pyconvnet.cu
    ├── util.cu
    ├── weights.cu
    └── worker.cu
└── util.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # CMake
 2 | CMakeCache.txt
 3 | CMakeFiles
 4 | Makefile
 5 | cmake_install.cmake
 6 | install_manifest.txt
 7 | 
 8 | # Byte-compiled / optimized / DLL files
 9 | __pycache__/
10 | *.py[cod]
11 | 
12 | # C extensions
13 | *.so
14 | *.a
15 | 
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | bin/
20 | build/
21 | develop-eggs/
22 | dist/
23 | eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Mr Developer
38 | .mr.developer.cfg
39 | .project
40 | .pydevproject
41 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required (VERSION 2.8)
  2 | project (cuda-convnet )
  3 | 
  4 | if(MSVC)
  5 |   list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/")  # findMKL.cmake, findThreads.cmake
  6 |   set(THREADS_USE_PTHREADS_WIN32 TRUE)
  7 | 
  8 |   # Directory containing pthread.h; override with -DTHREADS_PTHREAD_WIN32_INC=<dir>
  9 |   set(THREADS_PTHREAD_WIN32_INC "C:/cuda-convnet/pthreads-w32-2-9-1-release/Pre-built.2/include" CACHE PATH "Directory containing the pthreads-win32 pthread.h")
 10 | 
 11 |   # MKL install location (the folder containing "./mkl"); override with -DMKL_ROOT=<dir>
 12 |   set(MKL_ROOT "C:/Program Files (x86)/Intel/Composer XE 2013" CACHE PATH "Folder containing the Intel mkl directory")
 13 |   # Strip any /Zi, /ZI or /Z7 from the release flags, then append a single /Z7.
 14 |   string(REGEX REPLACE "/Z[iI7]" ""
 15 |          CMAKE_CXX_FLAGS_RELEASE
 16 |          "${CMAKE_CXX_FLAGS_RELEASE}")
 17 |   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Z7")
 18 | endif()
 19 | 
 20 | find_package (CUDA REQUIRED)
 21 | if(MSVC)
 22 |   find_package (MKL REQUIRED)      # cmake/findMKL.cmake
 23 |   find_package (Threads REQUIRED)  # cmake/findThreads.cmake (pthreads-win32)
 24 | else()
 25 |   find_package (BLAS REQUIRED)
 26 |   find_path(BLAS_INCLUDE_DIRS cblas.h
 27 |     HINTS
 28 |       "/System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/"
 29 |       "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/"
 30 |   )
 31 | endif()
 32 | find_package (PythonInterp 2.7 REQUIRED)  # located first so that PythonLibs is matched to this interpreter
 33 | find_package (PythonLibs 2.7 REQUIRED)
 34 | 
 35 | find_path(CUDA_COMMON_INCLUDE_DIRS
 36 |   helper_cuda.h
 37 |   PATHS ${CUDA_SDK_SEARCH_PATH}
 38 |     "/usr/local/cuda"
 39 |     "/Developer/NVIDIA/CUDA-6.0"
 40 |     "C:/ProgramData/NVIDIA Corporation/CUDA Samples/v6.0/common/inc"
 41 |   PATH_SUFFIXES "samples/common/inc"
 42 |   DOC "Location of helper_cuda.h"
 43 |   NO_DEFAULT_PATH  # only the locations listed above are searched
 44 | )
 45 | 
 46 | if(APPLE)
 47 |   list(APPEND CUDA_NVCC_FLAGS -ccbin /usr/bin/clang)
 48 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
 49 |   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libstdc++")
 50 | endif()
 51 | 
 52 | #list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_20,code=sm_20)
 53 | list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30)
 54 | #list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_32,code=sm_32)
 55 | #list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_35,code=sm_35)
 56 | #list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_50,code=sm_50)
 57 | # CMAKE_CXX_FLAGS is a space-separated string, not a list: append -fPIC as text.
 58 | if(CMAKE_SYSTEM_NAME MATCHES "Linux")
 59 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
 60 | endif()
 61 | 
 62 | execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
 63 |   "import numpy as n; print(n.get_include());"
 64 |   RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
 65 |   OUTPUT_VARIABLE _NUMPY_VALUES
 66 |   ERROR_VARIABLE _NUMPY_ERROR_VALUE
 67 |   OUTPUT_STRIP_TRAILING_WHITESPACE
 68 | )
 69 | if(_NUMPY_SEARCH_SUCCESS EQUAL 0)  # numeric test: only exit status 0 counts as success
 70 |   string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES "${_NUMPY_VALUES}")  # escape literal semicolons
 71 |   string(REGEX REPLACE "\n" ";" _NUMPY_VALUES "${_NUMPY_VALUES}")     # one list item per output line
 72 |   list(GET _NUMPY_VALUES 0 NUMPY_INCLUDE_DIRS)
 73 |   string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS "${NUMPY_INCLUDE_DIRS}")  # backslashes -> forward slashes
 74 | else()
 75 |   message(FATAL_ERROR "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
 76 | endif()
 77 | 
 78 | if (MSVC)  # header search path for MSVC builds: MKL and pthreads-win32 instead of BLAS
 79 |   include_directories (
 80 |     ${CUDA_INCLUDE_DIRS}
 81 |     ${CUDA_COMMON_INCLUDE_DIRS}  # directory where helper_cuda.h was found
 82 |     ${PYTHON_INCLUDE_DIRS}
 83 |     ${NUMPY_INCLUDE_DIRS}/numpy  # numpy.get_include() result with /numpy appended
 84 |     ${MKL_INCLUDE_DIR}
 85 |     ${THREADS_PTHREADS_INCLUDE_DIR}
 86 |     include/common
 87 |     include/nvmatrix
 88 |     include/cudaconv2
 89 |     include/
 90 |   )
 91 | else()  # all other compilers: the directory where cblas.h was found comes first
 92 |   include_directories (
 93 |     ${BLAS_INCLUDE_DIRS}
 94 |     ${CUDA_INCLUDE_DIRS}
 95 |     ${CUDA_COMMON_INCLUDE_DIRS}  # directory where helper_cuda.h was found
 96 |     ${PYTHON_INCLUDE_DIRS}
 97 |     ${NUMPY_INCLUDE_DIRS}/numpy  # numpy.get_include() result with /numpy appended
 98 |     include/common
 99 |     include/nvmatrix
100 |     include/cudaconv2
101 |     include/
102 |   )
103 | endif()
104 | 
105 | add_subdirectory(src/common)
106 | add_subdirectory(src/nvmatrix)
107 | add_subdirectory(src/cudaconv2)
108 | 
109 | cuda_add_library(convnet SHARED
110 |   src/convnet.cu
111 |   src/cost.cu
112 |   src/data.cu
113 |   src/layer.cu
114 |   src/layer_kernels.cu
115 |   src/neuron.cu
116 |   src/pyconvnet.cu
117 |   src/util.cu
118 |   src/weights.cu
119 |   src/worker.cu
120 | )
121 | 
122 | # The library file is named "_convnet" plus a suffix:
123 | # ".pyd" when building with MSVC, ".so" otherwise.
124 | if(MSVC)
125 |   set_target_properties(convnet PROPERTIES
126 |     PREFIX "_"
127 |     SUFFIX ".pyd")
128 | else()
129 |   set_target_properties(convnet PROPERTIES
130 |     PREFIX "_"
131 |     SUFFIX ".so")
132 | endif()
133 | 
134 | # Link the three in-tree libraries, Python and cuBLAS, plus
135 | # MKL and the thread library (MSVC) or BLAS (everything else).
136 | if(MSVC)
137 |   target_link_libraries(convnet
138 |     common
139 |     nvmatrix
140 |     cudaconv2
141 |     ${PYTHON_LIBRARIES}
142 |     ${CUDA_CUBLAS_LIBRARIES}
143 |     ${MKL_LIBRARIES}
144 |     ${CMAKE_THREAD_LIBS_INIT}
145 |   )
146 | else()
147 |   target_link_libraries(convnet
148 |     common
149 |     nvmatrix
150 |     cudaconv2
151 |     ${PYTHON_LIBRARIES}
152 |     ${BLAS_LIBRARIES}
153 |     ${CUDA_CUBLAS_LIBRARIES}
154 |   )
155 | endif()
156 | 
157 | cuda_build_clean_target()
158 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | This is my fork of the ``cuda-convnet`` convolutional neural network
 2 | implementation written by Alex Krizhevsky.
 3 | 
 4 | ``cuda-convnet`` has quite extensive documentation itself.  Find the
 5 | `MAIN DOCUMENTATION HERE <http://code.google.com/p/cuda-convnet/>`_.
 6 | 
 7 | **Update**: A newer version, `cuda-convnet 2
 8 | <https://code.google.com/p/cuda-convnet2/>`_, has been released by
 9 | Alex.  This fork is still based on the original cuda-convnet.
10 | 
11 | ===================
12 | Additional features
13 | ===================
14 | 
15 | This document will only describe the small differences between
16 | ``cuda-convnet`` as hosted on Google Code and this version.
17 | 
18 | Dropout
19 | =======
20 | 
21 | Dropout is a relatively new regularization technique for neural
22 | networks.  See the `Improving neural networks by preventing
23 | co-adaptation of feature detectors <http://arxiv.org/abs/1207.0580>`_
24 | and `Improving Neural Networks with Dropout
25 | <http://www.cs.toronto.edu/~nitish/msc_thesis.pdf>`_ papers for
26 | details.
27 | 
28 | To set a dropout rate for one of our layers, we use the ``dropout``
29 | parameter in our model's ``layer-params`` configuration file.  For
30 | example, we could use dropout for the last layer in the CIFAR example
31 | by modifying the section for the fc10 layer to look like so::
32 | 
33 |   [fc10]
34 |   epsW=0.001
35 |   epsB=0.002
36 |   # ...
37 |   dropout=0.5
38 | 
39 | In practice, you'll probably also want to double the number of
40 | ``outputs`` in that layer.
41 | 
42 | 
43 | CURAND random seeding
44 | =====================
45 | 
46 | An environment variable ``CONVNET_RANDOM_SEED``, if set, will be used
47 | to set the CURAND library's random seed.  This is important in order
48 | to get reproducible results.
49 | 
50 | 
51 | Updated to work with CUDA via CMake
52 | ===================================
53 | 
54 | The build configuration and code have been updated to work with CUDA
55 | via CMake. Run ``cmake .`` and then ``make``. If you have an alternative
56 | BLAS library, set it with, for example, ``cmake -DBLAS_LIBRARIES=/usr/lib/libcblas.so .``.
57 | 


--------------------------------------------------------------------------------
/cmake/findMKL.cmake:
--------------------------------------------------------------------------------
 1 | # - Find the MKL libraries
 2 | # Modified from Armadillo's ARMA_FindMKL.cmake
 3 | # This module defines
 4 | #  MKL_INCLUDE_DIR, the directory for the MKL headers
 5 | #  MKL_LIB_DIR, the directory for the MKL library files
 6 | #  MKL_COMPILER_LIB_DIR, the directory for the MKL compiler library files
 7 | #  MKL_LIBRARIES, the libraries needed to use Intel's implementation of BLAS & LAPACK.
 8 | #  MKL_FOUND, If false, do not try to use MKL; if true, the macro definition USE_MKL is added.
 9 | 
10 | # Set the include path
11 | # TODO: what if MKL is not installed in /opt/intel/mkl?
12 | # try to find at /opt/intel/mkl
13 | # in windows, try to find MKL at C:/Program Files (x86)/Intel/Composer XE/mkl
14 | 
15 | # Installation root: MKL_ROOT on Windows, /opt/intel everywhere else.
16 | if(WIN32)
17 |     set(MKLROOT_PATH ${MKL_ROOT} CACHE PATH "Where the MKL are stored")
18 | else()
19 |     set(MKLROOT_PATH "/opt/intel" CACHE PATH "Where the MKL are stored")
20 | endif()
21 | 
22 | # MKL counts as found when <root>/mkl exists; 8-byte pointers select the 64-bit setup.
23 | if(EXISTS "${MKLROOT_PATH}/mkl")
24 |     set(MKL_FOUND TRUE)
25 |     message("MKL is found at ${MKLROOT_PATH}/mkl")
26 |     if(CMAKE_SIZEOF_VOID_P EQUAL 8)
27 |         set(USE_MKL_64BIT On)
28 |         if(ARMADILLO_FOUND AND ARMADILLO_BLAS_LONG_LONG)
29 |             set(USE_MKL_64BIT_LIB On)
30 |             add_definitions(-DMKL_ILP64)
31 |             message("MKL is linked against ILP64 interface ... ")
32 |         endif()
33 |     else()
34 |         set(USE_MKL_64BIT Off)
35 |     endif()
36 | else()
37 |     set(MKL_FOUND FALSE)
38 |     message("MKL is NOT found ... ")
39 | endif()
40 | 
41 | # Derive the include directory, library directories and MKL_LIBRARIES from the root.
42 | if(MKL_FOUND)
43 |     set(MKL_INCLUDE_DIR "${MKLROOT_PATH}/mkl/include")
44 |     add_definitions(-DUSE_MKL)
45 | 
46 |     # Library directories and the interface library depend on USE_MKL_64BIT.
47 |     if(USE_MKL_64BIT)
48 |         set(MKL_LIB_DIR "${MKLROOT_PATH}/mkl/lib/intel64")
49 |         set(MKL_COMPILER_LIB_DIR
50 |             "${MKLROOT_PATH}/compiler/lib/intel64"
51 |             "${MKLROOT_PATH}/lib/intel64")
52 |         # ilp64 interface when USE_MKL_64BIT_LIB is set, lp64 otherwise.
53 |         if(USE_MKL_64BIT_LIB AND WIN32)
54 |             list(APPEND MKL_LIBRARIES mkl_intel_ilp64)
55 |         elseif(USE_MKL_64BIT_LIB)
56 |             list(APPEND MKL_LIBRARIES libmkl_intel_ilp64.a)
57 |         elseif(WIN32)
58 |             list(APPEND MKL_LIBRARIES mkl_intel_lp64)
59 |         else()
60 |             list(APPEND MKL_LIBRARIES libmkl_intel_lp64.a)
61 |         endif()
62 |     else()
63 |         set(MKL_LIB_DIR "${MKLROOT_PATH}/mkl/lib/ia32")
64 |         set(MKL_COMPILER_LIB_DIR
65 |             "${MKLROOT_PATH}/compiler/lib/ia32"
66 |             "${MKLROOT_PATH}/lib/ia32")
67 |         if(WIN32)
68 |             list(APPEND MKL_LIBRARIES mkl_intel_c)
69 |         else()
70 |             list(APPEND MKL_LIBRARIES libmkl_intel.a)
71 |         endif()
72 |     endif()
73 | 
74 |     # Remaining libraries: mkl_intel_thread, mkl_core and iomp5
75 |     # (libiomp5md on Windows; explicit .a archive names elsewhere).
76 |     if(WIN32)
77 |         list(APPEND MKL_LIBRARIES mkl_intel_thread mkl_core libiomp5md)
78 |     else()
79 |         list(APPEND MKL_LIBRARIES libmkl_intel_thread.a libmkl_core.a iomp5)
80 |     endif()
81 | endif()
82 | 
83 | # Report what was found, or stop when MKL was REQUIRED but is missing.
84 | if(MKL_FOUND)
85 |     if(NOT MKL_FIND_QUIETLY)
86 |         message(STATUS "Found MKL libraries: ${MKL_LIBRARIES}")
87 |         message(STATUS "MKL_INCLUDE_DIR: ${MKL_INCLUDE_DIR}")
88 |         message(STATUS "MKL_LIB_DIR: ${MKL_LIB_DIR}")
89 |         message(STATUS "MKL_COMPILER_LIB_DIR: ${MKL_COMPILER_LIB_DIR}")
90 |     endif()
91 | 
92 |     # Directory-scoped: applies to the directory that called find_package(MKL).
93 |     include_directories(${MKL_INCLUDE_DIR})
94 |     link_directories(${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR})
95 | elseif(MKL_FIND_REQUIRED)
96 |     message(FATAL_ERROR "Could not find MKL libraries")
97 | endif()
98 | 
99 | # MARK_AS_ADVANCED(MKL_LIBRARY)


--------------------------------------------------------------------------------
/cmake/findThreads.cmake:
--------------------------------------------------------------------------------
  1 | # Updated FindThreads.cmake that supports pthread-win32
  2 | # Downloaded from http://www.vtk.org/Bug/bug_view_advanced_page.php?bug_id=6399
  3 | 
  4 | # - This module determines the thread library of the system.
  5 | #
  6 | # The following variables are set
  7 | #  CMAKE_THREAD_LIBS_INIT     - the thread library
  8 | #  CMAKE_USE_SPROC_INIT       - are we using sproc?
  9 | #  CMAKE_USE_WIN32_THREADS_INIT - using WIN32 threads?
 10 | #  CMAKE_USE_PTHREADS_INIT    - are we using pthreads
 11 | #  CMAKE_HP_PTHREADS_INIT     - are we using hp pthreads
 12 | #
 13 | # If use of pthreads-win32 is desired, the following variables
 14 | # can be set.
 15 | #
 16 | #  THREADS_USE_PTHREADS_WIN32 -
 17 | #    Setting this to true searches for the pthreads-win32
 18 | #    port (since CMake 2.8.0)
 19 | #
 20 | #  THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME
 21 | #      C  = no exceptions (default)
 22 | #         (NOTE: This is the default scheme on most POSIX thread
 23 | #          implementations and what you should probably be using)
 24 | #      CE = C++ Exception Handling
 25 | #      SE = Structured Exception Handling (MSVC only)
 26 | #      (NOTE: Changing this option from the default may affect
 27 | #       the portability of your application.  See pthreads-win32
 28 | #       documentation for more details.)
 29 | #
 30 | #======================================================
 31 | # Example usage where threading library
 32 | # is provided by the system:
 33 | #
 34 | #   find_package(Threads REQUIRED)
 35 | #   add_executable(foo foo.cc)
 36 | #   target_link_libraries(foo ${CMAKE_THREAD_LIBS_INIT})
 37 | #
 38 | # Example usage if pthreads-win32 is desired on Windows
 39 | # or a system provided thread library:
 40 | #
 41 | #   set(THREADS_USE_PTHREADS_WIN32 true)
 42 | #   find_package(Threads REQUIRED)
 43 | #   include_directories(${THREADS_PTHREADS_INCLUDE_DIR})
 44 | #
 45 | #   add_executable(foo foo.cc)
 46 | #   target_link_libraries(foo ${CMAKE_THREAD_LIBS_INIT})
 47 | #
 48 | 
 49 | include(CheckIncludeFiles)
 50 | include(CheckLibraryExists)
 51 | set(Threads_FOUND FALSE)
 52 | # pthreads-win32 is used only on native Windows (not Cygwin) and only when requested.
 53 | if(WIN32 AND NOT CYGWIN AND THREADS_USE_PTHREADS_WIN32)
 54 |   set(_Threads_ptwin32 true)
 55 | endif()
 56 | 
 57 | # The sproc headers are probed on IRIX only
 58 | if(CMAKE_SYSTEM MATCHES IRIX)
 59 |   check_include_files("sys/types.h;sys/prctl.h" CMAKE_HAVE_SPROC_H)
 60 | endif()
 61 | 
 62 | if(CMAKE_HAVE_SPROC_H)
 63 |   # The sproc headers are available
 64 |   set(CMAKE_USE_SPROC_INIT 1)
 65 | 
 66 | elseif(_Threads_ptwin32)
 67 |   # pthreads-win32: validate the exception scheme, then locate header and library
 68 |   if(NOT DEFINED THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME)
 69 |     # Nothing requested: fall back to the "C" scheme
 70 |     set(THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME "C")
 71 |   else()
 72 |     # A scheme was requested: it must be one of C, CE or SE
 73 |     if(NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "C" AND
 74 |        NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "CE" AND
 75 |        NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "SE")
 76 |       message(FATAL_ERROR "See documentation for FindPthreads.cmake, only C, CE, and SE modes are allowed")
 77 |     endif()
 78 |     if(NOT MSVC AND THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "SE")
 79 |       message(FATAL_ERROR "Structured Exception Handling is only allowed for MSVC")
 80 |     endif()
 81 |   endif()
 82 | 
 83 |   # THREADS_PTHREAD_WIN32_INC is offered as an extra search location for pthread.h
 84 |   find_path(THREADS_PTHREADS_INCLUDE_DIR
 85 |     pthread.h
 86 |     PATHS ${THREADS_PTHREAD_WIN32_INC}
 87 |   )
 88 | 
 89 |   # Library name: pthreadG<scheme>2 for MinGW, pthreadV<scheme>2 in every other case
 90 |   if(MINGW AND NOT MSVC)
 91 |     set(_Threads_pthreads_libname
 92 |         pthreadG${THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME}2)
 93 |   else()
 94 |     set(_Threads_pthreads_libname
 95 |         pthreadV${THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME}2)
 96 |   endif()
 97 | 
 98 |   # The library is searched in lib/x64 under the parent of the include directory.
 99 |   # NOTE(review): only the x64 sub-directory is listed; a 32-bit build
100 |   # would presumably need a different directory - confirm.
101 |   set(_Threads_lib_paths "")
102 |   if(THREADS_PTHREADS_INCLUDE_DIR)
103 |     get_filename_component(_Threads_root_dir
104 |                            ${THREADS_PTHREADS_INCLUDE_DIR} PATH)
105 |     set(_Threads_lib_paths ${_Threads_root_dir}/lib/x64)
106 |   endif()
107 |   find_library(THREADS_PTHREADS_WIN32_LIBRARY
108 |     NAMES ${_Threads_pthreads_libname}
109 |     PATHS ${_Threads_lib_paths}
110 |     DOC "The Portable Threads Library for Win32"
111 |     NO_SYSTEM_PATH
112 |   )
113 | 
114 |   # Header and library both located: publish the library and flag success
115 |   if(THREADS_PTHREADS_INCLUDE_DIR AND THREADS_PTHREADS_WIN32_LIBRARY)
116 |     mark_as_advanced(THREADS_PTHREADS_INCLUDE_DIR)
117 |     set(CMAKE_THREAD_LIBS_INIT ${THREADS_PTHREADS_WIN32_LIBRARY})
118 |     set(CMAKE_HAVE_THREADS_LIBRARY 1)
119 |     set(Threads_FOUND TRUE)
120 |   endif()
121 | 
122 |   mark_as_advanced(THREADS_PTHREADS_WIN32_LIBRARY)
123 | 
124 | else()
125 |   # Generic path: look for pthread.h first
126 |   check_include_files("pthread.h" CMAKE_HAVE_PTHREAD_H)
127 |   if(CMAKE_HAVE_PTHREAD_H)
128 | 
129 |     #
130 |     # pthread.h exists; now probe the candidate libraries.
131 |     # A later successful probe overrides an earlier one.
132 |     #
133 |     set(CMAKE_HAVE_THREADS_LIBRARY)
134 |     if(NOT THREADS_HAVE_PTHREAD_ARG)
135 | 
136 |       # Candidate 1: -lpthreads
137 |       check_library_exists(pthreads pthread_create "" CMAKE_HAVE_PTHREADS_CREATE)
138 |       if(CMAKE_HAVE_PTHREADS_CREATE)
139 |         set(CMAKE_THREAD_LIBS_INIT "-lpthreads")
140 |         set(CMAKE_HAVE_THREADS_LIBRARY 1)
141 |         set(Threads_FOUND TRUE)
142 |       endif()
143 | 
144 |       # Candidate 2: -lpthread
145 |       check_library_exists(pthread pthread_create "" CMAKE_HAVE_PTHREAD_CREATE)
146 |       if(CMAKE_HAVE_PTHREAD_CREATE)
147 |         set(CMAKE_THREAD_LIBS_INIT "-lpthread")
148 |         set(CMAKE_HAVE_THREADS_LIBRARY 1)
149 |         set(Threads_FOUND TRUE)
150 |       endif()
151 | 
152 |       if(CMAKE_SYSTEM MATCHES "SunOS.*")
153 |         # Candidate 3 (SunOS only): -lthread
154 |         check_library_exists(thread thr_create "" CMAKE_HAVE_THR_CREATE)
155 |         if(CMAKE_HAVE_THR_CREATE)
156 |           set(CMAKE_THREAD_LIBS_INIT "-lthread")
157 |           set(CMAKE_HAVE_THREADS_LIBRARY 1)
158 |           set(Threads_FOUND TRUE)
159 |         endif()
160 |       endif()
161 | 
162 |     endif()
163 | 
164 |     if(NOT CMAKE_HAVE_THREADS_LIBRARY)
165 |       # None of -lpthreads, -lpthread or -lthread was found: try the -pthread flag
166 |       if("THREADS_HAVE_PTHREAD_ARG" MATCHES "^THREADS_HAVE_PTHREAD_ARG")
167 |         message(STATUS "Check if compiler accepts -pthread")
168 |         try_run(THREADS_PTHREAD_ARG THREADS_HAVE_PTHREAD_ARG
169 |           ${CMAKE_BINARY_DIR}
170 |           ${CMAKE_ROOT}/Modules/CheckForPthreads.c
171 |           CMAKE_FLAGS -DLINK_LIBRARIES:STRING=-pthread
172 |           COMPILE_OUTPUT_VARIABLE OUTPUT)
173 | 
174 |         if(THREADS_HAVE_PTHREAD_ARG)
175 |           if(THREADS_PTHREAD_ARG MATCHES "^2$")
176 |             set(Threads_FOUND TRUE)
177 |             message(STATUS "Check if compiler accepts -pthread - yes")
178 |           else()
179 |             message(STATUS "Check if compiler accepts -pthread - no")
180 |             file(APPEND
181 |               ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
182 |               "Determining if compiler accepts -pthread returned ${THREADS_PTHREAD_ARG} instead of 2. The compiler had the following output:\n${OUTPUT}\n\n")
183 |           endif()
184 |         else()
185 |           message(STATUS "Check if compiler accepts -pthread - no")
186 |           file(APPEND
187 |             ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
188 |             "Determining if compiler accepts -pthread failed with the following output:\n${OUTPUT}\n\n")
189 |         endif()
190 | 
191 |       endif()
192 | 
193 |       if(THREADS_HAVE_PTHREAD_ARG)
194 |         set(Threads_FOUND TRUE)
195 |         set(CMAKE_THREAD_LIBS_INIT "-pthread")
196 |       endif()
197 | 
198 |     endif()
199 |   endif()
200 | endif()
201 | 
202 | # Any thread library selected above is recorded as pthreads.
203 | if(CMAKE_THREAD_LIBS_INIT)
204 |   set(CMAKE_USE_PTHREADS_INIT 1)
205 |   set(Threads_FOUND TRUE)
206 | endif()
207 | 
208 | if(CMAKE_SYSTEM MATCHES "Windows" AND NOT THREADS_USE_PTHREADS_WIN32)
209 |   set(CMAKE_USE_WIN32_THREADS_INIT 1)
210 |   set(Threads_FOUND TRUE)
211 | endif()
212 | 
213 | if(CMAKE_USE_PTHREADS_INIT)
214 |   if(CMAKE_SYSTEM MATCHES "HP-UX-*")
215 |     # On HP-UX prefer libcma when it exists and is usable: it offers
216 |     # more symbols than the plain pthread library.  CMA threads are
217 |     # deprecated, see
218 |     #   http://docs.hp.com/en/B3920-90091/ch12s03.html#d0e11395
219 |     #   http://docs.hp.com/en/947/d8.html
220 |     # but compatibility has to be maintained here.
221 |     # CMAKE_HP_PTHREADS really tells whether CMA threads
222 |     # are available.
223 |     check_library_exists(cma pthread_attr_create "" CMAKE_HAVE_HP_CMA)
224 |     if(CMAKE_HAVE_HP_CMA)
225 |       set(CMAKE_THREAD_LIBS_INIT "-lcma")
226 |       set(CMAKE_HP_PTHREADS_INIT 1)
227 |       set(Threads_FOUND TRUE)
228 |     endif()
229 |     set(CMAKE_USE_PTHREADS_INIT 1)
230 |   endif()
231 | 
232 |   if(CMAKE_SYSTEM MATCHES "OSF1-V*")
233 |     set(CMAKE_USE_PTHREADS_INIT 0)
234 |     set(CMAKE_THREAD_LIBS_INIT)
235 |   endif()
236 | 
237 |   if(CMAKE_SYSTEM MATCHES "CYGWIN_NT*")
238 |     set(CMAKE_USE_PTHREADS_INIT 1)
239 |     set(Threads_FOUND TRUE)
240 |     set(CMAKE_THREAD_LIBS_INIT)
241 |     set(CMAKE_USE_WIN32_THREADS_INIT 0)
242 |   endif()
243 | endif()
244 | 
245 | include(FindPackageHandleStandardArgs)
246 | if(_Threads_ptwin32)
247 |   find_package_handle_standard_args(Threads DEFAULT_MSG
248 |     THREADS_PTHREADS_WIN32_LIBRARY THREADS_PTHREADS_INCLUDE_DIR)
249 | else()
250 |   find_package_handle_standard_args(Threads DEFAULT_MSG Threads_FOUND)
251 | endif()


--------------------------------------------------------------------------------
/convdata.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | #
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | from data import *
 26 | import numpy.random as nr
 27 | import numpy as n
 28 | import random as r
 29 | 
 30 | class CIFARDataProvider(LabeledMemoryDataProvider):
 31 |     def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
 32 |         LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
 33 |         self.data_mean = self.batch_meta['data_mean']
 34 |         self.num_colors = 3
 35 |         self.img_size = 32
 36 |         # Subtract the mean from the data and make sure that both data and
 37 |         # labels are in single-precision floating point.
 38 |         for d in self.data_dic:
 39 |             # This converts the data matrix to single precision and makes sure that it is C-ordered
 40 |             d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C')
 41 |             d['labels'] = n.require(d['labels'].reshape((1, d['data'].shape[1])), dtype=n.single, requirements='C')
 42 | 
 43 |     def get_next_batch(self):
 44 |         epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
 45 |         return epoch, batchnum, [datadic['data'], datadic['labels']]
 46 | 
 47 |     # Returns the dimensionality of the two data matrices returned by get_next_batch
 48 |     # idx is the index of the matrix.
 49 |     def get_data_dims(self, idx=0):
 50 |         return self.img_size**2 * self.num_colors if idx == 0 else 1
 51 | 
 52 |     # Takes as input an array returned by get_next_batch
 53 |     # Returns a (numCases, imgSize, imgSize, 3) array which can be
 54 |     # fed to pylab for plotting.
 55 |     # This is used by shownet.py to plot test case predictions.
 56 |     def get_plottable_data(self, data):
 57 |         return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
 58 | 
 59 | class CroppedCIFARDataProvider(LabeledMemoryDataProvider):
 60 |     def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
 61 |         LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
 62 | 
 63 |         self.border_size = dp_params['crop_border']
 64 |         self.inner_size = 32 - self.border_size*2
 65 |         self.multiview = dp_params['multiview_test'] and test
 66 |         self.num_views = 5*2
 67 |         self.data_mult = self.num_views if self.multiview else 1
 68 |         self.num_colors = 3
 69 | 
 70 |         for d in self.data_dic:
 71 |             d['data'] = n.require(d['data'], requirements='C')
 72 |             d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C')
 73 | 
 74 |         self.cropped_data = [n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)]
 75 | 
 76 |         self.batches_generated = 0
 77 |         self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1))
 78 | 
 79 |     def get_next_batch(self):
 80 |         epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
 81 | 
 82 |         cropped = self.cropped_data[self.batches_generated % 2]
 83 | 
 84 |         self.__trim_borders(datadic['data'], cropped)
 85 |         cropped -= self.data_mean
 86 |         self.batches_generated += 1
 87 |         return epoch, batchnum, [cropped, datadic['labels']]
 88 | 
 89 |     def get_data_dims(self, idx=0):
 90 |         return self.inner_size**2 * 3 if idx == 0 else 1
 91 | 
 92 |     # Takes as input an array returned by get_next_batch
 93 |     # Returns a (numCases, imgSize, imgSize, 3) array which can be
 94 |     # fed to pylab for plotting.
 95 |     # This is used by shownet.py to plot test case predictions.
 96 |     def get_plottable_data(self, data):
 97 |         return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
 98 | 
 99 |     def __trim_borders(self, x, target):
100 |         y = x.reshape(3, 32, 32, x.shape[1])
101 | 
102 |         if self.test: # don't need to loop over cases
103 |             if self.multiview:
104 |                 start_positions = [(0,0),  (0, self.border_size*2),
105 |                                    (self.border_size, self.border_size),
106 |                                   (self.border_size*2, 0), (self.border_size*2, self.border_size*2)]
107 |                 end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
108 |                 for i in xrange(self.num_views/2):
109 |                     pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:]
110 |                     target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1]))
111 |                     target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1]))
112 |             else:
113 |                 pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now
114 |                 target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1]))
115 |         else:
116 |             for c in xrange(x.shape[1]): # loop over cases
117 |                 startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
118 |                 endY, endX = startY + self.inner_size, startX + self.inner_size
119 |                 pic = y[:,startY:endY,startX:endX, c]
120 |                 if nr.randint(2) == 0: # also flip the image with 50% probability
121 |                     pic = pic[:,:,::-1]
122 |                 target[:,c] = pic.reshape((self.get_data_dims(),))
123 | 
class DummyConvNetDataProvider(LabeledDummyDataProvider):
    """Dummy provider for ConvNet: random data and labels, transposed.

    Each batch from LabeledDummyDataProvider has its 'data' and 'labels'
    matrices transposed and made C-contiguous, and is returned as a
    [data, labels] list.
    """
    def __init__(self, data_dim):
        LabeledDummyDataProvider.__init__(self, data_dim)

    def get_next_batch(self):
        epoch, batchnum, batch = LabeledDummyDataProvider.get_next_batch(self)

        # Transpose both matrices, forcing C-contiguous storage
        for key in ('data', 'labels'):
            batch[key] = n.require(batch[key].T, requirements='C')

        return epoch, batchnum, [batch['data'], batch['labels']]

    # Returns the dimensionality of the two data matrices returned by get_next_batch
    def get_data_dims(self, idx=0):
        if idx == 0:
            return self.batch_meta['num_vis']
        return 1
139 | 


--------------------------------------------------------------------------------
/convnet.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | #
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import numpy as n
 26 | import numpy.random as nr
 27 | from util import *
 28 | from data import *
 29 | from options import *
 30 | from gpumodel import *
 31 | import sys
 32 | import math as m
 33 | import layer as lay
 34 | from convdata import *
 35 | from os import linesep as NL
 36 | #import pylab as pl
 37 | 
 38 | class ConvNet(IGPUModel):
 39 |     def __init__(self, op, load_dic, dp_params={}):
 40 |         filename_options = []
 41 |         dp_params['multiview_test'] = op.get_value('multiview_test')
 42 |         dp_params['crop_border'] = op.get_value('crop_border')
 43 |         IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params)
 44 | 
 45 |     def import_model(self):
 46 |         lib_name = "_convnet"
 47 |         print "========================="
 48 |         print "Importing %s C++ module" % lib_name
 49 |         self.libmodel = __import__(lib_name)
 50 | 
 51 |     def init_model_lib(self):
 52 |         self.libmodel.initModel(self.layers, self.minibatch_size, self.device_ids[0])
 53 | 
 54 |     def init_model_state(self):
 55 |         ms = self.model_state
 56 |         if self.load_file:
 57 |             ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers'])
 58 |         else:
 59 |             ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self)
 60 |         self.layers_dic = dict(zip([l['name'] for l in ms['layers']], ms['layers']))
 61 | 
 62 |         logreg_name = self.op.get_value('logreg_name')
 63 |         if logreg_name:
 64 |             self.logreg_idx = self.get_layer_idx(logreg_name, check_type='cost.logreg')
 65 | 
 66 |         # Convert convolutional layers to local
 67 |         if len(self.op.get_value('conv_to_local')) > 0:
 68 |             for i, layer in enumerate(ms['layers']):
 69 |                 if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'):
 70 |                     lay.LocalLayerParser.conv_to_local(ms['layers'], i)
 71 |         # Decouple weight matrices
 72 |         if len(self.op.get_value('unshare_weights')) > 0:
 73 |             for name_str in self.op.get_value('unshare_weights'):
 74 |                 if name_str:
 75 |                     name = lay.WeightLayerParser.get_layer_name(name_str)
 76 |                     if name is not None:
 77 |                         name, idx = name[0], name[1]
 78 |                         if name not in self.layers_dic:
 79 |                             raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name)
 80 |                         layer = self.layers_dic[name]
 81 |                         lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx)
 82 |                     else:
 83 |                         raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str)
 84 |         self.op.set_value('conv_to_local', [], parse=False)
 85 |         self.op.set_value('unshare_weights', [], parse=False)
 86 | 
 87 |     def get_layer_idx(self, layer_name, check_type=None):
 88 |         try:
 89 |             layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name)
 90 |             if check_type:
 91 |                 layer_type = self.model_state['layers'][layer_idx]['type']
 92 |                 if layer_type != check_type:
 93 |                     raise ModelStateException("Layer with name '%s' has type '%s'; should be '%s'." % (layer_name, layer_type, check_type))
 94 |             return layer_idx
 95 |         except ValueError:
 96 |             raise ModelStateException("Layer with name '%s' not defined." % layer_name)
 97 | 
 98 |     def fill_excused_options(self):
 99 |         if self.op.get_value('check_grads'):
100 |             self.op.set_value('save_path', '')
101 |             self.op.set_value('train_batch_range', '0')
102 |             self.op.set_value('test_batch_range', '0')
103 |             self.op.set_value('data_path', '')
104 | 
105 |     # Make sure the data provider returned data in proper format
106 |     def parse_batch_data(self, batch_data, train=True):
107 |         if max(d.dtype != n.single for d in batch_data[2]):
108 |             raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.")
109 |         return batch_data
110 | 
111 |     def start_batch(self, batch_data, train=True):
112 |         data = batch_data[2]
113 |         if self.check_grads:
114 |             self.libmodel.checkGradients(data)
115 |         elif not train and self.multiview_test:
116 |             self.libmodel.startMultiviewTest(data, self.train_data_provider.num_views, self.logreg_idx)
117 |         else:
118 |             self.libmodel.startBatch(data, not train)
119 | 
120 |     def print_iteration(self):
121 |         print "%d.%d..." % (self.epoch, self.batchnum),
122 | 
123 |     def print_train_time(self, compute_time_py):
124 |         print "(%.3f sec)" % (compute_time_py)
125 | 
126 |     def print_costs(self, cost_outputs):
127 |         costs, num_cases = cost_outputs[0], cost_outputs[1]
128 |         for errname in costs.keys():
129 |             costs[errname] = [(v/num_cases) for v in costs[errname]]
130 |             print "%s: " % errname,
131 |             print ", ".join("%6f" % v for v in costs[errname]),
132 |             if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]):
133 |                 print "^ got nan or inf!"
134 |                 sys.exit(1)
135 | 
136 |     def print_train_results(self):
137 |         self.print_costs(self.train_outputs[-1])
138 | 
139 |     def print_test_status(self):
140 |         pass
141 | 
142 |     def print_test_results(self):
143 |         print ""
144 |         print "======================Test output======================"
145 |         self.print_costs(self.test_outputs[-1])
146 |         print ""
147 |         print "-------------------------------------------------------",
148 |         for i,l in enumerate(self.layers): # This is kind of hacky but will do for now.
149 |             if 'weights' in l:
150 |                 if type(l['weights']) == n.ndarray:
151 |                     print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))),
152 |                 elif type(l['weights']) == list:
153 |                     print ""
154 |                     print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))),
155 |                 print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))),
156 |         print ""
157 | 
158 |     def conditional_save(self):
159 |         self.save_state()
160 |         print "-------------------------------------------------------"
161 |         print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file)
162 |         print "=======================================================",
163 | 
164 |     def aggregate_test_outputs(self, test_outputs):
165 |         num_cases = sum(t[1] for t in test_outputs)
166 |         for i in xrange(1 ,len(test_outputs)):
167 |             for k,v in test_outputs[i][0].items():
168 |                 for j in xrange(len(v)):
169 |                     test_outputs[0][0][k][j] += test_outputs[i][0][k][j]
170 |         return (test_outputs[0][0], num_cases)
171 | 
172 |     @classmethod
173 |     def get_options_parser(cls):
174 |         op = IGPUModel.get_options_parser()
175 |         op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128)
176 |         op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True)
177 |         op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file")
178 |         op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range'])
179 |         op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0, requires=['logreg_name'])
180 |         op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True)
181 |         op.add_option("logreg-name", "logreg_name", StringOptionParser, "Cropped DP: logreg layer name (for --multiview-test)", default="")
182 |         op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[])
183 |         op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[])
184 |         op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0)
185 | 
186 |         op.delete_option('max_test_err')
187 |         op.options["max_filesize_mb"].default = 0
188 |         op.options["testing_freq"].default = 50
189 |         op.options["num_epochs"].default = 50000
190 |         op.options['dp_type'].default = None
191 | 
192 |         DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider)
193 |         DataProvider.register_data_provider('dummy-cn-n', 'Dummy ConvNet', DummyConvNetDataProvider)
194 |         DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider)
195 | 
196 |         return op
197 | 
if __name__ == "__main__":
    # Script entry point: build the option parser, parse the command line,
    # construct the model and start it.
    #nr.seed(5)
    parser = ConvNet.get_options_parser()
    op, load_dic = IGPUModel.parse_options(parser)

    model = ConvNet(op, load_dic)
    model.start()
205 | 


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | #
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import numpy as n
 26 | from numpy.random import randn, rand, random_integers
 27 | import os
 28 | from util import *
 29 | 
 30 | BATCH_META_FILE = "batches.meta"
 31 | 
 32 | class DataProvider:
 33 |     BATCH_REGEX = re.compile('^data_batch_(\d+)(\.\d+)?$')
 34 |     def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
 35 |         if batch_range == None:
 36 |             batch_range = DataProvider.get_batch_nums(data_dir)
 37 |         if init_batchnum is None or init_batchnum not in batch_range:
 38 |             init_batchnum = batch_range[0]
 39 | 
 40 |         self.data_dir = data_dir
 41 |         self.batch_range = batch_range
 42 |         self.curr_epoch = init_epoch
 43 |         self.curr_batchnum = init_batchnum
 44 |         self.dp_params = dp_params
 45 |         self.batch_meta = self.get_batch_meta(data_dir)
 46 |         self.data_dic = None
 47 |         self.test = test
 48 |         self.batch_idx = batch_range.index(init_batchnum)
 49 | 
 50 |     def get_next_batch(self):
 51 |         if self.data_dic is None or len(self.batch_range) > 1:
 52 |             self.data_dic = self.get_batch(self.curr_batchnum)
 53 |         epoch, batchnum = self.curr_epoch, self.curr_batchnum
 54 |         self.advance_batch()
 55 | 
 56 |         return epoch, batchnum, self.data_dic
 57 | 
 58 |     def __add_subbatch(self, batch_num, sub_batchnum, batch_dic):
 59 |         subbatch_path = "%s.%d" % (os.path.join(self.data_dir, self.get_data_file_name(batch_num)), sub_batchnum)
 60 |         if os.path.exists(subbatch_path):
 61 |             sub_dic = unpickle(subbatch_path)
 62 |             self._join_batches(batch_dic, sub_dic)
 63 |         else:
 64 |             raise IndexError("Sub-batch %d.%d does not exist in %s" % (batch_num,sub_batchnum, self.data_dir))
 65 | 
 66 |     def _join_batches(self, main_batch, sub_batch):
 67 |         main_batch['data'] = n.r_[main_batch['data'], sub_batch['data']]
 68 | 
 69 |     def get_batch(self, batch_num):
 70 |         if os.path.exists(self.get_data_file_name(batch_num) + '.1'): # batch in sub-batches
 71 |             dic = unpickle(self.get_data_file_name(batch_num) + '.1')
 72 |             sb_idx = 2
 73 |             while True:
 74 |                 try:
 75 |                     self.__add_subbatch(batch_num, sb_idx, dic)
 76 |                     sb_idx += 1
 77 |                 except IndexError:
 78 |                     break
 79 |         else:
 80 |             dic = unpickle(self.get_data_file_name(batch_num))
 81 |         return dic
 82 | 
 83 |     def get_data_dims(self):
 84 |         return self.batch_meta['num_vis']
 85 | 
 86 |     def advance_batch(self):
 87 |         self.batch_idx = self.get_next_batch_idx()
 88 |         self.curr_batchnum = self.batch_range[self.batch_idx]
 89 |         if self.batch_idx == 0: # we wrapped
 90 |             self.curr_epoch += 1
 91 | 
 92 |     def get_next_batch_idx(self):
 93 |         return (self.batch_idx + 1) % len(self.batch_range)
 94 | 
 95 |     def get_next_batch_num(self):
 96 |         return self.batch_range[self.get_next_batch_idx()]
 97 | 
 98 |     # get filename of current batch
 99 |     def get_data_file_name(self, batchnum=None):
100 |         if batchnum is None:
101 |             batchnum = self.curr_batchnum
102 |         return os.path.join(self.data_dir, 'data_batch_%d' % batchnum)
103 | 
104 |     @classmethod
105 |     def get_instance(cls, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, type="default", dp_params={}, test=False):
106 |         # why the fuck can't i reference DataProvider in the original definition?
107 |         #cls.dp_classes['default'] = DataProvider
108 |         type = type or DataProvider.get_batch_meta(data_dir)['dp_type'] # allow data to decide data provider
109 |         if type.startswith("dummy-"):
110 |             name = "-".join(type.split('-')[:-1]) + "-n"
111 |             if name not in dp_types:
112 |                 raise DataProviderException("No such data provider: %s" % type)
113 |             _class = dp_classes[name]
114 |             dims = int(type.split('-')[-1])
115 |             return _class(dims)
116 |         elif type in dp_types:
117 |             _class = dp_classes[type]
118 |             return _class(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
119 | 
120 |         raise DataProviderException("No such data provider: %s" % type)
121 | 
122 |     @classmethod
123 |     def register_data_provider(cls, name, desc, _class):
124 |         if name in dp_types:
125 |             raise DataProviderException("Data provider %s already registered" % name)
126 |         dp_types[name] = desc
127 |         dp_classes[name] = _class
128 | 
129 |     @staticmethod
130 |     def get_batch_meta(data_dir):
131 |         return unpickle(os.path.join(data_dir, BATCH_META_FILE))
132 | 
133 |     @staticmethod
134 |     def get_batch_filenames(srcdir):
135 |         return sorted([f for f in os.listdir(srcdir) if DataProvider.BATCH_REGEX.match(f)], key=alphanum_key)
136 | 
137 |     @staticmethod
138 |     def get_batch_nums(srcdir):
139 |         names = DataProvider.get_batch_filenames(srcdir)
140 |         return sorted(list(set(int(DataProvider.BATCH_REGEX.match(n).group(1)) for n in names)))
141 | 
142 |     @staticmethod
143 |     def get_num_batches(srcdir):
144 |         return len(DataProvider.get_batch_nums(srcdir))
145 | 
class DummyDataProvider(DataProvider):
    """Provider of random data that needs no files on disk.

    Serves a single batch number (1) forever; every call generates a fresh
    512 x data_dim single-precision matrix of uniform random values.
    """
    def __init__(self, data_dim):
        # DataProvider.__init__ is deliberately not called: it would read
        # batch metadata from a data directory.
        self.batch_range = [1]
        self.batch_meta = {'num_vis': data_dim, 'data_in_rows':True}
        self.curr_epoch = 1
        self.curr_batchnum = 1
        self.batch_idx = 0

    def get_next_batch(self):
        """Return (epoch, batchnum, {'data': matrix}) for the current batch, then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        data = rand(512, self.get_data_dims()).astype(n.single)
        # Report the batch that was just served, as the other providers do.
        # Returning self.curr_epoch here gave the already-advanced epoch.
        return epoch, batchnum, {'data':data}
160 | 
161 | 
class LabeledDummyDataProvider(DummyDataProvider):
    """Dummy provider that also produces random integer labels.

    Each batch holds num_cases rows of uniform random data and a
    num_cases x 1 single-precision column of labels in [0, num_classes - 1].
    """
    def __init__(self, data_dim, num_classes=10, num_cases=512):
        self.batch_range = [1]
        self.batch_meta = {'num_vis': data_dim,
                           'label_names': [str(x) for x in range(num_classes)],
                           'data_in_rows':True}
        self.num_cases = num_cases
        self.num_classes = num_classes
        self.curr_epoch = 1
        self.curr_batchnum = 1
        self.batch_idx=0

    def get_num_classes(self):
        return self.num_classes

    def get_next_batch(self):
        """Return (epoch, batchnum, {'data': ..., 'labels': ...}), then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        data = rand(self.num_cases, self.get_data_dims()).astype(n.single) # <--changed to rand
        labels = n.require(n.c_[random_integers(0,self.num_classes-1,self.num_cases)], requirements='C', dtype=n.single)

        # Report the batch that was just served, as the other providers do.
        # Returning self.curr_epoch here gave the already-advanced epoch.
        return epoch, batchnum, {'data':data, 'labels':labels}
185 | 
class MemoryDataProvider(DataProvider):
    """DataProvider that loads every batch in batch_range into memory up front."""
    def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
        DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
        # self.data_dic[k] holds the batch numbered self.batch_range[k]
        self.data_dic = []
        for i in self.batch_range:
            self.data_dic += [self.get_batch(i)]

    def get_next_batch(self):
        """Return (epoch, batchnum, data_dic) for the current batch, then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()

        # Look the batch up by its position in batch_range. The former
        # `batchnum - batch_range[0]` was only right for contiguous ranges.
        return epoch, batchnum, self.data_dic[self.batch_range.index(batchnum)]
198 | 
class LabeledDataProvider(DataProvider):
    """DataProvider for labeled data; the class count comes from the batch metadata."""
    def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
        DataProvider.__init__(self, data_dir, batch_range, init_epoch,
                              init_batchnum, dp_params, test)

    def get_num_classes(self):
        # One class per entry of the metadata's 'label_names' list
        label_names = self.batch_meta['label_names']
        return len(label_names)
205 | 
class LabeledMemoryDataProvider(LabeledDataProvider):
    """LabeledDataProvider that keeps every batch in memory.

    Each batch's 'labels' entry is converted to a single-precision column
    when it is loaded.
    """
    def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
        LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
        # self.data_dic[k] holds the batch numbered self.batch_range[k].
        # Iterate self.batch_range rather than the raw argument: the base
        # class resolves batch_range=None to the batches found on disk.
        self.data_dic = []
        for i in self.batch_range:
            batch = unpickle(self.get_data_file_name(i))
            batch["labels"] = n.c_[n.require(batch['labels'], dtype=n.single)]
            self.data_dic.append(batch)

    def get_next_batch(self):
        """Return (epoch, batchnum, data_dic) for the current batch, then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        # Look the batch up by its position in batch_range. The former
        # `batchnum - batch_range[0]` was only right for contiguous ranges.
        bidx = self.batch_range.index(batchnum)
        return epoch, batchnum, self.data_dic[bidx]
219 | 
# Registry of data providers: name -> human-readable description.
# DataProvider.register_data_provider() adds further entries at runtime.
dp_types = dict([
    ("default", "The default data provider; loads one batch into memory at a time"),
    ("memory", "Loads the entire dataset into memory"),
    ("labeled", "Returns data and labels (used by classifiers)"),
    ("labeled-memory", "Combination labeled + memory"),
    ("dummy-n", "Dummy data provider for n-dimensional data"),
    ("dummy-labeled-n", "Labeled dummy data provider for n-dimensional data"),
])
# Registry of data providers: name -> implementing class (same keys as dp_types).
dp_classes = dict([
    ("default", DataProvider),
    ("memory", MemoryDataProvider),
    ("labeled", LabeledDataProvider),
    ("labeled-memory", LabeledMemoryDataProvider),
    ("dummy-n", DummyDataProvider),
    ("dummy-labeled-n", LabeledDummyDataProvider),
])
232 | 
class DataProviderException(Exception):
    """Raised for data provider errors, e.g. an unknown or already registered provider name."""
235 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-18pct.cfg:
--------------------------------------------------------------------------------
 1 | # 18% error on CIFAR-10 in 20 minutes - layer parameter file
 2 | 
 3 | # Reduce all learning rates by factor of 10 after 120 epochs.
 4 | # Then another factor of 10 after 10 more epochs.
 5 | 
 6 | [conv1]
 7 | epsW=0.001
 8 | epsB=0.002
 9 | momW=0.9
10 | momB=0.9
11 | wc=0.004
12 | 
13 | [conv2]
14 | epsW=0.001
15 | epsB=0.002
16 | momW=0.9
17 | momB=0.9
18 | wc=0.004
19 | 
20 | [conv3]
21 | epsW=0.001
22 | epsB=0.002
23 | momW=0.9
24 | momB=0.9
25 | wc=0.004
26 | 
27 | [fc10]
28 | epsW=0.001
29 | epsB=0.002
30 | momW=0.9
31 | momB=0.9
32 | wc=1
33 | 
34 | [logprob]
35 | coeff=1
36 | 
37 | [rnorm1]
38 | scale=0.00005
39 | pow=.75
40 | 
41 | [rnorm2]
42 | scale=0.00005
43 | pow=.75
44 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-19pct.cfg:
--------------------------------------------------------------------------------
 1 | # 19% error on CIFAR-10 in 20 minutes - layer parameter file
 2 | # Set wc to 0 for translations -- 14.2%
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.004
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.004
17 | 
18 | [conv3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [fc10]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=3
31 | 
32 | [logprob]
33 | coeff=1
34 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-80sec.cfg:
--------------------------------------------------------------------------------
 1 | # 26% error on CIFAR-10 in 80 seconds - layer parameter file
 2 | # You should reduce the learning rate after 8 epochs by a factor of 10.
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.004
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.004
17 | 
18 | [conv3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [fc64]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=.03
31 | 
32 | [fc10]
33 | epsW=0.001
34 | epsB=0.002
35 | momW=0.9
36 | momB=0.9
37 | wc=.03
38 | 
39 | [logprob]
40 | coeff=1
41 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-conv-local-11pct.cfg:
--------------------------------------------------------------------------------
 1 | # 11% error on CIFAR-10 - layer parameter file
 2 | # Methodology:
 3 | # 1. Train on batches 1-4, use batch 5 for validation.
 4 | # 2. After about 350 epochs, the validation error stops improving.
 5 | # 3. Fold in batch 5.
 6 | # 4. Train on batches 1-5 for about 150 more epochs, until the batch 5 error is near the errors for batches 1-4. It takes forever to actually get there but after 150 epochs it's close enough.
 7 | # 5. Lower learning rates (epsW) by a factor of 10 to 0.0001, train for 10 more epochs.
 8 | # 6. Lower learning rates (epsW) by another factor of 10 to 0.00001, train for 10 more epochs.
 9 | # 7. Stop. Test on batch 6 with --test-range=6 --multiview-test=1 --logreg-name=logprob (read more about what this does here: http://code.google.com/p/cuda-convnet/wiki/TrainingNet#Training_on_image_translations )
10 | 
11 | # More details about methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
12 | 
13 | [conv1]
14 | epsW=0.001
15 | epsB=0.002
16 | momW=0.9
17 | momB=0.9
18 | wc=0.000
19 | 
20 | [conv2]
21 | epsW=0.001
22 | epsB=0.002
23 | momW=0.9
24 | momB=0.9
25 | wc=0.000
26 | 
27 | [local3]
28 | epsW=0.001
29 | epsB=0.002
30 | momW=0.9
31 | momB=0.9
32 | wc=0.004
33 | 
34 | [local4]
35 | epsW=0.001
36 | epsB=0.002
37 | momW=0.9
38 | momB=0.9
39 | wc=0.004
40 | 
41 | [fc10]
42 | epsW=0.001
43 | epsB=0.002
44 | momW=0.9
45 | momB=0.9
46 | wc=0.01
47 | 
48 | [logprob]
49 | coeff=1
50 | 
51 | [rnorm1]
52 | scale=0.001
53 | pow=0.75
54 | 
55 | [rnorm2]
56 | scale=0.001
57 | pow=0.75
58 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-conv-local-13pct.cfg:
--------------------------------------------------------------------------------
 1 | # 13% error on CIFAR-10 - layer parameter file
 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.00
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.00
17 | 
18 | [local3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [local4]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=0.004
31 | 
32 | [fc10]
33 | epsW=0.001
34 | epsB=0.002
35 | momW=0.9
36 | momB=0.9
37 | wc=0.004
38 | 
39 | [logprob]
40 | coeff=1
41 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-example.cfg:
--------------------------------------------------------------------------------
 1 | [conv32]
 2 | epsW=0.001
 3 | epsB=0.002
 4 | momW=0.9
 5 | momB=0.9
 6 | wc=0
 7 | 
 8 | [local32]
 9 | epsW=0.001
10 | epsB=0.002
11 | momW=0.9
12 | momB=0.9
13 | wc=0
14 | 
15 | [fc1024]
16 | momW=0.9
17 | momB=0.9
18 | epsW=0.00001
19 | epsB=0.00002
20 | wc=0
21 | 
22 | [conv32-2]
23 | epsW=0.001
24 | epsB=0.002
25 | momW=0.9
26 | momB=0.9
27 | wc=0
28 | 
29 | [conv32-3]
30 | epsW=0.001
31 | epsB=0.002
32 | momW=0.9
33 | momB=0.9
34 | wc=0
35 | 
36 | [fc10]
37 | epsW=0.0001,0.001
38 | epsB=0.002
39 | momW=0.5,0.9
40 | momB=0.9
41 | wc=0,0
42 | 
43 | [logprob]
44 | coeff=1
45 | 


--------------------------------------------------------------------------------
/example-layers/layer-params.gc.cfg:
--------------------------------------------------------------------------------
 1 | [conv32]
 2 | epsW=0.001
 3 | epsB=0.002
 4 | momW=0.9
 5 | momB=0.9
 6 | wc=0
 7 | 
 8 | [local32]
 9 | epsW=0.001
10 | epsB=0.002
11 | momW=0.9
12 | momB=0.9
13 | wc=0
14 | 
15 | [fc10]
16 | wc=0,0
17 | momB=0
18 | momW=0,0
19 | epsW=0.00001,0.00001
20 | epsB=0.00002
21 | 
22 | [logprob]
23 | coeff=1
24 | 


--------------------------------------------------------------------------------
/example-layers/layers-18pct.cfg:
--------------------------------------------------------------------------------
  1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file
  2 | 
  3 | [data]
  4 | type=data
  5 | dataIdx=0
  6 | 
  7 | [labels]
  8 | type=data
  9 | dataIdx=1
 10 | 
 11 | [conv1]
 12 | type=conv
 13 | inputs=data
 14 | channels=3
 15 | filters=32
 16 | padding=2
 17 | stride=1
 18 | filterSize=5
 19 | initW=0.0001
 20 | partialSum=4
 21 | sharedBiases=1
 22 | 
 23 | [pool1]
 24 | type=pool
 25 | pool=max
 26 | inputs=conv1
 27 | start=0
 28 | sizeX=3
 29 | stride=2
 30 | outputsX=0
 31 | channels=32
 32 | neuron=relu
 33 | 
 34 | [rnorm1]
 35 | type=rnorm
 36 | inputs=pool1
 37 | channels=32
 38 | size=3
 39 | 
 40 | [conv2]
 41 | type=conv
 42 | inputs=rnorm1
 43 | filters=32
 44 | padding=2
 45 | stride=1
 46 | filterSize=5
 47 | channels=32
 48 | neuron=relu
 49 | initW=0.01
 50 | partialSum=4
 51 | sharedBiases=1
 52 | 
 53 | [pool2]
 54 | type=pool
 55 | pool=avg
 56 | inputs=conv2
 57 | start=0
 58 | sizeX=3
 59 | stride=2
 60 | outputsX=0
 61 | channels=32
 62 | 
 63 | [rnorm2]
 64 | type=rnorm
 65 | inputs=pool2
 66 | channels=32
 67 | size=3
 68 | 
 69 | [conv3]
 70 | type=conv
 71 | inputs=rnorm2
 72 | filters=64
 73 | padding=2
 74 | stride=1
 75 | filterSize=5
 76 | channels=32
 77 | neuron=relu
 78 | initW=0.01
 79 | partialSum=4
 80 | sharedBiases=1
 81 | 
 82 | [pool3]
 83 | type=pool
 84 | pool=avg
 85 | inputs=conv3
 86 | start=0
 87 | sizeX=3
 88 | stride=2
 89 | outputsX=0
 90 | channels=64
 91 | 
 92 | [fc10]
 93 | type=fc
 94 | outputs=10
 95 | inputs=pool3
 96 | initW=0.01
 97 | 
 98 | [probs]
 99 | type=softmax
100 | inputs=fc10
101 | 
102 | [logprob]
103 | type=cost.logreg
104 | inputs=labels,probs
105 | 


--------------------------------------------------------------------------------
/example-layers/layers-19pct.cfg:
--------------------------------------------------------------------------------
 1 | # 19% error on CIFAR-10 in 20 minutes - layer definition file
 2 | 
 3 | [data]
 4 | type=data
 5 | dataIdx=0
 6 | 
 7 | [labels]
 8 | type=data
 9 | dataIdx=1
10 | 
11 | [conv1]
12 | type=conv
13 | inputs=data
14 | channels=3
15 | filters=32
16 | padding=2
17 | stride=1
18 | filterSize=5
19 | initW=0.0001
20 | partialSum=1
21 | sharedBiases=1
22 | 
23 | [pool1]
24 | type=pool
25 | pool=max
26 | inputs=conv1
27 | start=0
28 | sizeX=3
29 | stride=2
30 | outputsX=0
31 | channels=32
32 | neuron=relu
33 | 
34 | [conv2]
35 | type=conv
36 | inputs=pool1
37 | filters=32
38 | padding=2
39 | stride=1
40 | filterSize=5
41 | channels=32
42 | neuron=relu
43 | initW=0.01
44 | partialSum=1
45 | sharedBiases=1
46 | 
47 | [pool2]
48 | type=pool
49 | pool=avg
50 | inputs=conv2
51 | start=0
52 | sizeX=3
53 | stride=2
54 | outputsX=0
55 | channels=32
56 | 
57 | [conv3]
58 | type=conv
59 | inputs=pool2
60 | filters=64
61 | padding=2
62 | stride=1
63 | filterSize=5
64 | channels=32
65 | neuron=relu
66 | initW=0.01
67 | partialSum=1
68 | sharedBiases=1
69 | 
70 | [pool3]
71 | type=pool
72 | pool=avg
73 | inputs=conv3
74 | start=0
75 | sizeX=3
76 | stride=2
77 | outputsX=0
78 | channels=64
79 | 
80 | [fc10]
81 | type=fc
82 | outputs=10
83 | inputs=pool3
84 | initW=0.01
85 | 
86 | [probs]
87 | type=softmax
88 | inputs=fc10
89 | 
90 | [logprob]
91 | type=cost.logreg
92 | inputs=labels,probs
93 | 


--------------------------------------------------------------------------------
/example-layers/layers-80sec.cfg:
--------------------------------------------------------------------------------
  1 | # 26% error on CIFAR-10 in 80 seconds - layer definition file
  2 | 
  3 | [data]
  4 | type=data
  5 | dataIdx=0
  6 | 
  7 | [labels]
  8 | type=data
  9 | dataIdx=1
 10 | 
 11 | [conv1]
 12 | type=conv
 13 | inputs=data
 14 | channels=3
 15 | filters=32
 16 | padding=2
 17 | stride=1
 18 | filterSize=5
 19 | initW=0.0001
 20 | partialSum=4
 21 | sharedBiases=1
 22 | 
 23 | [pool1]
 24 | type=pool
 25 | pool=max
 26 | inputs=conv1
 27 | start=0
 28 | sizeX=3
 29 | stride=2
 30 | outputsX=0
 31 | channels=32
 32 | neuron=relu
 33 | 
 34 | [conv2]
 35 | type=conv
 36 | inputs=pool1
 37 | filters=32
 38 | padding=2
 39 | stride=1
 40 | filterSize=5
 41 | channels=32
 42 | neuron=relu
 43 | initW=0.01
 44 | partialSum=4
 45 | sharedBiases=1
 46 | 
 47 | [pool2]
 48 | type=pool
 49 | pool=avg
 50 | inputs=conv2
 51 | start=0
 52 | sizeX=3
 53 | stride=2
 54 | outputsX=0
 55 | channels=32
 56 | 
 57 | [conv3]
 58 | type=conv
 59 | inputs=pool2
 60 | filters=64
 61 | padding=2
 62 | stride=1
 63 | filterSize=5
 64 | channels=32
 65 | neuron=relu
 66 | initW=0.01
 67 | partialSum=4
 68 | sharedBiases=1
 69 | 
 70 | [pool3]
 71 | type=pool
 72 | pool=avg
 73 | inputs=conv3
 74 | start=0
 75 | sizeX=3
 76 | stride=2
 77 | outputsX=0
 78 | channels=64
 79 | 
 80 | [fc64]
 81 | type=fc
 82 | outputs=64
 83 | inputs=pool3
 84 | initW=0.1
 85 | neuron=relu
 86 | 
 87 | [fc10]
 88 | type=fc
 89 | outputs=10
 90 | inputs=fc64
 91 | initW=0.1
 92 | 
 93 | [probs]
 94 | type=softmax
 95 | inputs=fc10
 96 | 
 97 | [logprob]
 98 | type=cost.logreg
 99 | inputs=labels,probs
100 | 


--------------------------------------------------------------------------------
/example-layers/layers-conv-local-11pct.cfg:
--------------------------------------------------------------------------------
  1 | [data]
  2 | type=data
  3 | dataIdx=0
  4 | 
  5 | [labels]
  6 | type=data
  7 | dataIdx=1
  8 | 
  9 | [conv1]
 10 | type=conv
 11 | inputs=data
 12 | channels=3
 13 | filters=64
 14 | padding=2
 15 | stride=1
 16 | filterSize=5
 17 | neuron=relu
 18 | initW=0.0001
 19 | partialSum=4
 20 | sharedBiases=1
 21 | 
 22 | [pool1]
 23 | type=pool
 24 | pool=max
 25 | inputs=conv1
 26 | start=0
 27 | sizeX=3
 28 | stride=2
 29 | outputsX=0
 30 | channels=64
 31 | 
 32 | [rnorm1]
 33 | type=cmrnorm
 34 | inputs=pool1
 35 | channels=64
 36 | size=9
 37 | 
 38 | [conv2]
 39 | type=conv
 40 | inputs=rnorm1
 41 | filters=64
 42 | padding=2
 43 | stride=1
 44 | filterSize=5
 45 | channels=64
 46 | neuron=relu
 47 | initW=0.01
 48 | partialSum=8
 49 | sharedBiases=1
 50 | 
 51 | [rnorm2]
 52 | type=cmrnorm
 53 | inputs=conv2
 54 | channels=64
 55 | size=9
 56 | 
 57 | [pool2]
 58 | type=pool
 59 | pool=max
 60 | inputs=rnorm2
 61 | start=0
 62 | sizeX=3
 63 | stride=2
 64 | outputsX=0
 65 | channels=64
 66 | 
 67 | [local3]
 68 | type=local
 69 | inputs=pool2
 70 | filters=64
 71 | padding=1
 72 | stride=1
 73 | filterSize=3
 74 | channels=64
 75 | neuron=relu
 76 | initW=0.04
 77 | 
 78 | [local4]
 79 | type=local
 80 | inputs=local3
 81 | filters=32
 82 | padding=1
 83 | stride=1
 84 | filterSize=3
 85 | channels=64
 86 | neuron=relu
 87 | initW=0.04
 88 | 
 89 | [fc10]
 90 | type=fc
 91 | outputs=10
 92 | inputs=local4
 93 | initW=0.01
 94 | 
 95 | [probs]
 96 | type=softmax
 97 | inputs=fc10
 98 | 
 99 | [logprob]
100 | type=cost.logreg
101 | inputs=labels,probs
102 | 


--------------------------------------------------------------------------------
/example-layers/layers-conv-local-13pct.cfg:
--------------------------------------------------------------------------------
 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file
 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
 3 | 
 4 | [data]
 5 | type=data
 6 | dataIdx=0
 7 | 
 8 | [labels]
 9 | type=data
10 | dataIdx=1
11 | 
12 | [conv1]
13 | type=conv
14 | inputs=data
15 | channels=3
16 | filters=64
17 | padding=2
18 | stride=1
19 | filterSize=5
20 | neuron=relu
21 | initW=0.0001
22 | partialSum=4
23 | sharedBiases=1
24 | 
25 | [pool1]
26 | type=pool
27 | pool=max
28 | inputs=conv1
29 | start=0
30 | sizeX=3
31 | stride=2
32 | outputsX=0
33 | channels=64
34 | 
35 | [conv2]
36 | type=conv
37 | inputs=pool1
38 | filters=64
39 | padding=2
40 | stride=1
41 | filterSize=5
42 | channels=64
43 | neuron=relu
44 | initW=0.01
45 | partialSum=8
46 | sharedBiases=1
47 | 
48 | [pool2]
49 | type=pool
50 | pool=max
51 | inputs=conv2
52 | start=0
53 | sizeX=3
54 | stride=2
55 | outputsX=0
56 | channels=64
57 | 
58 | [local3]
59 | type=local
60 | inputs=pool2
61 | filters=32
62 | padding=1
63 | stride=1
64 | filterSize=3
65 | channels=64
66 | neuron=relu
67 | initW=0.04
68 | 
69 | [local4]
70 | type=local
71 | inputs=local3
72 | filters=32
73 | padding=1
74 | stride=1
75 | filterSize=3
76 | channels=32
77 | neuron=relu
78 | initW=0.04
79 | 
80 | [fc10]
81 | type=fc
82 | outputs=10
83 | inputs=local4
84 | initW=0.01
85 | neuron=ident
86 | 
87 | [probs]
88 | type=softmax
89 | inputs=fc10
90 | 
91 | [logprob]
92 | type=cost.logreg
93 | inputs=labels,probs
94 | 


--------------------------------------------------------------------------------
/example-layers/layers-example.cfg:
--------------------------------------------------------------------------------
  1 | # This is a layer configuration file that contains all the
  2 | # layer types supported by this code. It's not actually good for anything
  3 | # other than demonstrating how layers are specified and connected to one another.
  4 | 
  5 | # Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580.
  6 | # But there's no particular reason to run the net specified by this file. It's not actually good.
  7 | 
  8 | [data]
  9 | type=data
 10 | dataIdx=0
 11 | 
 12 | [labels]
 13 | type=data
 14 | dataIdx=1
 15 | 
 16 | [conv32]
 17 | type=conv
 18 | inputs=data
 19 | channels=3
 20 | filters=32
 21 | padding=4
 22 | stride=1
 23 | filterSize=9
 24 | neuron=logistic
 25 | initW=0.00001
 26 | partialSum=1
 27 | sharedBiases=true
 28 | 
 29 | [local32]
 30 | type=local
 31 | inputs=conv32
 32 | channels=32
 33 | filters=32
 34 | padding=4
 35 | stride=1
 36 | filterSize=9
 37 | neuron=logistic
 38 | initW=0.00001
 39 | 
 40 | [fc1024]
 41 | type=fc
 42 | outputs=1024
 43 | inputs=data
 44 | initW=0.001
 45 | neuron=relu
 46 | 
 47 | [maxpool]
 48 | type=pool
 49 | pool=max
 50 | inputs=local32
 51 | start=0
 52 | sizeX=4
 53 | stride=2
 54 | outputsX=0
 55 | channels=32
 56 | 
 57 | [rnorm1]
 58 | type=rnorm
 59 | inputs=maxpool
 60 | channels=32
 61 | sizeX=5
 62 | scale=0.0000125
 63 | pow=0.75
 64 | 
 65 | [cnorm1]
 66 | type=cnorm
 67 | inputs=rnorm1
 68 | channels=32
 69 | sizeX=7
 70 | scale=0.001
 71 | pow=0.5
 72 | 
 73 | [conv32-2]
 74 | type=conv
 75 | inputs=cnorm1
 76 | groups=4
 77 | channels=32
 78 | filters=32
 79 | padding=2
 80 | stride=1
 81 | filterSize=5
 82 | neuron=relu
 83 | initW=0.0001
 84 | partialSum=1
 85 | sharedBiases=false
 86 | 
 87 | [conv32-3]
 88 | type=conv
 89 | inputs=conv32-2
 90 | groups=4
 91 | channels=128
 92 | filters=32
 93 | padding=2
 94 | stride=2
 95 | filterSize=5
 96 | neuron=relu
 97 | initW=0.0001
 98 | partialSum=1
 99 | randSparse=true
100 | filterChannels=64
101 | 
102 | [fc10]
103 | type=fc
104 | outputs=10
105 | inputs=conv32-3,fc1024
106 | initW=0.0001,0.0001
107 | neuron=ident
108 | 
109 | [probs]
110 | type=softmax
111 | inputs=fc10
112 | 
113 | [logprob]
114 | type=cost.logreg
115 | inputs=labels,probs
116 | 


--------------------------------------------------------------------------------
/example-layers/layers.gc.cfg:
--------------------------------------------------------------------------------
 1 | [data]
 2 | type=data
 3 | dataIdx=0
 4 | 
 5 | [labels]
 6 | type=data
 7 | dataIdx=1
 8 | 
 9 | [conv32]
10 | type=conv
11 | inputs=data
12 | filters=16
13 | padding=0
14 | stride=1
15 | filterSize=3
16 | channels=3
17 | neuron=linear[3,2.2]
18 | initW=0.8
19 | partialSum=1
20 | sharedBiases=true
21 | 
22 | [avgpool]
23 | type=pool
24 | pool=avg
25 | inputs=conv32
26 | start=-2
27 | sizeX=4
28 | stride=4
29 | outputsX=0
30 | channels=16
31 | 
32 | [local32]
33 | type=local
34 | inputs=avgpool
35 | filters=32
36 | padding=2
37 | stride=3
38 | filterSize=5
39 | channels=16
40 | neuron=tanh[1.79,-0.66]
41 | initW=0.4
42 | #partialSum=1
43 | #sharedBiases=true
44 | groups=2
45 | randSparse=true
46 | 
47 | [fc10]
48 | type=fc
49 | outputs=10
50 | inputs=local32,conv32
51 | initW=0.8,0.008
52 | 
53 | [probs]
54 | type=softmax
55 | inputs=fc10
56 | 
57 | [logprob]
58 | type=cost.logreg
59 | inputs=labels,probs
60 | 


--------------------------------------------------------------------------------
/include/common/matrix.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef MATRIX_H_
 28 | #define MATRIX_H_
 29 | 
 30 | #include <matrix_funcs.h>
 31 | #include <Python.h>
 32 | #include <arrayobject.h>
 33 | #include <limits>
 34 | #include <assert.h>
 35 | #include <stdio.h>
 36 | #include <string.h>
 37 | #include <math.h>
 38 | 
 39 | #if defined(_WIN64) || defined(_WIN32)
 40 | #include <float.h>
 41 | #define isnan(_X) (_isnan(_X))
 42 | #define isinf(_X) (!_finite(_X))
 43 | #define uint unsigned int
 44 | double sqrt(int _X);
 45 | double log(int _X);
 46 | #endif
 47 | 
 48 | #ifdef USE_MKL
 49 | #include <mkl.h>
 50 | #include <mkl_cblas.h>
 51 | #include <mkl_vsl.h>
 52 | #include <mkl_vml.h>
 53 | 
 54 | #define IS_MKL true
 55 | 
 56 | #ifdef DOUBLE_PRECISION
 57 | #define MKL_UNIFORM vdRngUniform
 58 | #define MKL_NORMAL vdRngGaussian
 59 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_DUNIFORM_STD_ACCURATE
 60 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_DGAUSSIAN_BOXMULLER
 61 | #define MKL_EXP vdExp
 62 | #define MKL_RECIP vdInv
 63 | #define MKL_SQUARE vdSqr
 64 | #define MKL_TANH vdTanh
 65 | #define MKL_LOG vdLn
 66 | #define MKL_VECMUL vdMul
 67 | #define MKL_VECDIV vdDiv
 68 | #else
 69 | #define MKL_UNIFORM vsRngUniform
 70 | #define MKL_NORMAL vsRngGaussian
 71 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_SUNIFORM_STD_ACCURATE
 72 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_SGAUSSIAN_BOXMULLER
 73 | #define MKL_EXP vsExp
 74 | #define MKL_RECIP vsInv
 75 | #define MKL_SQUARE vsSqr
 76 | #define MKL_TANH vsTanh
 77 | #define MKL_LOG vsLn
 78 | #define MKL_VECMUL vsMul
 79 | #define MKL_VECDIV vsDiv
 80 | #endif /* DOUBLE_PRECISION */
 81 | 
 82 | #else
 83 | extern "C" {
 84 | #include <cblas.h>
 85 | }
 86 | #define IS_MKL false
 87 | #endif /* USE_MKL */
 88 | 
 89 | #ifdef DOUBLE_PRECISION
 90 | #define CBLAS_GEMM cblas_dgemm
 91 | #define CBLAS_SCAL cblas_dscal
 92 | #define CBLAS_AXPY cblas_daxpy
 93 | #else
 94 | #define CBLAS_GEMM cblas_sgemm
 95 | #define CBLAS_SCAL cblas_sscal
 96 | #define CBLAS_AXPY cblas_saxpy
 97 | #endif /* DOUBLE_PRECISION */
 98 | 
 99 | #define MTYPE_MAX std::numeric_limits<MTYPE>::max()  // largest finite MTYPE; std-qualified so it expands correctly without a using-directive
100 | 
101 | class Matrix {  // 2-D array of MTYPE values plus a CBLAS transpose flag that selects the storage order used by getCell().
102 | private:
103 |     MTYPE* _data;  // element storage, indexed by getCell() according to _trans
104 |     bool _ownsData;  // isView() reports the negation of this flag
105 |     long int _numRows, _numCols;
106 |     long int _numElements;
107 |     CBLAS_TRANSPOSE _trans;  // CblasTrans => getCell() indexes column-major, otherwise row-major
108 | 
109 |     void _init(MTYPE* data, long int numRows, long int numCols, bool transpose, bool ownsData);
110 |     void _tileTo2(Matrix& target) const;
111 |     void _copyAllTo(Matrix& target) const;
112 |     MTYPE _sum_column(long int col) const;
113 |     MTYPE _sum_row(long int row) const;
114 |     MTYPE _aggregate(MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
115 |     void _aggregate(long int axis, Matrix& target, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
116 |     MTYPE _aggregateRow(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
117 |     MTYPE _aggregateCol(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;  // NOTE(review): parameter is named "row" but presumably carries a column index -- confirm against the definition
118 |     void _updateDims(long int numRows, long int numCols);
119 |     void _applyLoop(MTYPE(*func)(MTYPE));
120 |     void _applyLoop(MTYPE (*func)(MTYPE), Matrix& target);
121 |     void _applyLoop2(const Matrix& a, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const;
122 |     void _applyLoop2(const Matrix& a, MTYPE (*func)(MTYPE,MTYPE, MTYPE), MTYPE scalar, Matrix& target) const;
123 |     void _applyLoopScalar(const MTYPE scalar, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const;
124 |     void _checkBounds(long int startRow, long int endRow, long int startCol, long int endCol) const;
125 |     void _divideByVector(const Matrix& vec, Matrix& target);
126 |     inline long int _getNumColsBackEnd() const {  // same selection as getLeadingDim(): _numCols unless transposed, then _numRows
127 |         return _trans == CblasNoTrans ? _numCols : _numRows;
128 |     }
129 | public:
130 |     enum FUNCTION {  // selector passed to apply()
131 |         TANH, RECIPROCAL, SQUARE, ABS, EXP, LOG, ZERO, ONE, LOGISTIC1, LOGISTIC2, SIGN
132 |     };
133 |     Matrix();
134 |     Matrix(long int numRows, long int numCols);
135 |     Matrix(const PyArrayObject *src);
136 |     Matrix(const Matrix &like);
137 |     Matrix(MTYPE* data, long int numRows, long int numCols);
138 |     Matrix(MTYPE* data, long int numRows, long int numCols, bool transpose);
139 |     ~Matrix();
140 | 
141 |     inline MTYPE& getCell(long int i, long int j) const {  // bounds-asserted access to element (row i, column j); hands out a mutable reference even though the method is const
142 |         assert(i >= 0 && i < _numRows);
143 |         assert(j >= 0 && j < _numCols);
144 |         if (_trans == CblasTrans) {
145 |             return _data[j * _numRows + i];  // transposed: consecutive rows of one column are adjacent
146 |         }
147 |         return _data[i * _numCols + j];  // not transposed: consecutive columns of one row are adjacent
148 |     }
149 | 
150 |     MTYPE& operator()(long int i, long int j) const {  // shorthand for getCell(i, j)
151 |         return getCell(i, j);
152 |     }
153 | 
154 |     inline MTYPE* getData() const {  // raw pointer to the element storage
155 |         return _data;
156 |     }
157 | 
158 |     inline bool isView() const {  // true when this matrix does not own _data
159 |         return !_ownsData;
160 |     }
161 | 
162 |     inline long int getNumRows() const {
163 |         return _numRows;
164 |     }
165 | 
166 |     inline long int getNumCols() const {
167 |         return _numCols;
168 |     }
169 | 
170 |     inline long int getNumDataBytes() const {  // element count times sizeof(MTYPE)
171 |         return _numElements * sizeof(MTYPE);
172 |     }
173 | 
174 |     inline long int getNumElements() const {
175 |         return _numElements;
176 |     }
177 | 
178 |     inline long int getLeadingDim() const {  // stride between successive index steps in getCell(): _numRows if transposed, else _numCols
179 |         return _trans == CblasTrans ? _numRows : _numCols;
180 |     }
181 | 
182 |     inline long int getFollowingDim() const {  // the other dimension: _numCols if transposed, else _numRows
183 |         return _trans == CblasTrans ? _numCols : _numRows;
184 |     }
185 | 
186 |     inline CBLAS_TRANSPOSE getBLASTrans() const {
187 |         return _trans;
188 |     }
189 | 
190 |     inline bool isSameDims(const Matrix& a) const {  // compares logical row/column counts only; the transpose flags are not compared
191 |         return a.getNumRows() == getNumRows() && a.getNumCols() == getNumCols();
192 |     }
193 | 
194 |     inline bool isTrans() const {
195 |         return _trans == CblasTrans;
196 |     }
197 | 
198 |     /*
199 |      * Only use if you know what you're doing!
200 |      * Does not update any dimensions. Just flips the _trans flag.
201 |      *
202 |      * Use transpose() if you want to get the transpose of this matrix.
203 |      */
204 |     inline void setTrans(bool trans) {
205 |         assert(isTrans() == trans || !isView());  // an actual change of the flag is only permitted on a matrix that owns its data
206 |         _trans = trans ? CblasTrans : CblasNoTrans;
207 |     }
208 | 
209 |     void apply(FUNCTION f);
210 |     void apply(Matrix::FUNCTION f, Matrix& target);
211 |     void subtractFromScalar(MTYPE scalar);
212 |     void subtractFromScalar(MTYPE scalar, Matrix &target) const;
213 |     void biggerThanScalar(MTYPE scalar);
214 |     void smallerThanScalar(MTYPE scalar);
215 |     void equalsScalar(MTYPE scalar);
216 |     void biggerThanScalar(MTYPE scalar, Matrix& target) const;
217 |     void smallerThanScalar(MTYPE scalar, Matrix& target) const;
218 |     void equalsScalar(MTYPE scalar, Matrix& target) const;
219 |     void biggerThan(Matrix& a);
220 |     void biggerThan(Matrix& a, Matrix& target) const;
221 |     void smallerThan(Matrix& a);
222 |     void smallerThan(Matrix& a, Matrix& target) const;
223 |     void minWith(Matrix &a);
224 |     void minWith(Matrix &a, Matrix &target) const;
225 |     void maxWith(Matrix &a);
226 |     void maxWith(Matrix &a, Matrix &target) const;
227 |     void equals(Matrix& a);
228 |     void equals(Matrix& a, Matrix& target) const;
229 |     void notEquals(Matrix& a) ;
230 |     void notEquals(Matrix& a, Matrix& target) const;
231 |     void add(const Matrix &m);
232 |     void add(const Matrix &m, MTYPE scale);
233 |     void add(const Matrix &m, Matrix& target);
234 |     void add(const Matrix &m, MTYPE scale, Matrix& target);
235 |     void subtract(const Matrix &m);
236 |     void subtract(const Matrix &m, Matrix& target);
237 |     void subtract(const Matrix &m, MTYPE scale);
238 |     void subtract(const Matrix &m, MTYPE scale, Matrix& target);
239 |     void addVector(const Matrix& vec, MTYPE scale);
240 |     void addVector(const Matrix& vec, MTYPE scale, Matrix& target);
241 |     void addVector(const Matrix& vec);
242 |     void addVector(const Matrix& vec, Matrix& target);
243 |     void addScalar(MTYPE scalar);
244 |     void addScalar(MTYPE scalar, Matrix& target) const;
245 |     void maxWithScalar(MTYPE scalar);
246 |     void maxWithScalar(MTYPE scalar, Matrix &target) const;
247 |     void minWithScalar(MTYPE scalar);
248 |     void minWithScalar(MTYPE scalar, Matrix &target) const;
249 |     void eltWiseMultByVector(const Matrix& vec);
250 |     void eltWiseMultByVector(const Matrix& vec, Matrix& target);
251 |     void eltWiseDivideByVector(const Matrix& vec);
252 |     void eltWiseDivideByVector(const Matrix& vec, Matrix& target);
253 |     void resize(long int newNumRows, long int newNumCols);
254 |     void resize(const Matrix& like);
255 |     Matrix& slice(long int startRow, long int endRow, long int startCol, long int endCol) const;  // NOTE(review): the Matrix&-returning overloads presumably hand back a heap object the caller must delete -- confirm in matrix.cpp
256 |     void slice(long int startRow, long int endRow, long int startCol, long int endCol, Matrix &target) const;
257 |     Matrix& sliceRows(long int startRow, long int endRow) const;
258 |     void sliceRows(long int startRow, long int endRow, Matrix& target) const;
259 |     Matrix& sliceCols(long int startCol, long int endCol) const;
260 |     void sliceCols(long int startCol, long int endCol, Matrix& target) const;
261 |     void rightMult(const Matrix &b, MTYPE scale);
262 |     void rightMult(const Matrix &b, Matrix &target) const;
263 |     void rightMult(const Matrix &b);
264 |     void rightMult(const Matrix &b, MTYPE scaleAB, Matrix &target) const;
265 |     void addProduct(const Matrix &a, const Matrix &b, MTYPE scaleAB, MTYPE scaleThis);
266 |     void addProduct(const Matrix& a, const Matrix& b);
267 |     void eltWiseMult(const Matrix& a);
268 |     void eltWiseMult(const Matrix& a, Matrix& target) const;
269 |     void eltWiseDivide(const Matrix& a);
270 |     void eltWiseDivide(const Matrix& a, Matrix &target) const;
271 |     Matrix& transpose() const;
272 |     Matrix& transpose(bool hard) const;
273 |     Matrix& tile(long int timesY, long int timesX) const;
274 |     void tile(long int timesY, long int timesX, Matrix& target) const;
275 |     void copy(Matrix &dest, long int srcStartRow, long int srcEndRow, long int srcStartCol, long int srcEndCol, long int destStartRow, long int destStartCol) const;
276 |     Matrix& copy() const;
277 |     void copy(Matrix& target) const;
278 |     Matrix& sum(long int axis) const;
279 |     void sum(long int axis, Matrix &target) const;
280 |     MTYPE sum() const;
281 |     MTYPE max() const;
282 |     Matrix& max(long int axis) const;
283 |     void max(long int axis, Matrix& target) const;
284 |     MTYPE min() const;
285 |     Matrix& min(long int axis) const;
286 |     void min(long int axis, Matrix& target) const;
287 |     MTYPE norm() const;
288 |     MTYPE norm2() const;
289 |     void scale(MTYPE scale);
290 |     void scale(MTYPE alpha, Matrix& target);
291 |     void reshape(long int numRows, long int numCols);
292 |     Matrix& reshaped(long int numRows, long int numCols);
293 |     void printShape(const char* name) const;
294 |     bool hasNan() const;
295 |     bool hasInf() const;
296 | #ifdef USE_MKL
297 |     void randomizeNormal(VSLStreamStatePtr stream, MTYPE mean, MTYPE stdev);  // MKL builds take an explicit VSL random stream
298 |     void randomizeUniform(VSLStreamStatePtr stream);
299 |     void randomizeNormal(VSLStreamStatePtr stream);
300 | #else
301 |     void randomizeNormal(MTYPE mean, MTYPE stdev);  // non-MKL builds expose the same operations without a stream argument
302 |     void randomizeUniform();
303 |     void randomizeNormal();
304 | #endif
305 |     void print() const;
306 |     void print(long int startRow,long int rows, long int startCol,long int cols) const;
307 |     void print(long int rows, long int cols) const;
308 | };
309 | 
310 | #endif /* MATRIX_H_ */
311 | 


--------------------------------------------------------------------------------
/include/common/matrix_funcs.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef MATRIX_FUNCS_H_
 28 | #define MATRIX_FUNCS_H_
 29 | 
 30 | #include <stdlib.h>
 31 | #include <math.h>
 32 | #include <algorithm>
 33 | 
 34 | #ifdef DOUBLE_PRECISION
 35 | #define MTYPE double
 36 | #else
 37 | #define MTYPE float
 38 | #endif
 39 | 
 40 | #define MYRAND ((double)rand() / ((double)RAND_MAX + 1))
 41 | 
 42 | inline MTYPE _zero(MTYPE x) {  // constant function: ignores x and yields 0
 43 |     return 0;
 44 | }
 45 | 
 46 | inline MTYPE _one(MTYPE x) {  // constant function: ignores x and yields 1
 47 |     return 1;
 48 | }
 49 | 
 50 | inline MTYPE _abs(MTYPE x) {  // absolute value of x
 51 |     return fabs(x);  // fabs clears the sign bit, so a zero input no longer comes back as -0.0 (the old `x > 0 ? x : -x` negated +0.0)
 52 | }
 53 | 
 54 | inline MTYPE _square(MTYPE x) {  // x multiplied by itself
 55 |     return x * x;
 56 | }
 57 | 
 58 | inline MTYPE _sigma1(MTYPE x) {  // logistic sigmoid expressed through tanh: (tanh(x/2) + 1) / 2
 59 |     return (tanh(x / 2) + 1) / 2;
 60 | }
 61 | 
 62 | inline MTYPE _sigma2(MTYPE x) {  // logistic sigmoid in its direct form: 1 / (1 + exp(-x))
 63 |     return 1 / (1 + exp(-x));
 64 | }
 65 | 
 66 | inline MTYPE _recip(MTYPE x) {  // reciprocal 1/x; no guard against x == 0
 67 |     return 1 / x;
 68 | }
 69 | 
 70 | inline MTYPE _exp(MTYPE x) {  // forwards to exp(); gives the math routine a plain MTYPE(MTYPE) signature
 71 |     return exp(x);
 72 | }
 73 | 
 74 | inline MTYPE _log(MTYPE x) {  // forwards to log() (natural logarithm); input is not range-checked
 75 |     return log(x);
 76 | }
 77 | 
 78 | inline MTYPE _tanh(MTYPE x) {  // forwards to tanh()
 79 |     return tanh(x);
 80 | }
 81 | 
 82 | inline MTYPE _sign(MTYPE x) {  // +1 for strictly positive x, -1 for everything else (including 0 and NaN)
 83 |     return !(x > 0) ? -1 : 1;
 84 | }
 85 | 
 86 | inline MTYPE _rand(MTYPE x) {  // ignores x; yields MYRAND, i.e. rand() divided by (RAND_MAX + 1)
 87 |     return MYRAND;
 88 | }
 89 | 
 90 | inline MTYPE _divide(MTYPE x, MTYPE y) {  // quotient x / y; no guard against y == 0
 91 |     return x / y;
 92 | }
 93 | 
 94 | inline MTYPE _mult(MTYPE x, MTYPE y) {  // product of the two operands
 95 |     return x * y;
 96 | }
 97 | 
 98 | inline MTYPE _add(MTYPE x, MTYPE y) {  // sum of the two operands
 99 |     return x + y;
100 | }
101 | 
102 | inline MTYPE _addSquare(MTYPE x, MTYPE y) {  // squares the FIRST argument only and adds the second: x*x + y
103 |     return x*x + y;
104 | }
105 | 
106 | inline MTYPE _addWithScale(MTYPE x, MTYPE y, MTYPE scale) {  // x plus y scaled by `scale`; only the second operand is scaled
107 |     return x + scale*y;
108 | }
109 | 
110 | inline MTYPE _max(MTYPE x, MTYPE y) {  // larger of the two; x wins ties and whenever the comparison is false (e.g. NaN)
111 |     return (x < y) ? y : x;
112 | }
113 | 
114 | inline MTYPE _min(MTYPE x, MTYPE y) {  // smaller of the two; x wins ties and whenever the comparison is false (e.g. NaN)
115 |     return (y < x) ? y : x;
116 | }
117 | 
118 | inline MTYPE _bigger(MTYPE x, MTYPE y) {  // 1 when x is strictly greater than y, otherwise 0
119 |     return y < x;
120 | }
121 | 
122 | inline MTYPE _smaller(MTYPE x, MTYPE y) {  // 1 when x is strictly less than y, otherwise 0
123 |     return y > x;
124 | }
125 | 
126 | inline MTYPE _equal(MTYPE x, MTYPE y) {  // 1 when the operands compare equal (exact comparison, no tolerance), otherwise 0
127 |     return y == x;
128 | }
129 | 
130 | inline MTYPE _notEqual(MTYPE x, MTYPE y) {  // 1 when the operands differ (exact comparison, no tolerance), otherwise 0
131 |     return !(x == y);
132 | }
133 | 
134 | #endif /* MATRIX_FUNCS_H_ */
135 | 


--------------------------------------------------------------------------------
/include/common/queue.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef QUEUE_H_
 28 | #define QUEUE_H_
 29 | #include <pthread.h>
 30 | #include <stdlib.h>
 31 | 
 32 | /*
 33 |  * A thread-safe circular queue that automatically grows but never shrinks.
 34 |  */
 35 | template <class T>
 36 | class Queue {
 37 | private:
 38 |     T *_elements;
 39 |     int _numElements;
 40 |     int _head, _tail;
 41 |     int _maxSize;
 42 |     pthread_mutex_t *_queueMutex;
 43 |     pthread_cond_t *_queueCV;
 44 | 
 45 |     void _init(int initialSize) {  // shared constructor body: empty ring buffer of initialSize slots plus its mutex and condition variable
 46 |         _maxSize = initialSize;
 47 |         _elements = new T[_maxSize];
 48 |         _head = _tail = 0;
 49 |         _numElements = 0;
 50 |         // The synchronisation objects are malloc'ed here and released with free() in ~Queue().
 51 |         _queueMutex = static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
 52 |         _queueCV = static_cast<pthread_cond_t*>(malloc(sizeof(pthread_cond_t)));
 53 |         pthread_mutex_init(_queueMutex, NULL);
 54 |         pthread_cond_init(_queueCV, NULL);
 55 |     }
 56 | 
 57 |     void expand() {
 58 |         T *newStorage = new T[_maxSize * 2];
 59 |         memcpy(newStorage, _elements + _head, (_maxSize - _head) * sizeof(T));
 60 |         memcpy(newStorage + _maxSize - _head, _elements, _tail * sizeof(T));
 61 |         delete[] _elements;
 62 |         _elements = newStorage;
 63 |         _head = 0;
 64 |         _tail = _numElements;
 65 |         _maxSize *= 2;
 66 |     }
 67 | public:
 68 |     Queue(int initialSize) {
 69 |         _init(initialSize);
 70 |     }
 71 | 
 72 |     Queue()  {
 73 |         _init(1);
 74 |     }
 75 | 
 76 |     ~Queue() {
 77 |         pthread_mutex_destroy(_queueMutex);
 78 |         pthread_cond_destroy(_queueCV);
 79 |         delete[] _elements;
 80 |         free(_queueMutex);
 81 |         free(_queueCV);
 82 |     }
 83 | 
 84 |     void enqueue(T el) {
 85 |         pthread_mutex_lock(_queueMutex);
 86 |         if(_numElements == _maxSize) {
 87 |             expand();
 88 |         }
 89 |         _elements[_tail] = el;
 90 |         _tail = (_tail + 1) % _maxSize;
 91 |         _numElements++;
 92 | 
 93 |         pthread_cond_signal(_queueCV);
 94 |         pthread_mutex_unlock(_queueMutex);
 95 |     }
 96 | 
 97 |     /*
 98 |      * Blocks until not empty.
 99 |      */
100 |     T dequeue() {
101 |         pthread_mutex_lock(_queueMutex);
102 |         if(_numElements == 0) {
103 |             pthread_cond_wait(_queueCV, _queueMutex);
104 |         }
105 |         T el = _elements[_head];
106 |         _head = (_head + 1) % _maxSize;
107 |         _numElements--;
108 |         pthread_mutex_unlock(_queueMutex);
109 |         return el;
110 |     }
111 | 
112 |     /*
113 |      * Obviously this number can change by the time you actually look at it.
114 |      */
115 |     inline int getNumElements() const {
116 |         return _numElements;
117 |     }
118 | };
119 | 
120 | #endif /* QUEUE_H_ */
121 | 


--------------------------------------------------------------------------------
/include/common/thread.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef THREAD_H_
28 | #define THREAD_H_
29 | #include <pthread.h>
30 | #include <stdio.h>
31 | #include <errno.h>
32 | #include <assert.h>
33 | 
/*
 * Abstract thread class built on POSIX threads.
 * The only thing the implementer has to fill in is the run method.
 *
 * Whether the thread is created joinable or detached is chosen at
 * construction time. A Thread object may be started at most once.
 *
 * NOTE(review): the object must outlive the running thread, since the thread
 * entry point calls run() through the pointer handed to pthread_create.
 */
class Thread {
private:
    pthread_attr_t _pthread_attr;  // Creation attributes (detach state)
    pthread_t _threadID;           // Handle written by pthread_create in start()
    bool _joinable, _startable;

    /*
     * Entry point handed to pthread_create. obj is the Thread* passed by
     * start(); the value returned by run() becomes the thread's exit status,
     * which is what join(void**) reports.
     */
    static void* start_pthread_func(void *obj) {
        // Returning from the start routine ends the thread with this value,
        // exactly as an explicit pthread_exit(retval) would.
        return static_cast<Thread*>(obj)->run();
    }
protected:
    /*
     * Body of the thread. The returned pointer is the thread's exit status.
     */
    virtual void* run() = 0;
public:
    /*
     * joinable: true to create the thread joinable, false to create it
     *           detached.
     */
    Thread(bool joinable) : _threadID(), _joinable(joinable), _startable(true) {
        pthread_attr_init(&_pthread_attr);
        pthread_attr_setdetachstate(&_pthread_attr, joinable ? PTHREAD_CREATE_JOINABLE : PTHREAD_CREATE_DETACHED);
    }

    /*
     * Releases the attribute object initialised by the constructor.
     * Does not stop or join the thread.
     */
    virtual ~Thread() {
        pthread_attr_destroy(&_pthread_attr);
    }

    /*
     * Launches the thread. May only be called once per object (asserted).
     * A pthread_create failure is reported on stderr via perror.
     *
     * Returns the stored thread handle; it is only meaningful if creation
     * succeeded.
     */
    pthread_t start() {
        assert(_startable);
        _startable = false;
        int n;
        if ((n = pthread_create(&_threadID, &_pthread_attr, &Thread::start_pthread_func, (void*)this))) {
            errno = n;
            perror("pthread_create error");
        }
        return _threadID;
    }

    /*
     * Waits for the thread to finish. Only valid for joinable threads
     * (asserted). A pthread_join failure is reported on stderr via perror.
     *
     * status: if non-NULL, receives the pointer returned by run().
     */
    void join(void **status) {
        assert(_joinable);
        int n;
        if((n = pthread_join(_threadID, status))) {
            errno = n;
            perror("pthread_join error");
        }
    }

    /*
     * Waits for the thread to finish and discards its exit status.
     */
    void join() {
        join(NULL);
    }

    /*
     * Returns the stored thread handle (value-initialised until start()
     * has created the thread).
     */
    pthread_t getThreadID() const {
        return _threadID;
    }
};
88 | 
89 | #endif /* THREAD_H_ */
90 | 


--------------------------------------------------------------------------------
/include/convnet.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef CONVNET3
28 | #define	CONVNET3
29 | 
30 | #include <vector>
31 | #include <string>
32 | #include <helper_cuda.h>
33 | #include <time.h>
34 | #include <queue.h>
35 | #include <thread.h>
36 | #include <math.h>
37 | 
38 | #include "layer.cuh"
39 | #include "data.cuh"
40 | #include "worker.cuh"
41 | #include "weights.cuh"
42 | 
43 | class Worker;
44 | class WorkResult;
45 | class Layer;
46 | class DataLayer;
47 | class CostLayer;
48 | 
49 | class ConvNet : public Thread {
50 | protected:
51 |     std::vector<Layer*> _layers;
52 |     std::vector<DataLayer*> _dataLayers;
53 |     std::vector<CostLayer*> _costs;
54 |     GPUData* _data;
55 | 
56 |     DataProvider* _dp;
57 |     int _deviceID;
58 | 
59 |     Queue<Worker*> _workerQueue;
60 |     Queue<WorkResult*> _resultQueue;
61 | 
62 |     // For gradient checking
63 |     int _numFailures;
64 |     int _numTests;
65 |     double _baseErr;
66 | 
67 |     virtual Layer* initLayer(string& layerType, PyObject* paramsDict);
68 |     void initCuda();
69 |     void* run();
70 | public:
71 |     ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID);
72 | 
73 |     Queue<Worker*>& getWorkerQueue();
74 |     Queue<WorkResult*>& getResultQueue();
75 |     DataProvider& getDataProvider();
76 | 
77 |     Layer& operator[](int idx);
78 |     Layer& getLayer(int idx);
79 |     void copyToCPU();
80 |     void copyToGPU();
81 |     void updateWeights();
82 |     void reset();
83 |     int getNumLayers();
84 | 
85 |     void bprop(PASS_TYPE passType);
86 |     void fprop(PASS_TYPE passType);
87 |     void fprop(int miniIdx, PASS_TYPE passType);
88 |     void fprop(GPUData& data, PASS_TYPE passType);
89 | 
90 |     bool checkGradient(const std::string& name, float eps, Weights& weights);
91 |     void checkGradients();
92 |     Cost& getCost();
93 |     Cost& getCost(Cost& cost);
94 |     double getCostValue();
95 | };
96 | 
97 | #endif	/* CONVNET3 */
98 | 
99 | 


--------------------------------------------------------------------------------
/include/cost.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef COST_CUH
28 | #define	COST_CUH
29 | 
30 | #include <vector>
31 | #include <map>
32 | #include <helper_cuda.h>
33 | 
34 | #include "layer.cuh"
35 | #include "util.cuh"
36 | 
37 | class CostLayer;
38 | 
39 | /*
40 |  * Wrapper for dictionary mapping cost name to vector of returned values.
41 |  */
42 | class Cost {
43 | private:
44 |     int _numCases;
45 |     CostMap _costMap;
46 |     CostCoeffMap _costCoeffMap;
47 | public:
48 |     Cost(int numCases);
49 |     Cost(int numCases, std::vector<CostLayer*>& costs);
50 |     doublev& operator [](const std::string s);
51 |     CostMap& getCostMap();
52 |     CostCoeffMap& getCostCoeffMap();
53 |     int getNumCases();
54 |     /*
55 |      * Returns sum of first values returned by all the costs, weighted by the cost coefficients.
56 |      */
57 |     double getValue();
58 |     Cost& operator += (Cost& er);
59 |     Cost& operator /= (const double v);
60 |     virtual ~Cost();
61 | };
62 | 
63 | 
64 | #endif	/* COST_CUH */
65 | 
66 | 


--------------------------------------------------------------------------------
/include/cudaconv2/cudaconv2.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef COMMON_CUH
 28 | #define	COMMON_CUH
 29 | 
 30 | #include <helper_cuda.h>
 31 | #include <nvmatrix.cuh>
 32 | #include "conv_util.cuh"
 33 | 
 34 | enum FILTER_OUTPUT_ORDER {MODULE_FILTER_IMAGE, FILTER_MODULE_IMAGE};
 35 | 
 36 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 37 |                     int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 38 |                     int numImgColors, int numGroups);
 39 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 40 |                    int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 41 |                    int numImgColors, int numGroups,
 42 |                    float scaleTargets, float scaleOutput);
 43 | 
 44 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 45 |                      int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 46 |                      int numImgColors, int numGroups);
 47 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 48 |                      int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 49 |                      int numImgColors, int numGroups,
 50 |                      float scaleTargets, float scaleOutput);
 51 | 
 52 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 53 |                  int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups);
 54 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 55 |                  int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups,
 56 |                  float scaleTargets, float scaleOutput);
 57 | 
 58 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 59 |                   int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups);
 60 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 61 |                   int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups,
 62 |                   float scaleTargets, float scaleOutput);
 63 | 
 64 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 65 |                     int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart,
 66 |                     int moduleStride, int numImgColors, int numGroups, int partialSum);
 67 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 68 |                     int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 69 |                     int numImgColors, int numGroups, int partialSum,
 70 |                     float scaleTargets, float scaleOutput);
 71 | 
 72 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 73 |                      int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart,
 74 |                      int moduleStride, int numImgColors, int numGroups);
 75 | 
 76 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 77 |                      int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 78 |                      int numImgColors, int numGroups, float scaleTargets, float scaleOutput);
 79 | 
 80 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 81 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 82 |                           int numImgColors, int numFilterColors, int numGroups);
 83 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 84 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 85 |                           int numImgColors, int numFilterColors, int numGroups,
 86 |                           float scaleTargets, float scaleOutput);
 87 | 
 88 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 89 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 90 |                           int numImgColors, int numFilterColors, int numGroups,
 91 |                           float scaleTargets, float scaleOutput);
 92 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 93 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 94 |                           int numImgColors, int numFilterColors, int numGroups);
 95 | 
 96 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
 97 |                          int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 98 |                          int numImgColors, int numFilterColors, int numGroups);
 99 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
100 |                         int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors,
101 |                         int numGroups, int partialSum, float scaleTargets, float scaleOutput);
102 | 
103 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
104 |                          int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
105 |                          int numImgColors, int numFilterColors, int numGroups);
106 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
107 |                         int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors,
108 |                         int numGroups, float scaleTargets, float scaleOutput);
109 | 
110 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
111 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups);
112 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
113 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups,
114 |                        float scaleTargets, float scaleOutput);
115 | 
116 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
117 |                         int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups);
118 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
119 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups,
120 |                        float scaleTargets, float scaleOutput);
121 | 
122 | 
123 | #endif	/* COMMON_CUH */
124 | 
125 | 


--------------------------------------------------------------------------------
/include/data.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef DATA_CUH
28 | #define	DATA_CUH
29 | 
30 | #include <vector>
31 | #include <algorithm>
32 | #include "util.cuh"
33 | 
/*
 * Owning wrapper around a vector of matrix pointers.
 *
 * The constructor stores the ADDRESS of the vector it is given, and the
 * destructor deletes every element and then the vector itself. The caller
 * must therefore pass a vector allocated with new, holding elements allocated
 * with new, and must leave their destruction to this object.
 *
 * T must provide getLeadingDim(). The constructor asserts that the vector is
 * non-empty and that all elements report the same, positive, leading
 * dimension.
 */
template <class T>
class Data {
protected:
    std::vector<T*>* _data;
public:
    typedef typename std::vector<T*>::iterator T_iter;

    /*
     * Takes ownership of data and of every element it points to.
     */
    Data(std::vector<T*>& data) : _data(&data) {
        assert(_data->size() > 0);
        typename std::vector<T*>::size_type i = 1;
        while (i < data.size()) {
            // Each element must agree with its predecessor.
            assert(data[i-1]->getLeadingDim() == data[i]->getLeadingDim());
            ++i;
        }
        assert(data[0]->getLeadingDim() > 0);
    }

    /*
     * Deletes each stored element, then the vector itself.
     */
    ~Data() {
        T_iter cur = _data->begin();
        while (cur != _data->end()) {
            delete *cur;
            ++cur;
        }
        delete _data;
    }

    /*
     * Bounds-checked element access (std::vector::at).
     */
    T& operator [](int idx) {
        T* entry = _data->at(idx);
        return *entry;
    }

    /*
     * Number of stored elements.
     */
    int getSize() {
        return static_cast<int>(_data->size());
    }

    /*
     * The wrapped vector itself.
     */
    std::vector<T*>& getData() {
        std::vector<T*>& wrapped = *_data;
        return wrapped;
    }

    /*
     * Leading dimension of the first element.
     */
    int getNumCases() {
        T* first = _data->at(0);
        return first->getLeadingDim();
    }
};
72 | 
73 | typedef Data<NVMatrix> GPUData;
74 | typedef Data<Matrix> CPUData;
75 | 
76 | class DataProvider {
77 | protected:
78 |     CPUData* _hData;
79 |     NVMatrixV _data;
80 |     int _minibatchSize;
81 |     long int _dataSize;
82 | public:
83 |     DataProvider(int minibatchSize);
84 |     GPUData& operator[](int idx);
85 |     void setData(CPUData&);
86 |     void clearData();
87 |     GPUData& getMinibatch(int idx);
88 |     GPUData& getDataSlice(int startCase, int endCase);
89 |     int getNumMinibatches();
90 |     int getMinibatchSize();
91 |     int getNumCases();
92 |     int getNumCasesInMinibatch(int idx);
93 | };
94 | 
95 | #endif	/* DATA_CUH */
96 | 
97 | 


--------------------------------------------------------------------------------
/include/layer.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef LAYER_CUH
 28 | #define	LAYER_CUH
 29 | 
 30 | #include <string>
 31 | #include <vector>
 32 | #include <map>
 33 | #include <assert.h>
 34 | #include <nvmatrix.cuh>
 35 | 
 36 | #include "convnet.cuh"
 37 | #include "cost.cuh"
 38 | #include "weights.cuh"
 39 | #include "neuron.cuh"
 40 | 
 41 | class Cost;
 42 | class ConvNet;
 43 | class CostLayer;
 44 | class DataLayer;
 45 | 
 46 | /*
 47 |  * Abstract layer.
 48 |  */
 49 | class Layer {
 50 | protected:
 51 |     ConvNet* _convNet;
 52 |     std::vector<Layer*> _prev, _next;
 53 |     int _rcvdFInputs, _rcvdBInputs;
 54 | 
 55 |     NVMatrixV _inputs;
 56 |     NVMatrix *_outputs; // TODO: make this a pointer so you can reuse previous layers' matrices
 57 |     NVMatrix *_actsGrad; // Layer activity gradients
 58 |     bool _gradConsumer, _foundGradConsumers, _trans;
 59 |     bool _conserveMem;
 60 |     int _numGradProducersNext;
 61 |     int _actsTarget, _actsGradTarget;
 62 |     std::string _name, _type;
 63 | 
 64 |     NVMatrix _dropout_mask;
 65 |     float _dropout;
 66 |     
 67 |     void fpropNext(PASS_TYPE passType);
 68 |     virtual void truncBwdActs();
 69 |     virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0;
 70 | 
 71 |     virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) {
 72 |         // Do nothing by default
 73 |     }
 74 |     virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) {
 75 |         assert(!isGradProducer()); // Only do nothing if not grad producer
 76 |     }
 77 | public:
 78 |     static bool _saveActsGrad, _saveActs;
 79 | 
 80 |     Layer(ConvNet* convNet, PyObject* paramsDict, bool trans);
 81 | 
 82 |     virtual void fprop(PASS_TYPE passType);
 83 |     void fprop(NVMatrix& v, PASS_TYPE passType);
 84 |     virtual void fprop(NVMatrixV& v, PASS_TYPE passType);
 85 |     virtual void bprop(PASS_TYPE passType);
 86 |     void bprop(NVMatrix& v, PASS_TYPE passType);
 87 |     virtual void reset();
 88 |     int incRcvdBInputs();
 89 |     int getRcvdFInputs();
 90 |     int getRcvdBInputs();
 91 |     bool isGradConsumer();
 92 |     virtual bool isGradProducer();
 93 |     std::string& getName();
 94 |     std::string& getType();
 95 |     void addNext(Layer* l);
 96 |     void addPrev(Layer* l);
 97 |     std::vector<Layer*>& getPrev();
 98 |     std::vector<Layer*>& getNext();
 99 |     virtual NVMatrix& getActs();
100 |     virtual NVMatrix& getActsGrad();
101 |     virtual void postInit();
102 | 
103 |     // Do nothing if this layer has no weights
104 |     virtual void updateWeights() {
105 |     }
106 |     virtual void checkGradients() {
107 |     }
108 |     virtual void copyToCPU() {
109 |     }
110 |     virtual void copyToGPU()  {
111 |     }
112 | };
113 | 
114 | class NeuronLayer : public Layer {
115 | protected:
116 |     Neuron* _neuron;
117 | 
118 |     virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
119 |     virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
120 | public:
121 |     NeuronLayer(ConvNet* convNet, PyObject* paramsDict);
122 | };
123 | 
124 | class WeightLayer : public Layer {
125 | protected:
126 |     WeightList _weights;
127 |     Weights *_biases;
128 |     float _wStep, _bStep;
129 | 
130 |     void bpropCommon(NVMatrix& v, PASS_TYPE passType);
131 |     virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0;
132 |     virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0;
133 | public:
134 |     WeightLayer(ConvNet* convNet, PyObject* paramsDict, bool trans, bool useGrad);
135 |     virtual void updateWeights();
136 |     virtual void copyToCPU();
137 |     virtual void copyToGPU();
138 |     void checkGradients();
139 |     Weights& getWeights(int idx);
140 | };
141 | 
142 | class FCLayer : public WeightLayer {
143 | protected:
144 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
145 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
146 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType);
147 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
148 | public:
149 |     FCLayer(ConvNet* convNet, PyObject* paramsDict);
150 | };
151 | 
152 | class SoftmaxLayer : public Layer {
153 | protected:
154 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
155 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
156 | public:
157 |     SoftmaxLayer(ConvNet* convNet, PyObject* paramsDict);
158 | };
159 | 
160 | class EltwiseSumLayer : public Layer {
161 | protected:
162 |     vector<float>* _coeffs;
163 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
164 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
165 | public:
166 |     EltwiseSumLayer(ConvNet* convNet, PyObject* paramsDict);
167 | };
168 | 
169 | class EltwiseMaxLayer : public Layer { // Direct Layer subclass with no data members of its own.
170 | protected:
171 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
172 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
173 | public:
174 |     EltwiseMaxLayer(ConvNet* convNet, PyObject* paramsDict);
175 | };
176 | 
177 | class DataLayer : public Layer { // Layer subclass that declares fpropActs but no bpropActs.
178 | private:
179 |     int _dataIdx; // presumably selects which entry of the incoming data vector this layer uses (TODO confirm)
180 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // private here, unlike the protected declarations in sibling layers
181 | public:
182 |     DataLayer(ConvNet* convNet, PyObject* paramsDict);
183 | 
184 |     bool isGradProducer();
185 |     void fprop(PASS_TYPE passType); // overload taking no data
186 |     void fprop(NVMatrixV& data, PASS_TYPE passType); // overload taking a vector of NVMatrix pointers (see the NVMatrixV typedef)
187 | };
188 | 
189 | class LocalLayer : public WeightLayer { // Intermediate base: does not declare bpropBiases/bpropWeights, so WeightLayer's pure virtuals stay unimplemented here.
190 | protected:
191 |     struct FilterConns { // pair of raw int arrays; the h/d prefixes presumably mean host/device (TODO confirm)
192 |         int* hFilterConns;
193 |         int* dFilterConns;
194 |     };
195 |     vector<FilterConns>* _filterConns; // held by pointer; who allocates and frees it is not visible in this header
196 | 
197 |     intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, *_groups; // pointers to int vectors (intv); presumably one entry per input
198 |     intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse; // likewise pointers to int vectors
199 |     int _modulesX, _modules, _numFilters;
200 | 
201 |     void copyToGPU(); // redeclares the virtual copyToGPU() that WeightLayer declares
202 | 
203 | public:
204 |     LocalLayer(ConvNet* convNet, PyObject* paramsDict, bool useGrad); // takes useGrad in addition to the usual two arguments
205 | };
206 | 
207 | class ConvLayer : public LocalLayer { // LocalLayer subclass that declares all four propagation hooks plus truncBwdActs().
208 | protected:
209 |     int _partialSum;
210 |     bool _sharedBiases;
211 | 
212 |     NVMatrix _weightGradTmp, _actGradTmp; // NVMatrix members held by value; the Tmp suffix suggests scratch buffers (TODO confirm)
213 | 
214 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
215 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
216 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType); // same signature as WeightLayer's pure-virtual bpropBiases
217 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); // same signature as WeightLayer's pure-virtual bpropWeights
218 |     void truncBwdActs();
219 | 
220 | public:
221 |     ConvLayer(ConvNet* convNet, PyObject* paramsDict);
222 | };
223 | 
224 | class LocalUnsharedLayer : public LocalLayer { // LocalLayer subclass that declares all four propagation hooks.
225 | protected:
226 |     NVMatrix _sexMask; // NOTE(review): purpose not evident from this header; confirm against the implementation
227 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
228 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
229 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType); // same signature as WeightLayer's pure-virtual bpropBiases
230 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); // same signature as WeightLayer's pure-virtual bpropWeights
231 | public:
232 |     LocalUnsharedLayer(ConvNet* convNet, PyObject* paramsDict);
233 | };
234 | 
235 | class PoolLayer : public Layer { // Shared base of AvgPoolLayer and MaxPoolLayer; declares no fpropActs/bpropActs itself.
236 | protected:
237 |     int _channels, _sizeX, _start, _stride, _outputsX;
238 |     int _imgSize;
239 |     string _pool; // presumably the pooling-type name the factory switches on (TODO confirm)
240 | public:
241 |     PoolLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); // takes trans in addition to the usual two arguments
242 | 
243 |     static PoolLayer& makePoolLayer(ConvNet* convNet, PyObject* paramsDict); // factory returning a reference; ownership of the result is not visible here
244 | };
245 | 
246 | class AvgPoolLayer : public PoolLayer { // PoolLayer subclass with no data members of its own.
247 | protected:
248 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
249 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
250 | public:
251 |     AvgPoolLayer(ConvNet* convNet, PyObject* paramsDict);
252 | };
253 | 
254 | class MaxPoolLayer : public PoolLayer { // PoolLayer subclass with no data members of its own.
255 | protected:
256 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
257 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
258 | public:
259 |     MaxPoolLayer(ConvNet* convNet, PyObject* paramsDict);
260 | };
261 | 
262 | class NailbedLayer : public Layer { // Direct Layer subclass; its propagation hooks are public, unlike most siblings in this header.
263 | protected:
264 |     int _channels, _start, _stride, _outputsX;
265 |     int _imgSize;
266 | public:
267 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
268 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
269 | 
270 |     NailbedLayer(ConvNet* convNet, PyObject* paramsDict);
271 | };
272 | 
273 | class GaussianBlurLayer : public Layer { // Direct Layer subclass holding a filter as both a Matrix* and an NVMatrix.
274 | protected:
275 |     int _channels;
276 |     Matrix* _hFilter; // the h prefix presumably means the host-side copy (TODO confirm)
277 |     NVMatrix _filter; // presumably filled from _hFilter by copyToGPU() (TODO confirm)
278 |     NVMatrix _actGradsTmp;
279 | public:
280 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // public here, unlike most siblings in this header
281 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
282 |     void copyToGPU();
283 | 
284 |     GaussianBlurLayer(ConvNet* convNet, PyObject* paramsDict);
285 | };
286 | 
287 | class ResizeLayer : public Layer { // Direct Layer subclass; its propagation hooks are public.
288 | protected:
289 |     int _channels;
290 |     float _scale;
291 |     int _imgSize, _tgtSize; // presumably source and target image sizes (TODO confirm)
292 | public:
293 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
294 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
295 | 
296 |     ResizeLayer(ConvNet* convNet, PyObject* paramsDict);
297 | };
298 | 
299 | class RGBToYUVLayer : public Layer { // Direct Layer subclass with no data members; presumably an RGB-to-YUV conversion (name-based guess).
300 | public:
301 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
302 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
303 | 
304 |     RGBToYUVLayer(ConvNet* convNet, PyObject* paramsDict);
305 | };
306 | 
307 | class RGBToLABLayer : public Layer { // Direct Layer subclass; presumably an RGB-to-LAB conversion (name-based guess).
308 | protected:
309 |     bool _center; // NOTE(review): effect not visible in this header; confirm against the implementation
310 | public:
311 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
312 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
313 | 
314 |     RGBToLABLayer(ConvNet* convNet, PyObject* paramsDict);
315 | };
316 | 
317 | class ResponseNormLayer : public Layer { // Base of CrossMapResponseNormLayer and ContrastNormLayer.
318 | protected:
319 |     int _channels, _size;
320 |     float _scale, _pow;
321 |     NVMatrix _denoms; // NVMatrix held by value; presumably the normalisation denominators (TODO confirm)
322 | 
323 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
324 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
325 |     void truncBwdActs();
326 | public:
327 |     ResponseNormLayer(ConvNet* convNet, PyObject* paramsDict);
328 | };
329 | 
330 | class CrossMapResponseNormLayer : public ResponseNormLayer { // Redeclares fpropActs/bpropActs from ResponseNormLayer and adds _blocked.
331 | protected:
332 |     bool _blocked; // NOTE(review): effect not visible in this header; confirm against the implementation
333 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
334 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
335 | public:
336 |     CrossMapResponseNormLayer(ConvNet* convNet, PyObject* paramsDict);
337 | };
338 | 
339 | class ContrastNormLayer : public ResponseNormLayer { // Redeclares fpropActs/bpropActs/truncBwdActs from ResponseNormLayer.
340 | protected:
341 |     int _imgSize;
342 |     NVMatrix _meanDiffs; // NVMatrix held by value; presumably differences from a local mean (TODO confirm)
343 | 
344 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
345 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
346 |     void truncBwdActs();
347 | public:
348 |     ContrastNormLayer(ConvNet* convNet, PyObject* paramsDict);
349 | };
350 | 
351 | class CostLayer : public Layer { // Base of LogregCostLayer and SumOfSquaresCostLayer; declares no fpropActs/bpropActs itself.
352 | protected:
353 |     float _coeff; // presumably the value returned by getCoeff() (TODO confirm)
354 |     doublev _costv; // vector of doubles; presumably what getCost() exposes (TODO confirm)
355 | public:
356 |     CostLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); // takes trans in addition to the usual two arguments
357 |     void bprop(PASS_TYPE passType);
358 |     virtual doublev& getCost(); // returns a reference, so callers see a live vector rather than a copy
359 |     float getCoeff();
360 |     bool isGradProducer();
361 | 
362 |     static CostLayer& makeCostLayer(ConvNet* convNet, string& type, PyObject* paramsDict); // factory keyed on type; ownership of the result is not visible here
363 | };
364 | 
365 | /*
366 |  * Input 0: labels
367 |  * Input 1: softmax outputs
368 |  */
369 | class LogregCostLayer : public CostLayer { // CostLayer subclass; its expected inputs are listed in the comment just above.
370 | protected:
371 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
372 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
373 | public:
374 |     LogregCostLayer(ConvNet* convNet, PyObject* paramsDict);
375 | };
376 | 
377 | class SumOfSquaresCostLayer : public CostLayer { // CostLayer subclass with no data members of its own.
378 | protected:
379 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // forward hook; declared here, not defined inline
380 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // backward hook; declared here, not defined inline
381 | public:
382 |     SumOfSquaresCostLayer(ConvNet* convNet, PyObject* paramsDict);
383 | };
384 | 
385 | #endif	/* LAYER_CUH */
386 | 
387 | 


--------------------------------------------------------------------------------
/include/layer_kernels.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef LAYER_KERNELS_CUH
28 | #define	LAYER_KERNELS_CUH
29 | 
30 | #include <helper_cuda.h>
31 | #include <nvmatrix.cuh>
32 | 
33 | #define LOGREG_GRAD_THREADS_X      32   // presumably the thread-block width for the logreg gradient kernel (TODO confirm)
34 | #define LOGREG_GRAD_THREADS_Y      4    // presumably the matching thread-block height
35 | 
36 | #define LOGREG_ERR_THREADS_X        128 // presumably the thread-block width for the logreg cost kernel (TODO confirm)
37 | #define LOGREG_ERR_THREADS_Y        1   // presumably the matching thread-block height
38 | 
39 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out); // the two *_out arguments are presumably outputs
40 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); // add presumably chooses accumulate-into-target over overwrite (TODO confirm)
41 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add);
42 | 
43 | // Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad
44 | // to avoid dividing and then multiplying by quantities that may be near zero.
45 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
46 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add);
47 | 
48 | #endif	/* LAYER_KERNELS_CUH */
49 | 
50 | 


--------------------------------------------------------------------------------
/include/nvmatrix/nvmatrix_operators.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef NVMATRIX_OPERATORS_CUH
 28 | #define	NVMATRIX_OPERATORS_CUH
 29 | 
 30 | #include <helper_cuda.h>
 31 | 
 32 | class NVMatrixOps {   // Namespace-like holder for unary float functors; every operator() is __device__-only.
 33 | public:
 34 |     class Exp {   // x -> __expf(x)
 35 |     public:
 36 |         __device__ inline float operator()(const float x) const {
 37 |             return __expf(x);
 38 |         }
 39 |     };
 40 | 
 41 |     class Logistic {   // x -> 1 / (1 + __expf(-x)), divided with __fdividef
 42 |     public:
 43 |         __device__ inline float operator()(const float x) const {
 44 |             return __fdividef(1.0f, 1.0f + __expf(-x));
 45 |         }
 46 |     };
 47 | 
 48 |     class Log {   // x -> __logf(x)
 49 |     public:
 50 |         __device__ inline float operator()(const float x) const {
 51 |             return __logf(x);
 52 |         }
 53 |     };
 54 | 
 55 |     class Square {   // x -> x * x
 56 |     public:
 57 |         __device__ inline float operator()(const float x) const {
 58 |             return x * x;
 59 |         }
 60 |     };
 61 | 
 62 |     class Sqrt {   // x -> sqrtf(x)
 63 |     public:
 64 |         __device__ inline float operator()(const float x) const {
 65 |             return sqrtf(x);
 66 |         }
 67 |     };
 68 | 
 69 |     class Reciprocal {   // x -> 1 / x, using ordinary division
 70 |     public:
 71 |         __device__ inline float operator()(const float x) const {
 72 |             return 1.0f / x;
 73 |         }
 74 |     };
 75 | 
 76 |     class Abs {   // x -> x when x > 0, otherwise -x
 77 |     public:
 78 |         __device__ inline float operator()(const float x) const {
 79 |             return x > 0 ? x : -x;
 80 |         }
 81 |     };
 82 | 
 83 |     class Sign {   // x -> +1, 0 or -1
 84 |     public:
 85 |         __device__ inline float operator()(const float x) const {
 86 |             return (x > 0) - (x < 0);
 87 |         }
 88 |     };
 89 | 
 90 |     class Identity {   // x -> x
 91 |     public:
 92 |         __device__ inline float operator()(const float x) const {
 93 |             return x;
 94 |         }
 95 |     };
 96 | 
 97 |     class Zero {   // x -> 0, input ignored
 98 |     public:
 99 |         __device__ inline float operator()(const float x) const {
100 |             return 0.0f;
101 |         }
102 |     };
103 | 
104 |     class One {   // x -> 1, input ignored
105 |     public:
106 |         __device__ inline float operator()(const float x) const {
107 |             return 1.0f;
108 |         }
109 |     };
110 | 
111 |     class SmallerThanScalar {   // x -> (x < scalar), yielding 1 or 0
112 |         const float scalar;     // private by default; fixed at construction
113 |     public:
114 |         SmallerThanScalar(const float threshold)
115 |             : scalar(threshold) {
116 |         }
117 |         __device__ inline float operator()(const float x) const {
118 |             return x < scalar;
119 |         }
120 |     };
121 | 
122 |     class BiggerThanScalar {    // x -> (x > scalar), yielding 1 or 0
123 |         const float scalar;     // private by default; fixed at construction
124 |     public:
125 |         BiggerThanScalar(const float threshold)
126 |             : scalar(threshold) {
127 |         }
128 |         __device__ inline float operator()(const float x) const {
129 |             return x > scalar;
130 |         }
131 |     };
132 | 
133 |     class AddScalar {           // x -> x + scalar
134 |         const float scalar;     // private by default; fixed at construction
135 |     public:
136 |         AddScalar(const float addend)
137 |             : scalar(addend) {
138 |         }
139 |         __device__ inline float operator()(const float x) const {
140 |             return x + scalar;
141 |         }
142 |     };
143 | 
144 |     class WeightedAddScalar {   // x -> weight * x + scalar
145 |         const float weight, scalar;   // private by default; fixed at construction
146 |     public:
147 |         WeightedAddScalar(const float w, const float addend)
148 |             : weight(w), scalar(addend) {
149 |         }
150 |         __device__ inline float operator()(const float x) const {
151 |             return weight * x + scalar;
152 |         }
153 |     };
154 | 
155 |     class MultByScalar {        // x -> x * scalar
156 |         const float scalar;     // private by default; fixed at construction
157 |     public:
158 |         MultByScalar(const float factor)
159 |             : scalar(factor) {
160 |         }
161 |         __device__ inline float operator()(const float x) const {
162 |             return x * scalar;
163 |         }
164 |     };
165 | 
166 |     class Pow {                 // x -> __powf(x, p)
167 |         const float p;          // private by default; the exponent
168 |     public:
169 |         Pow(const float exponent)
170 |             : p(exponent) {
171 |         }
172 |         __device__ inline float operator()(const float x) const {
173 |             return __powf(x, p);
174 |         }
175 |     };
176 | 
177 |     template <bool exclusive>
178 |     class InRange {             // x -> 1 if x lies between lower and upper; exclusive picks strict bounds
179 |         const float lower, upper;   // private by default; fixed at construction
180 |     public:
181 |         InRange(const float lo, const float hi)
182 |             : lower(lo), upper(hi) {
183 |         }
184 |         __device__ inline float operator()(const float x) const {
185 |             return exclusive ? (x > lower && x < upper) : (x >= lower && x <= upper);
186 |         }
187 |     };
188 | 
189 |     class MinWithScalar {       // x -> scalar when x > scalar, otherwise x
190 |         const float scalar;     // private by default; fixed at construction
191 |     public:
192 |         MinWithScalar(const float bound)
193 |             : scalar(bound) {
194 |         }
195 |         __device__ inline float operator()(const float x) const {
196 |             return x > scalar ? scalar : x;
197 |         }
198 |     };
199 | 
200 |     class MaxWithScalar {       // x -> x when x > scalar, otherwise scalar
201 |         const float scalar;     // private by default; fixed at construction
202 |     public:
203 |         MaxWithScalar(const float bound)
204 |             : scalar(bound) {
205 |         }
206 |         __device__ inline float operator()(const float x) const {
207 |             return x > scalar ? x : scalar;
208 |         }
209 |     };
210 | };
211 | 
212 | class NVMatrixBinaryOps {   // Namespace-like holder for binary float functors; every operator() is __device__-only.
213 | public:
214 |     class Equals {   // (x, y) -> (x == y), yielding 1 or 0
215 |     public:
216 |         __device__ inline float operator()(const float x, const float y) const {
217 |             return x == y;
218 |         }
219 |     };
220 | 
221 |     class BiggerThan {   // (x, y) -> (x > y), yielding 1 or 0
222 |     public:
223 |         __device__ inline float operator()(const float x, const float y) const {
224 |             return x > y;
225 |         }
226 |     };
227 | 
228 |     class Divide {   // (x, y) -> __fdividef(x, y)
229 |     public:
230 |         __device__ inline float operator()(const float x, const float y) const {
231 |             return __fdividef(x, y);
232 |         }
233 |     };
234 | 
235 |     class Multiply {   // (x, y) -> x * y
236 |     public:
237 |         __device__ inline float operator()(const float x, const float y) const {
238 |             return x * y;
239 |         }
240 |     };
241 | 
242 |     class SquaredDiff {   // (x, y) -> (x - y)^2
243 |     public:
244 |         __device__ inline float operator()(const float x, const float y) const {
245 |             return (x - y) * (x - y);
246 |         }
247 |     };
248 | 
249 |     class WeightedAdd {   // (x, y) -> x * scaleA + y * scaleB
250 |         const float scaleA, scaleB;   // private by default; fixed at construction
251 |     public:
252 |         WeightedAdd(const float sa, const float sb)
253 |             : scaleA(sa), scaleB(sb) {
254 |         }
255 |         __device__ inline float operator()(const float x, const float y) const {
256 |             return x * scaleA + y * scaleB;
257 |         }
258 |     };
259 | 
260 |     class Add {   // (x, y) -> x + y
261 |     public:
262 |         __device__ inline float operator()(const float x, const float y) const {
263 |             return x + y;
264 |         }
265 |     };
266 | 
267 |     class First {   // (x, y) -> x
268 |     public:
269 |         __device__ inline float operator()(const float x, const float y) const {
270 |             return x;
271 |         }
272 |     };
273 | 
274 |     class Second {   // (x, y) -> y
275 |     public:
276 |         __device__ inline float operator()(const float x, const float y) const {
277 |             return y;
278 |         }
279 |     };
280 | 
281 |     class SecondScaled {    // (x, y) -> scale * y
282 |         const float scale;  // private by default; fixed at construction
283 |     public:
284 |         SecondScaled(const float factor)
285 |             : scale(factor) {
286 |         }
287 |         __device__ inline float operator()(const float x, const float y) const {
288 |             return scale * y;
289 |         }
290 |     };
291 | };
292 | 
293 | class NVMatrixAggs {   // Reduction functors: operator() combines two values, getBaseValue() supplies the starting value.
294 | public:
295 |     class Sum {   // running total, starting from 0
296 |     public:
297 |         __device__ inline float operator()(const float x, const float y) const {
298 |             return x + y;
299 |         }
300 |         __device__ inline float getBaseValue() {
301 |             return 0.0f;
302 |         }
303 |     };
304 | 
305 |     class Max {   // running maximum, starting from -2e38
306 |     public:
307 |         __device__ inline float operator()(const float x, const float y) const {
308 |             return x > y ? x : y;
309 |         }
310 |         __device__ inline float getBaseValue() {
311 |             return -2e38;
312 |         }
313 |     };
314 | 
315 |     class Min {   // running minimum, starting from 2e38
316 |     public:
317 |         __device__ inline float operator()(const float x, const float y) const {
318 |             return x > y ? y : x;
319 |         }
320 |         __device__ inline float getBaseValue() {
321 |             return 2e38;
322 |         }
323 |     };
324 | 
325 |     template<class UnaryOperator>
326 |     class ArgMax {   // keeps whichever argument maps to the larger u(.) value; the second wins ties
327 |         UnaryOperator u;   // private by default; must also provide getArgMin()
328 |     public:
329 |         ArgMax(UnaryOperator op)
330 |             : u(op) {
331 |         }
332 |         __device__ inline float operator()(const float x, const float y) const {
333 |             return u(x) > u(y) ? x : y;
334 |         }
335 |         __device__ inline float getBaseValue() {
336 |             return u.getArgMin();
337 |         }
338 |     };
339 | };
340 | 
341 | class NVMatrixTernaryOps {   // Namespace-like holder for three-operand float functors.
342 | public:
343 |     class Add {   // (x, y, z) -> x + y + z, summed left to right
344 |     public:
345 |         __device__ inline float operator()(const float x, const float y, const float z) const {
346 |             return x + y + z;
347 |         }
348 |     };
349 | };
350 | 
351 | #endif	/* NVMATRIX_OPERATORS_CUH */
352 | 
353 | 


--------------------------------------------------------------------------------
/include/pyconvnet.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef PYCONVNET3_CUH
28 | #define	PYCONVNET3_CUH
29 | 
30 | #define _QUOTEME(x) #x   // stringifies the argument exactly as written
31 | #define QUOTEME(x) _QUOTEME(x)   // extra indirection so that x is macro-expanded before it is stringified
32 | 
33 | extern "C" PyMODINIT_FUNC init_convnet();   // C-linkage module init; the init<name> form suggests a Python 2 module called _convnet (TODO confirm)
34 | 
35 | PyObject* initModel(PyObject *self, PyObject *args);   // this and the entries below share the (self, args) -> PyObject* shape of Python C-API methods
36 | PyObject* startBatch(PyObject *self, PyObject *args);
37 | PyObject* finishBatch(PyObject *self, PyObject *args);
38 | PyObject* checkGradients(PyObject *self, PyObject *args);
39 | PyObject* syncWithHost(PyObject *self, PyObject *args);
40 | PyObject* startMultiviewTest(PyObject *self, PyObject *args);
41 | PyObject* startFeatureWriter(PyObject *self, PyObject *args);
42 | 
43 | #endif	/* PYCONVNET3_CUH */
44 | 


--------------------------------------------------------------------------------
/include/util.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef UTIL_H
28 | #define	UTIL_H
29 | 
30 | #include <vector>
31 | #include <map>
32 | #include <string>
33 | #include <sstream>
34 | 
35 | #include <nvmatrix.cuh>
36 | #include <matrix.h>
37 | 
38 | #include <Python.h>
39 | 
40 | /*
41 |  * The types of passes that the convnet supports. Used in the fprop and bprop functions in
42 |  * ConvNet class. Most of the layers ignore the pass type, but some make use of it.
43 |  */
44 | enum PASS_TYPE {PASS_TRAIN, PASS_TEST, PASS_GC};   // PASS_GC is presumably the gradient-checking pass (cf. the GC_* macros in this header)
45 | 
46 | // Gradient-checking settings; how they are consumed is not visible in this header.
47 | #define GC_SUPPRESS_PASSES          true   // boolean switch
48 | #define GC_REL_ERR_THRESH           0.02   // double literal; presumably the largest relative error accepted (TODO confirm)
49 | 
50 | /*
51 |  * Store entire data matrix on GPU if its size does not exceed this many MB.
52 |  * Otherwise store only one minibatch at a time.
53 |  */
54 | #define MAX_DATA_ON_GPU             200   // unit: MB, per the comment above
55 | 
56 | typedef std::vector<Matrix*> MatrixV;       // vector of raw Matrix pointers
57 | typedef std::vector<NVMatrix*> NVMatrixV;   // vector of raw NVMatrix pointers
58 | typedef std::map<std::string,std::vector<double>*> CostMap;   // string key -> pointer to a vector of doubles
59 | typedef std::map<std::string,double> CostCoeffMap;            // string key -> a single double
60 | typedef std::vector<double> doublev;
61 | typedef std::vector<float> floatv;
62 | typedef std::vector<int> intv;
63 | 
64 | floatv* getFloatV(PyObject* pyList);     // Python object -> float vector; ownership of the returned pointer is not visible here
65 | intv* getIntV(PyObject* pyList);         // Python object -> int vector
66 | MatrixV* getMatrixV(PyObject* pyList);   // Python object -> vector of Matrix pointers
67 | int* getIntA(PyObject* pyList);          // Python object -> raw int array; its length is not returned
68 | 
69 | int pyDictGetInt(PyObject* dict, const char* key);   // the pyDictGet* family presumably looks up key in dict and converts the value (TODO confirm)
70 | intv* pyDictGetIntV(PyObject* dict, const char* key);
71 | std::string pyDictGetString(PyObject* dict, const char* key);
72 | float pyDictGetFloat(PyObject* dict, const char* key);
73 | floatv* pyDictGetFloatV(PyObject* dict, const char* key);
74 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key);
75 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key);
76 | int* pyDictGetIntA(PyObject* dict, const char* key);
77 | 
78 | template<typename T>
79 | std::string tostr(T n) {   // Formats n through operator<< and returns the resulting text.
80 |     std::ostringstream oss;
81 |     oss << n;
82 |     return oss.str();
83 | }
84 | 
85 | #endif	/* UTIL_H */
86 | 
87 | 


--------------------------------------------------------------------------------
/include/weights.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef WEIGHTS_CUH
 28 | #define	WEIGHTS_CUH
 29 | 
 30 | #include <string>
 31 | #include <vector>
 32 | #include <iostream>
 33 | #include <helper_cuda.h>
 34 | #include <assert.h>
 35 | #include <nvmatrix.cuh>
 36 | #include <matrix.h>
 37 | #include "util.cuh"
 38 | 
 39 | using namespace std;
 40 | 
 41 | class Weights {
 42 | private:
 43 |     Matrix* _hWeights, *_hWeightsInc;                  // host-side matrices: weights and their increments
 44 |     NVMatrix* _weights, *_weightsInc, *_weightsGrad;   // GPU-side counterparts, set by copyToGPU(); _weightsGrad stays NULL unless _useGrad
 45 | 
 46 |     float _epsW, _wc, _mom;   // update() only acts when _epsW > 0; _wc and _mom are presumably weight decay and momentum (TODO confirm)
 47 |     bool _onGPU, _useGrad;    // _onGPU is set by copyToGPU(); _useGrad selects a separate gradient matrix (see getGrad())
 48 |     int _numUpdates;          // both constructors start this at 0
 49 |     static bool _autoCopyToGPU;   // when true, the constructors call copyToGPU() right away
 50 | 
 51 |     // Non-NULL if these weights are really shared from some other layer
 52 |     Weights* _srcWeights;
 53 | 
 54 | public:
 55 |     NVMatrix& operator*() {   // dereference shorthand for getW()
 56 |         return getW();
 57 |     }
 58 | 
 59 |     Weights(Weights& srcWeights, float epsW)   // Sharing constructor: aliases srcWeights' matrices instead of owning any.
 60 |         : _hWeights(&srcWeights.getCPUW()), _hWeightsInc(&srcWeights.getCPUWInc()),
 61 |           _weights(NULL), _weightsInc(NULL), _weightsGrad(NULL),
 62 |           _epsW(epsW), _wc(0), _mom(srcWeights.getMom()),
 63 |           _onGPU(false), _useGrad(srcWeights.isUseGrad()), _numUpdates(0), _srcWeights(&srcWeights) {
 64 |         // Initialisers follow member declaration order, which is the order C++ runs them in.
 65 |         if (_autoCopyToGPU) {
 66 |             copyToGPU();
 67 |         }
 68 |     }
 69 | 
 70 |     Weights(Matrix& hWeights, Matrix& hWeightsInc, float epsW, float wc, float mom, bool useGrad)
 71 |         : _hWeights(&hWeights), _hWeightsInc(&hWeightsInc), _weights(NULL), _weightsInc(NULL),
 72 |           _weightsGrad(NULL), _epsW(epsW), _wc(wc), _mom(mom), _onGPU(false), _useGrad(useGrad),
 73 |           _numUpdates(0), _srcWeights(NULL) {   // owning constructor; initialisers in member declaration order
 74 |         if (_autoCopyToGPU) {
 75 |             copyToGPU();
 76 |         }
 77 |     }
 78 | 
 79 |     ~Weights() {   // Frees the host and GPU matrices, but only when this instance owns them.
 80 |         if (_srcWeights == NULL) {   // a sharing instance only aliases _srcWeights' matrices; deleting them here would free them twice
 81 |             delete _hWeights;
 82 |             delete _hWeightsInc;
 83 |             delete _weights;         // deleting NULL is a no-op, so this is safe before copyToGPU()
 84 |             delete _weightsInc;
 85 |             delete _weightsGrad;
 86 |         }
 87 |     }
 88 | 
 89 |     static void setAutoCopyToGPU(bool autoCopyToGPU) {   // class-wide switch: when true, constructors call copyToGPU() themselves
 90 |         _autoCopyToGPU = autoCopyToGPU;
 91 |     }
 92 | 
 93 |     NVMatrix& getW() {   // GPU-side weight matrix
 94 |         assert(_onGPU);   // only valid once copyToGPU() has run
 95 |         return *_weights;
 96 |     }
 97 | 
 98 |     NVMatrix& getInc() {
 99 |         assert(_onGPU);
100 |         return *_weightsInc;
101 |     }
102 | 
103 |     NVMatrix& getGrad() {
104 |         assert(_onGPU);
105 |         return _useGrad ? *_weightsGrad : *_weightsInc;
106 |     }
107 | 
108 |     Matrix& getCPUW() {
109 |         return *_hWeights;
110 |     }
111 | 
112 |     Matrix& getCPUWInc() {
113 |         return *_hWeightsInc;
114 |     }
115 | 
116 |     int getNumRows() const {
117 |         return _hWeights->getNumRows();
118 |     }
119 | 
120 |     int getNumCols() const {
121 |         return _hWeights->getNumCols();
122 |     }
123 | 
124 |     void copyToCPU() {
125 |         if (_srcWeights == NULL) {
126 |             assert(_onGPU);
127 |             _weights->copyToHost(*_hWeights);
128 |             _weightsInc->copyToHost(*_hWeightsInc);
129 |         }
130 |     }
131 | 
132 |     // This function is assumed to be called in the order in which the layers
133 |     // were defined
134 |     void copyToGPU() {
135 |         if (_srcWeights == NULL) {
136 |             _weights = new NVMatrix();
137 |             _weightsInc = new NVMatrix();
138 |             _weights->copyFromHost(*_hWeights, true);
139 |             _weightsInc->copyFromHost(*_hWeightsInc, true);
140 |             _weightsGrad = _useGrad ? new NVMatrix() : NULL;
141 |         } else {
142 |             _weights = _srcWeights->_weights;
143 |             _weightsInc = _srcWeights->_weightsInc;
144 |             _weightsGrad = _srcWeights->_weightsGrad;
145 |         }
146 |         _onGPU = true;
147 |     }
148 | 
149 |     // Scale your gradient by epsW / numCases!
150 |     void update() {
151 |         // Only true owner of weights updates
152 |         if (_srcWeights == NULL && _epsW > 0) {
153 |             assert(_onGPU);
154 |             if (_useGrad) {
155 |                 _weightsInc->add(*_weightsGrad, _mom, 1);
156 |             }
157 |             if (_wc > 0) {
158 |                 _weightsInc->add(*_weights, -_wc * _epsW);
159 |             }
160 |             _weights->add(*_weightsInc);
161 |             _numUpdates = 0;
162 |         }
163 |     }
164 | 
165 |     int incNumUpdates() {
166 |         if (_srcWeights != NULL) {
167 |             return _srcWeights->incNumUpdates();
168 |         }
169 |         return _numUpdates++;
170 |     }
171 | 
172 |     // Returns the number of times a gradient has been computed for this
173 |     // weight matrix during the current pass (interval between two calls of update())
174 |     // through the net. This number will only be greater than 1 if this weight matrix
175 |     // is *shared* by multiple layers in the net.
176 |     int getNumUpdates() const {
177 |         if (_srcWeights != NULL) {
178 |             return _srcWeights->getNumUpdates();
179 |         }
180 |         return _numUpdates;
181 |     }
182 | 
183 |     float getEps() const {
184 |         return _epsW;
185 |     }
186 | 
187 |     float getMom() const {
188 |         return _mom;
189 |     }
190 | 
191 |     float getWC() const {
192 |         return _wc;
193 |     }
194 | 
195 |     bool isUseGrad() const { // is good grammar
196 |         return _useGrad;
197 |     }
198 | };
199 | 
200 | class WeightList {  // Collection of Weights pointers; the list deletes its entries on destruction
201 | private:
202 |     std::vector<Weights*> _weightList;
203 | 
204 | public:
205 |     Weights& operator[](const int idx) const {  // unchecked access to entry idx
206 |         return *_weightList[idx];
207 |     }
208 | 
209 |     ~WeightList() {  // every Weights registered through addWeights() is deleted here
210 |         for (std::vector<Weights*>::iterator it = _weightList.begin(); it != _weightList.end(); ++it) {
211 |             delete *it;
212 |         }
213 |     }
214 | 
215 | //    WeightList(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) : _initialized(false) {
216 | //        initialize(hWeights, hWeightsInc, epsW, wc, mom, useGrads);
217 | //    }
218 | 
219 |     WeightList() {
220 |     }
221 | 
222 | //    void initialize(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) {
223 | //        for (int i = 0; i < hWeights.size(); i++) {
224 | //            _weightList.push_back(new Weights(*hWeights[i], *hWeightsInc[i], epsW[i], wc[i], mom[i], useGrads));
225 | //        }
226 | //        _initialized = true;
227 | //        delete &hWeights;
228 | //        delete &hWeightsInc;
229 | //        delete &epsW;
230 | //        delete &wc;
231 | //        delete &mom;
232 | //    }
233 | 
234 |     void addWeights(Weights& w) {  // stores &w; the destructor will delete it
235 |         _weightList.push_back(&w);
236 |     }
237 | 
238 | //    void addWeights(WeightList& wl) {
239 | //        for (int i = 0; i < wl.getSize(); i++) {
240 | //            addWeights(wl[i]);
241 | //        }
242 | //    }
243 | 
244 |     void update() {  // forwards update() to every entry, in insertion order
245 |         for (std::vector<Weights*>::iterator it = _weightList.begin(); it != _weightList.end(); ++it) {
246 |             (*it)->update();
247 |         }
248 |     }
249 | 
250 |     void copyToCPU() {  // forwards copyToCPU() to every entry, in insertion order
251 |         for (std::vector<Weights*>::iterator it = _weightList.begin(); it != _weightList.end(); ++it) {
252 |             (*it)->copyToCPU();
253 |         }
254 |     }
255 | 
256 |     void copyToGPU() {  // forwards copyToGPU() to every entry, in insertion order
257 |         for (std::vector<Weights*>::iterator it = _weightList.begin(); it != _weightList.end(); ++it) {
258 |             (*it)->copyToGPU();
259 |         }
260 |     }
261 | 
262 |     int getSize() {  // number of registered Weights, narrowed to int
263 |         return static_cast<int>(_weightList.size());
264 |     }
265 | };
266 | 
267 | #endif	/* WEIGHTS_CUH */


--------------------------------------------------------------------------------
/include/worker.cuh:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef WORKER_CUH
 28 | #define	WORKER_CUH
 29 | 
 30 | #include "convnet.cuh"
 31 | #include "cost.cuh"
 32 | #include "data.cuh"
 33 | 
 34 | class ConvNet;
 35 | class Cost;
 36 | 
 37 | class WorkResult {  // Result record: a result-type tag plus, optionally, a pointer to a Cost
 38 | public:
 39 |     enum RESULTS {BATCH_DONE, SYNC_DONE};
 40 | protected:
 41 |     WorkResult::RESULTS _resultType;
 42 |     Cost* _results;  // NOTE(review): ownership is decided in the .cu implementation, not visible here
 43 | public:
 44 |     WorkResult(WorkResult::RESULTS resultType, Cost& results);
 45 |     WorkResult(WorkResult::RESULTS resultType);  // variant that carries no Cost
 46 |     virtual ~WorkResult();
 47 |     Cost& getResults() const;
 48 |     WorkResult::RESULTS getResultType() const;
 49 | };
 50 | 
 51 | class Worker {  // Abstract unit of work bound to a ConvNet; subclasses implement run()
 52 | protected:
 53 |     ConvNet* _convNet;
 54 | public:
 55 |     Worker(ConvNet& convNet);
 56 |     virtual ~Worker() {}  // workers are deleted through Worker* (ConvNet::run), so this must be virtual
 57 |     virtual void run() = 0;
 58 | };
 59 | class DataWorker : public Worker {  // Worker that additionally carries a CPUData and a DataProvider pointer
 60 | protected:
 61 |     CPUData* _data;
 62 |     DataProvider* _dp;  // presumably the ConvNet's data provider -- confirm in the constructor definition
 63 | public:
 64 |     DataWorker(ConvNet& convNet, CPUData& data);
 65 |     virtual ~DataWorker();
 66 | };
 67 | 
 68 | class TrainingWorker : public DataWorker {  // DataWorker with a test/train flag; run() is defined in the .cu file
 69 | protected:
 70 |     bool _test;  // presumably true means evaluate only, without weight updates -- TODO confirm
 71 | public:
 72 |     TrainingWorker(ConvNet& convNet, CPUData& data, bool test);
 73 |     void run();
 74 | };
 75 | 
 76 | class SyncWorker : public Worker {  // Worker without a data payload; presumably yields a SYNC_DONE result -- confirm
 77 | public:
 78 |     SyncWorker(ConvNet& convNet);
 79 |     void run();
 80 | };
 81 | 
 82 | class GradCheckWorker : public DataWorker {  // presumably drives ConvNet::checkGradients() on the given data -- confirm
 83 | public:
 84 |     GradCheckWorker(ConvNet& convNet, CPUData& data);
 85 |     void run();
 86 | };
 87 | 
 88 | class MultiviewTestWorker : public DataWorker {  // DataWorker parameterised by a view count and a layer index
 89 | protected:
 90 |     int _numViews, _logregIdx;  // _logregIdx: presumably the index of a logistic-regression cost layer -- confirm
 91 | public:
 92 |     MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx);
 93 |     void run();
 94 | };
 95 | 
 96 | class FeatureWorker : public DataWorker {  // DataWorker holding a Matrix pointer and a layer index
 97 | protected:
 98 |     Matrix* _ftrs;  // presumably the destination for layer _layerIdx's outputs -- confirm in the .cu file
 99 |     int _layerIdx;
100 | public:
101 |     FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx);
102 |     ~FeatureWorker();
103 |     void run();
104 | };
105 | 
106 | #endif	/* WORKER_CUH */
107 | 
108 | 


--------------------------------------------------------------------------------
/ordereddict.py:
--------------------------------------------------------------------------------
  1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
  2 | # Passes Python2.7's test suite and incorporates all the latest updates.
  3 | 
  4 | try:
  5 |     from thread import get_ident as _get_ident
  6 | except ImportError:
  7 |     from dummy_thread import get_ident as _get_ident
  8 | 
  9 | try:
 10 |     from _abcoll import KeysView, ValuesView, ItemsView
 11 | except ImportError:
 12 |     pass
 13 | 
 14 | 
 15 | class OrderedDict(dict):
 16 |     'Dictionary that remembers insertion order'
 17 |     # An inherited dict maps keys to values.
 18 |     # The inherited dict provides __getitem__, __len__, __contains__, and get.
 19 |     # The remaining methods are order-aware.
 20 |     # Big-O running times for all methods are the same as for regular dictionaries.
 21 | 
 22 |     # The internal self.__map dictionary maps keys to links in a doubly linked list.
 23 |     # The circular doubly linked list starts and ends with a sentinel element.
 24 |     # The sentinel element never gets deleted (this simplifies the algorithm).
 25 |     # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
 26 | 
 27 |     def __init__(self, *args, **kwds):
 28 |         '''Initialize an ordered dictionary.  Signature is the same as for
 29 |         regular dictionaries, but keyword arguments are not recommended
 30 |         because their insertion order is arbitrary.
 31 | 
 32 |         '''
 33 |         if len(args) > 1:
 34 |             raise TypeError('expected at most 1 arguments, got %d' % len(args))
 35 |         try:
 36 |             self.__root  # probe only: if the root already exists, __init__ is re-running and the list is kept
 37 |         except AttributeError:
 38 |             self.__root = root = []                     # sentinel node
 39 |             root[:] = [root, root, None]
 40 |             self.__map = {}
 41 |         self.__update(*args, **kwds)
 42 | 
 43 |     def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
 44 |         'od.__setitem__(i, y) <==> od[i]=y'
 45 |         # Setting a new item creates a new link which goes at the end of the linked
 46 |         # list, and the inherited dictionary is updated with the new key/value pair.
 47 |         if key not in self:
 48 |             root = self.__root
 49 |             last = root[0]
 50 |             last[1] = root[0] = self.__map[key] = [last, root, key]  # splice the new link in just before the sentinel
 51 |         dict_setitem(self, key, value)
 52 | 
 53 |     def __delitem__(self, key, dict_delitem=dict.__delitem__):
 54 |         'od.__delitem__(y) <==> del od[y]'
 55 |         # Deleting an existing item uses self.__map to find the link which is
 56 |         # then removed by updating the links in the predecessor and successor nodes.
 57 |         dict_delitem(self, key)  # raises KeyError for a missing key before any link is touched
 58 |         link_prev, link_next, key = self.__map.pop(key)
 59 |         link_prev[1] = link_next
 60 |         link_next[0] = link_prev
 61 | 
 62 |     def __iter__(self):
 63 |         'od.__iter__() <==> iter(od)'
 64 |         root = self.__root
 65 |         curr = root[1]  # first real link (index 1 is NEXT)
 66 |         while curr is not root:
 67 |             yield curr[2]
 68 |             curr = curr[1]
 69 | 
 70 |     def __reversed__(self):
 71 |         'od.__reversed__() <==> reversed(od)'
 72 |         root = self.__root
 73 |         curr = root[0]  # last real link (index 0 is PREV)
 74 |         while curr is not root:
 75 |             yield curr[2]
 76 |             curr = curr[0]
 77 | 
 78 |     def clear(self):
 79 |         'od.clear() -> None.  Remove all items from od.'
 80 |         try:
 81 |             for node in self.__map.itervalues():
 82 |                 del node[:]  # empty each link in place so the links stop referencing one another
 83 |             root = self.__root
 84 |             root[:] = [root, root, None]
 85 |             self.__map.clear()
 86 |         except AttributeError:  # __map / __root not created yet: nothing to unlink
 87 |             pass
 88 |         dict.clear(self)
 89 | 
 90 |     def popitem(self, last=True):
 91 |         '''od.popitem() -> (k, v), return and remove a (key, value) pair.
 92 |         Pairs are returned in LIFO order if last is true or FIFO order if false.
 93 | 
 94 |         '''
 95 |         if not self:
 96 |             raise KeyError('dictionary is empty')
 97 |         root = self.__root
 98 |         if last:
 99 |             link = root[0]
100 |             link_prev = link[0]
101 |             link_prev[1] = root
102 |             root[0] = link_prev
103 |         else:
104 |             link = root[1]
105 |             link_next = link[1]
106 |             root[1] = link_next
107 |             link_next[0] = root
108 |         key = link[2]
109 |         del self.__map[key]
110 |         value = dict.pop(self, key)  # plain dict.pop: the link has already been unlinked by hand above
111 |         return key, value
112 | 
113 |     # -- the following methods do not depend on the internal structure --
114 | 
115 |     def keys(self):
116 |         'od.keys() -> list of keys in od'
117 |         return list(self)
118 | 
119 |     def values(self):
120 |         'od.values() -> list of values in od'
121 |         return [self[key] for key in self]
122 | 
123 |     def items(self):
124 |         'od.items() -> list of (key, value) pairs in od'
125 |         return [(key, self[key]) for key in self]
126 | 
127 |     def iterkeys(self):
128 |         'od.iterkeys() -> an iterator over the keys in od'
129 |         return iter(self)
130 | 
131 |     def itervalues(self):
132 |         'od.itervalues -> an iterator over the values in od'
133 |         for k in self:
134 |             yield self[k]
135 | 
136 |     def iteritems(self):
137 |         'od.iteritems -> an iterator over the (key, value) items in od'
138 |         for k in self:
139 |             yield (k, self[k])
140 | 
141 |     def update(*args, **kwds):  # no named self parameter: the instance is taken from args[0] below
142 |         '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
143 | 
144 |         If E is a dict instance, does:           for k in E: od[k] = E[k]
145 |         If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
146 |         Or if E is an iterable of items, does:   for k, v in E: od[k] = v
147 |         In either case, this is followed by:     for k, v in F.items(): od[k] = v
148 | 
149 |         '''
150 |         if len(args) > 2:
151 |             raise TypeError('update() takes at most 2 positional '
152 |                             'arguments (%d given)' % (len(args),))
153 |         elif not args:
154 |             raise TypeError('update() takes at least 1 argument (0 given)')
155 |         self = args[0]
156 |         # Make progressively weaker assumptions about "other"
157 |         other = ()
158 |         if len(args) == 2:
159 |             other = args[1]
160 |         if isinstance(other, dict):
161 |             for key in other:
162 |                 self[key] = other[key]
163 |         elif hasattr(other, 'keys'):
164 |             for key in other.keys():
165 |                 self[key] = other[key]
166 |         else:
167 |             for key, value in other:
168 |                 self[key] = value
169 |         for key, value in kwds.items():
170 |             self[key] = value
171 | 
172 |     __update = update  # let subclasses override update without breaking __init__
173 | 
174 |     __marker = object()  # unique sentinel so pop() can tell "no default given" apart from default=None
175 | 
176 |     def pop(self, key, default=__marker):
177 |         '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
178 |         If key is not found, d is returned if given, otherwise KeyError is raised.
179 | 
180 |         '''
181 |         if key in self:
182 |             result = self[key]
183 |             del self[key]
184 |             return result
185 |         if default is self.__marker:
186 |             raise KeyError(key)
187 |         return default
188 | 
189 |     def setdefault(self, key, default=None):
190 |         'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
191 |         if key in self:
192 |             return self[key]
193 |         self[key] = default
194 |         return default
195 | 
196 |     def __repr__(self, _repr_running={}):  # the default dict persists across calls: registry of reprs in progress
197 |         'od.__repr__() <==> repr(od)'
198 |         call_key = id(self), _get_ident()
199 |         if call_key in _repr_running:
200 |             return '...'  # this object is already being repr'd on this thread: cut the recursion
201 |         _repr_running[call_key] = 1
202 |         try:
203 |             if not self:
204 |                 return '%s()' % (self.__class__.__name__,)
205 |             return '%s(%r)' % (self.__class__.__name__, self.items())
206 |         finally:
207 |             del _repr_running[call_key]
208 | 
209 |     def __reduce__(self):
210 |         'Return state information for pickling'
211 |         items = [[k, self[k]] for k in self]
212 |         inst_dict = vars(self).copy()
213 |         for k in vars(OrderedDict()):  # attribute names every fresh instance has, i.e. the internal bookkeeping
214 |             inst_dict.pop(k, None)
215 |         if inst_dict:
216 |             return (self.__class__, (items,), inst_dict)
217 |         return self.__class__, (items,)
218 | 
219 |     def copy(self):
220 |         'od.copy() -> a shallow copy of od'
221 |         return self.__class__(self)
222 | 
223 |     @classmethod
224 |     def fromkeys(cls, iterable, value=None):
225 |         '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
226 |         and values equal to v (which defaults to None).
227 | 
228 |         '''
229 |         d = cls()
230 |         for key in iterable:
231 |             d[key] = value
232 |         return d
233 | 
234 |     def __eq__(self, other):
235 |         '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
236 |         while comparison to a regular mapping is order-insensitive.
237 | 
238 |         '''
239 |         if isinstance(other, OrderedDict):
240 |             return len(self)==len(other) and self.items() == other.items()
241 |         return dict.__eq__(self, other)
242 | 
243 |     def __ne__(self, other):
244 |         return not self == other
245 | 
246 |     # -- the following methods are only used in Python 2.7 --
247 | 
248 |     def viewkeys(self):
249 |         "od.viewkeys() -> a set-like object providing a view on od's keys"
250 |         return KeysView(self)  # KeysView is defined only if the _abcoll import at the top of the file succeeded
251 | 
252 |     def viewvalues(self):
253 |         "od.viewvalues() -> an object providing a view on od's values"
254 |         return ValuesView(self)  # same caveat: requires the _abcoll import to have succeeded
255 | 
256 |     def viewitems(self):
257 |         "od.viewitems() -> a set-like object providing a view on od's items"
258 |         return ItemsView(self)  # same caveat: requires the _abcoll import to have succeeded
259 | 


--------------------------------------------------------------------------------
/src/common/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Library target "common", built from matrix.cpp.
2 | add_library(common matrix.cpp)
3 | 
4 | # BLAS_LIBRARIES is not set in this file; it is expected to come from the including project.
5 | target_link_libraries(common
6 |   ${BLAS_LIBRARIES}
7 | )
8 | 


--------------------------------------------------------------------------------
/src/convnet.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <vector>
 28 | #include <iostream>
 29 | #include <string>
 30 | 
 31 | #include <nvmatrix.cuh>
 32 | #include <nvmatrix_operators.cuh>
 33 | #include <matrix.h>
 34 | #include <convnet.cuh>
 35 | #include <util.cuh>
 36 | 
 37 | using namespace std;
 38 | 
 39 | /*
 40 |  * =======================
 41 |  * ConvNet
 42 |  * =======================
 43 |  */
 44 | ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID) : Thread(false),  _deviceID(deviceID), _data(NULL) {
 45 |     try {
 46 |         const int layerCount = PyList_GET_SIZE(layerParams);
 47 |         // Pass 1: build every layer from its parameter dict and record the layers it reads from
 48 |         for (int layerIdx = 0; layerIdx < layerCount; ++layerIdx) {
 49 |             PyObject* layerDict = PyList_GET_ITEM(layerParams, layerIdx);
 50 |             string typeName = pyDictGetString(layerDict, "type");
 51 | 
 52 |             Layer* layer = initLayer(typeName, layerDict);
 53 |             // Connect backward links in graph for this layer
 54 |             intv* inputIdxs = pyDictGetIntV(layerDict, "inputs");
 55 |             if (inputIdxs != NULL) {
 56 |                 for (int k = 0; k < inputIdxs->size(); ++k) {
 57 |                     layer->addPrev(&getLayer(inputIdxs->at(k)));
 58 |                 }
 59 |             }
 60 |             delete inputIdxs;
 61 |         }
 62 | 
 63 |         // Connect the forward links in the graph
 64 |         for (int to = 0; to < _layers.size(); ++to) {
 65 |             vector<Layer*>& sources = _layers[to]->getPrev();
 66 |             for (int from = 0; from < sources.size(); ++from) {
 67 |                 sources[from]->addNext(_layers[to]);
 68 |             }
 69 |         }
 70 | 
 71 |         // Execute post-initialization stuff
 72 |         for (int idx = 0; idx < _layers.size(); ++idx) {
 73 |             _layers[idx]->postInit();
 74 |         }
 75 | 
 76 |         _dp = new DataProvider(minibatchSize);
 77 |     } catch (string& s) {  // layer construction reports failures by throwing a std::string
 78 |         cout << "Error creating ConvNet: " << s << endl;
 79 |         exit(1);
 80 |     }
 81 | }
 82 | 
 83 | /*
 84 |  * Override this in derived classes
 85 |  */
 86 | Layer* ConvNet::initLayer(string& layerType, PyObject* paramsDict) {  // builds one layer, appends it to _layers, returns it
 87 |     if (layerType == "fc") {
 88 |         _layers.push_back(new FCLayer(this, paramsDict));
 89 |     } else if (layerType == "conv") {
 90 |         _layers.push_back(new ConvLayer(this, paramsDict));
 91 |     } else if (layerType == "local") {
 92 |         _layers.push_back(new LocalUnsharedLayer(this, paramsDict));
 93 |     } else if (layerType == "pool") {
 94 |         _layers.push_back(&PoolLayer::makePoolLayer(this, paramsDict));
 95 |     } else if (layerType == "rnorm") {
 96 |         _layers.push_back(new ResponseNormLayer(this, paramsDict));
 97 |     } else if (layerType == "cmrnorm") {
 98 |         _layers.push_back(new CrossMapResponseNormLayer(this, paramsDict));
 99 |     } else if (layerType == "cnorm") {
100 |         _layers.push_back(new ContrastNormLayer(this, paramsDict));
101 |     } else if (layerType == "softmax") {
102 |         _layers.push_back(new SoftmaxLayer(this, paramsDict));
103 |     } else if (layerType == "eltsum") {
104 |         _layers.push_back(new EltwiseSumLayer(this, paramsDict));
105 |     } else if (layerType == "eltmax") {
106 |         _layers.push_back(new EltwiseMaxLayer(this, paramsDict));
107 |     } else if (layerType == "neuron") {
108 |         _layers.push_back(new NeuronLayer(this, paramsDict));
109 |     } else if (layerType == "nailbed") {
110 |         _layers.push_back(new NailbedLayer(this, paramsDict));
111 |     } else if (layerType == "blur") {
112 |         _layers.push_back(new GaussianBlurLayer(this, paramsDict));
113 |     } else if (layerType == "resize") {
114 |         _layers.push_back(new ResizeLayer(this, paramsDict));
115 |     } else if (layerType == "rgb2yuv") {
116 |         _layers.push_back(new RGBToYUVLayer(this, paramsDict));
117 |     } else if (layerType == "rgb2lab") {
118 |         _layers.push_back(new RGBToLABLayer(this, paramsDict));
119 |     } else if (layerType == "data") {  // data layers are additionally tracked in _dataLayers
120 |         DataLayer *dataLayer = new DataLayer(this, paramsDict);
121 |         _layers.push_back(dataLayer);
122 |         _dataLayers.push_back(dataLayer);
123 |     } else if (layerType.compare(0, 5, "cost.") == 0) {  // any type starting with "cost."; also tracked in _costs
124 |         CostLayer *costLayer = &CostLayer::makeCostLayer(this, layerType, paramsDict);
125 |         _layers.push_back(costLayer);
126 |         _costs.push_back(costLayer);
127 |     } else {
128 |         throw string("Unknown layer type ") + layerType;
129 |     }
130 | 
131 |     return _layers.back();
132 | }
133 | 
134 | /*
135 |  * This executes in a new CPU thread so it's OK to initialize CUDA stuff here.
136 |  */
137 | void ConvNet::initCuda() {
138 |     int seed = time(0);  // default seed; CONVNET_RANDOM_SEED overrides it below
139 |     const int device = _deviceID < 0 ? gpuGetMaxGflopsDeviceId() : _deviceID;  // negative ID: auto-select
140 | 
141 |     cudaSetDevice(device);
142 |     cudaDeviceSetCacheConfig(cudaFuncCachePreferShared);
143 |     cublasInit();
144 | 
145 |     const char* seedOverride = getenv("CONVNET_RANDOM_SEED");
146 |     if (seedOverride != NULL) {
147 |       seed = atoi(seedOverride);
148 |     }
149 | 
150 |     NVMatrix::initRandom(seed);
151 | 
152 |     copyToGPU();
153 | }
154 | 
155 | void* ConvNet::run() {  // initialises CUDA, then executes queued workers one at a time
156 |     initCuda();
157 | 
158 |     for (;;) {  // never exits on its own; the return below is unreachable
159 |         Worker* job = _workerQueue.dequeue();
160 |         job->run();
161 |         delete job;
162 |     }
163 |     return NULL;
164 | }
165 | 
166 | Queue<Worker*>& ConvNet::getWorkerQueue() {  // the queue that run() dequeues workers from
167 |     return _workerQueue;
168 | }
169 | 
170 | Queue<WorkResult*>& ConvNet::getResultQueue() {  // queue of WorkResult pointers; nothing in this file enqueues to it
171 |     return _resultQueue;
172 | }
173 | 
174 | DataProvider& ConvNet::getDataProvider() {  // the DataProvider allocated in the constructor
175 |     return *_dp;
176 | }
177 | 
178 | Layer& ConvNet::operator[](int idx) {  // unchecked access to layer idx; same body as getLayer()
179 |     return *_layers[idx];
180 | }
181 | 
182 | Layer& ConvNet::getLayer(int idx) {  // unchecked access to layer idx (no bounds test)
183 |     return *_layers[idx];
184 | }
185 | 
186 | void ConvNet::copyToCPU() {  // forwards copyToCPU() to every layer, in definition order
187 |     for (vector<Layer*>::iterator it = _layers.begin(); it != _layers.end(); ++it) {
188 |         (*it)->copyToCPU();
189 |     }
190 | }
191 | 
192 | void ConvNet::copyToGPU() {  // forwards copyToGPU() to every layer, in definition order
193 |     for (vector<Layer*>::iterator it = _layers.begin(); it != _layers.end(); ++it) {
194 |         (*it)->copyToGPU();
195 |     }
196 | }
197 | 
198 | void ConvNet::updateWeights() {  // forwards updateWeights() to every layer, in definition order
199 |     for (vector<Layer*>::iterator it = _layers.begin(); it != _layers.end(); ++it) {
200 |         (*it)->updateWeights();
201 |     }
202 | }
203 | 
204 | void ConvNet::reset() {  // forwards reset() to every layer; called around each fprop/bprop
205 |     for (vector<Layer*>::iterator it = _layers.begin(); it != _layers.end(); ++it) {
206 |         (*it)->reset();
207 |     }
208 | }
209 | 
210 | int ConvNet::getNumLayers() {  // layer count, narrowed from the container's size type to int
211 |     return _layers.size();
212 | }
213 | 
214 | void ConvNet::bprop(PASS_TYPE passType) {  // backward pass is started from every cost layer, then state is reset
215 |     for (int c = 0; c < _costs.size(); ++c) {
216 |         _costs[c]->bprop(passType);
217 |     }
218 |     reset();
219 | }
220 | 
221 | void ConvNet::fprop(PASS_TYPE passType) {  // forward pass over the current _data, started from every data layer
222 |     assert(_data != NULL);
223 |     reset();
224 |     for (int d = 0; d < _dataLayers.size(); ++d) {
225 |         _dataLayers[d]->fprop(_data->getData(), passType);
226 |     }
227 | }
228 | 
229 | void ConvNet::fprop(GPUData& data, PASS_TYPE passType) {  // adopts `data` as the current batch, then runs fprop
230 |     if (_data != &data) {
231 |         delete _data;  // the previously held batch is owned by this object
232 |         _data = &data;
233 |     }
234 |     fprop(passType);
235 | }
236 | 
237 | void ConvNet::fprop(int miniIdx, PASS_TYPE passType) {  // fetches minibatch miniIdx from _dp and runs fprop on it
238 |     delete _data;  // the previous batch is released before the new one is requested
239 |     _data = &_dp->getMinibatch(miniIdx);
240 |     fprop(passType);
241 | }
242 | 
243 | Cost& ConvNet::getCost() {  // returns a heap-allocated Cost; the caller must delete it
244 |     return *new Cost(_data->getNumCases(), _costs);
245 | }
246 | 
247 | // Same as getCost() but adds results to given cost and returns it
248 | Cost& ConvNet::getCost(Cost& cost) {
249 |     Cost* batchCost = &getCost();  // heap-allocated by getCost(); released below
250 |     cost += *batchCost;
251 |     delete batchCost;
252 |     return cost;
253 | }
254 | 
255 | double ConvNet::getCostValue() {  // scalar value of the current cost; the temporary Cost is freed here
256 |     Cost* batchCost = &getCost();
257 |     double value = batchCost->getValue();
258 |     delete batchCost;
259 |     return value;
260 | }
261 | 
262 | /*
263 |  * Gradient checking stuff
264 |  */
265 | void ConvNet::checkGradients() {
266 |     _numFailures = 0;
267 |     _numTests = 0;
268 |     fprop(0, PASS_GC);  // baseline forward/backward pass on minibatch 0
269 |     _baseErr = getCostValue();
270 |     bprop(PASS_GC);
271 | 
272 |     for (int idx = 0; idx < _layers.size(); ++idx) {
273 |         _layers[idx]->checkGradients();
274 |     }
275 | 
276 |     cout << "------------------------" << endl;
277 |     if (_numFailures <= 0) {
278 |         cout << "ALL " << _numTests << " TESTS PASSED" << endl;
279 |     } else {
280 |         cout << _numFailures << "/" << _numTests << " TESTS FAILED" << endl;
281 |     }
282 | }
283 | 
284 | /*
285 |  * name: weight matrix name
286 |  * eps: finite difference step
287 |  */
288 | bool ConvNet::checkGradient(const string& name, float eps, Weights& weights) {
289 |     Matrix numericGrad(weights.getNumRows(), weights.getNumCols());
290 |     Matrix diff(numericGrad);
291 |     numericGrad.apply(Matrix::ZERO);
292 |     Matrix hostW;
293 | 
294 |     weights.getW().copyToHost(hostW, true);
295 | 
296 |     for(int r = 0; r < weights.getNumRows(); r++) {
297 |         for (int c = 0; c < weights.getNumCols(); c++) {
298 |             float saved = hostW(r,c);
299 |             hostW(r,c) += eps;  // perturb a single weight by eps
300 |             weights.getW().copyFromHost(hostW);
301 |             hostW(r,c) = saved;
302 |             fprop(PASS_GC);
303 |             double err = getCostValue();
304 |             numericGrad(r,c) = (err - _baseErr) / (_data->getNumCases() * eps);  // one-sided finite difference
305 |             if (isnan(numericGrad(r,c)) || isinf(numericGrad(r,c))) {
306 |                 cout << "Numerical computation produced nan or inf when checking '" << name << "': " << numericGrad(r,c) << endl;
307 |                 cout << "Consider reducing the sizes of the weights or finite difference steps." << endl;
308 |                 cout << "Exiting." << endl;
309 |                 exit(1);
310 |             }
311 |             weights.getW().copyFromHost(hostW);  // restore the unperturbed weights on the device
312 |         }
313 |     }
314 | 
315 |     Matrix analyticGrad;
316 |     weights.getGrad().copyToHost(analyticGrad, true);
317 |     analyticGrad.scale(-1.0 / _data->getNumCases());
318 |     float analyticNorm = analyticGrad.norm();
319 |     float numericNorm = numericGrad.norm();
320 |     numericGrad.subtract(analyticGrad, diff);
321 |     float relErr = diff.norm() / analyticNorm;
322 |     bool fail = relErr >= GC_REL_ERR_THRESH;
323 |     if (fail || !GC_SUPPRESS_PASSES) {
324 |         cout << "========================" << endl;
325 |         printf("(%s) %s GRADIENT CHECK\n", fail ? "****FAIL****" : "PASS", name.c_str());
326 |         cout << "========================" << endl;
327 |         cout << "Analytic:" << endl;
328 |         analyticGrad.print(6,4);
329 |         cout << "Numeric:" << endl;
330 |         numericGrad.print(6,4);
331 |         printf("Analytic norm: %e\n", analyticNorm);
332 |         printf("Numeric norm:  %e\n", numericNorm);
333 |         printf("Relative error: %e\n", relErr);
334 |     }
335 |     _numTests++;
336 |     _numFailures += fail;
337 |     return fail;
338 | }
339 | 


--------------------------------------------------------------------------------
/src/cost.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <iostream>
 28 | #include <cost.cuh>
 29 | 
 30 | using namespace std;
 31 | 
 32 | /*
 33 |  * =====================
 34 |  * Cost
 35 |  * =====================
 36 |  */
 37 | 
 38 | Cost::Cost(int numCases) : _numCases(numCases) { // cost record for numCases cases with no cost entries added here
 39 | }
 40 | 
 41 | Cost::Cost(int numCases, vector<CostLayer*>& costs) : _numCases(numCases) { // records each cost layer's values and coefficient, keyed by layer name
 42 |     for (vector<CostLayer*>::iterator it = costs.begin(); it != costs.end(); ++it) {
 43 |         _costMap[(*it)->getName()] = &(*it)->getCost(); // stores the ADDRESS of the returned vector; ~Cost() deletes it, so getCost() presumably returns a heap allocation -- TODO confirm
 44 |         _costCoeffMap[(*it)->getName()] = (*it)->getCoeff();
 45 |     }
 46 | }
 47 | 
 48 | int Cost::getNumCases() { // number of cases this record covers
 49 |     return _numCases;
 50 | }
 51 | 
 52 | doublev& Cost::operator [](const string s) { // value vector stored under cost name s
 53 |     return *_costMap[s]; // NOTE(review): map operator[] inserts a null pointer for an unknown name, which is then dereferenced
 54 | }
 55 | 
 56 | CostMap& Cost::getCostMap() { // mutable access to the name -> value-vector map
 57 |     return _costMap;
 58 | }
 59 | 
 60 | CostCoeffMap& Cost::getCostCoeffMap() { // mutable access to the name -> coefficient map
 61 |     return _costCoeffMap;
 62 | }
 63 | 
 64 | double Cost::getValue() { // coefficient-weighted sum over all costs, using only element 0 of each value vector
 65 |     double val = 0;
 66 |     for (CostMap::iterator it = _costMap.begin(); it != _costMap.end(); ++it) {
 67 |         val += _costCoeffMap[it->first] * it->second->at(0); // at(0) throws if the vector is empty; a missing coefficient is default-inserted by operator[]
 68 |     }
 69 |     return val;
 70 | }
 71 | 
 72 | Cost& Cost::operator += (Cost& er) { // adds er's cost vectors element-wise into this object and accumulates its case count
 73 |     CostMap& theirCosts = er.getCostMap();
 74 |     CostCoeffMap& theirCoeffs = er.getCostCoeffMap();
 75 |     for (CostMap::const_iterator src = theirCosts.begin(); src != theirCosts.end(); ++src) {
 76 |         if (_costMap.find(src->first) == _costMap.end()) {
 77 |             // First time this cost name is seen: adopt it together with its coefficient.
 78 |             _costMap[src->first] = new doublev();
 79 |             _costCoeffMap[src->first] = theirCoeffs[src->first];
 80 |         }
 81 |         doublev& mine = *_costMap[src->first];
 82 |         const doublev& theirs = *src->second;
 83 |         if (mine.size() < theirs.size()) {
 84 |             mine.resize(theirs.size(), 0); // zero-pad so every index of `theirs` exists in `mine`
 85 |         }
 86 |         for (size_t k = 0; k < theirs.size(); ++k) {
 87 |             mine[k] += theirs[k];
 88 |         }
 89 |     }
 90 |     _numCases += er.getNumCases();
 91 |     return *this;
 92 | }
 93 | 
 94 | Cost& Cost::operator /= (const double v) { // divides every stored cost value by v, in place
 95 |     for (CostMap::const_iterator entry = _costMap.begin(); entry != _costMap.end(); ++entry) {
 96 |         doublev& values = *entry->second;
 97 |         for (size_t k = 0; k < values.size(); ++k) values[k] /= v;
 98 |         // v == 0 is not rejected; the division then follows floating-point rules (inf / NaN)
 99 |     }
100 |     return *this;
101 | }
102 | 
103 | Cost::~Cost() { // releases every value vector referenced by the cost map
104 |     for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) {
105 |         delete it->second; // the map owns its vectors: those created in operator+= and those stored by the constructor
106 |     }
107 | }


--------------------------------------------------------------------------------
/src/cudaconv2/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cuda_add_library(cudaconv2  # convolution kernels, built through the FindCUDA macro
 2 |   conv_util.cu
 3 |   filter_acts.cu
 4 |   img_acts.cu
 5 |   weight_acts.cu
 6 | )
 7 | 
 8 | target_link_libraries(cudaconv2  # plain (keyword-less) signature kept: FindCUDA's cuda_add_library links with the plain form by default and the two forms cannot be mixed on one target
 9 |   nvmatrix
10 |   ${BLAS_LIBRARIES}
11 |   ${CUDA_CUBLAS_LIBRARIES}
12 | )
13 | 


--------------------------------------------------------------------------------
/src/data.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <algorithm>
 28 | #include <data.cuh>
 29 | 
 30 | using namespace std;
 31 | 
 32 | DataProvider::DataProvider(int minibatchSize) :
 33 |     _minibatchSize(minibatchSize), _hData(NULL), _dataSize(0) {
 34 |     // No host data is attached yet (see setData); _dataSize starts at 0 so that it is never read uninitialised.
 35 | }
 36 | 
 37 | GPUData& DataProvider::operator[](int idx) { // shorthand for getMinibatch(idx)
 38 |     return getMinibatch(idx);
 39 | }
 40 | 
 41 | void DataProvider::clearData() { // drops the current host data set and resets the recorded size
 42 |     delete _hData; // NOTE(review): the comment in setData() says the DataWorker destructor deletes this object -- confirm it cannot be freed twice
 43 |     _hData = NULL;
 44 |     _dataSize = 0;
 45 | }
 46 | 
 47 | void DataProvider::setData(CPUData& hData) { // adopts hData as the current host data set and, when it is small enough, copies it to the GPU
 48 |     // This is now deleted by the DataWorker's destructor
 49 | //    delete _hData; // Delete old CPU matrices
 50 | 
 51 |     _hData = &hData; // only the address is kept, so hData must stay alive while this provider uses it
 52 |     _dataSize = 0;
 53 |     for (int i = 0; i < hData.getSize(); i++) {
 54 |         _dataSize += hData[i].getNumDataBytes();
 55 |     }
 56 |     _dataSize /= 1024 * 1024; // bytes -> MiB (truncating if _dataSize is an integer type -- declared outside this file)
 57 |     if (_dataSize < MAX_DATA_ON_GPU) { // MAX_DATA_ON_GPU is therefore compared against a size in MiB
 58 |         for (int i = 0; i < hData.getSize(); i++) {
 59 |             if (i >= _data.size()) {
 60 |                 _data.push_back(new NVMatrix()); // grow the device-side list on demand; existing entries are reused
 61 |             }
 62 |             _data[i]->copyFromHost(hData[i], true);
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | GPUData& DataProvider::getMinibatch(int idx) { // data slice for minibatch number idx
 68 |     assert(idx >= 0 && idx < getNumMinibatches());
 69 |     return getDataSlice(idx * _minibatchSize, (idx + 1) * _minibatchSize); // the end index may exceed the case count; getDataSlice clamps it
 70 | }
 71 | 
 72 | GPUData& DataProvider::getDataSlice(int startCase, int endCase) { // new GPUData holding, for every data matrix, the cases from startCase up to min(endCase, number of cases)
 73 |     assert(_hData != NULL);
 74 |     assert(_hData->getNumCases() > 0);
 75 | 
 76 |     NVMatrixV& miniData = *new NVMatrixV(); // heap-allocated and handed to the GPUData below; never freed in this function
 77 | 
 78 |     for (int i = 0; i < _hData->getData().size(); i++) {
 79 |         miniData.push_back(new NVMatrix());
 80 |         if (_dataSize < MAX_DATA_ON_GPU) { // same test as in setData(): the whole data set was copied to the GPU
 81 |             if (_data[i]->isTrans()) { // cases are sliced along rows for transposed matrices, along columns otherwise
 82 |                 _data[i]->sliceRows(startCase, min(_hData->getNumCases(), endCase), *miniData[i]);
 83 |             } else {
 84 |                 _data[i]->sliceCols(startCase, min(_hData->getNumCases(), endCase), *miniData[i]);
 85 |             }
 86 |         } else { // data set was not copied to the GPU: slice on the host, then upload only the slice
 87 |             Matrix tmp;
 88 |             if ((*_hData)[i].isTrans()) {
 89 |                 (*_hData)[i].sliceRows(startCase, min(_hData->getNumCases(), endCase), tmp);
 90 |             } else {
 91 |                 (*_hData)[i].sliceCols(startCase, min(_hData->getNumCases(), endCase), tmp);
 92 |             }
 93 |             miniData.back()->copyFromHost(tmp, true);
 94 |         }
 95 |     }
 96 | 
 97 |     return *new GPUData(miniData); // NOTE(review): allocated with new but returned by reference -- presumably the caller deletes it; confirm
 98 | }
 99 | 
100 | int DataProvider::getNumMinibatches() { // number of minibatches needed to cover all cases
101 |     assert(_hData != NULL);
102 |     assert(_hData->getNumCases() > 0);
103 |     return DIVUP(_hData->getNumCases(), _minibatchSize); // DIVUP presumably rounds the quotient up, so a final partial minibatch is counted -- macro defined elsewhere
104 | }
105 | 
106 | int DataProvider::getMinibatchSize() { // nominal minibatch size given to the constructor
107 |     return _minibatchSize;
108 | }
109 | 
110 | int DataProvider::getNumCases() { // total number of cases in the current host data set
111 |     assert(_hData != NULL);
112 |     assert(_hData->getNumCases() > 0);
113 |     return _hData->getNumCases();
114 | }
115 | 
116 | int DataProvider::getNumCasesInMinibatch(int idx) { // actual case count of minibatch idx
117 |     assert(_hData != NULL);
118 |     assert(_hData->getNumCases() > 0);
119 |     assert(idx >= 0 && idx < getNumMinibatches());
120 |     return min(_minibatchSize, max(0, _hData->getNumCases() - idx * _minibatchSize)); // cases remaining from this minibatch's start, clamped to [0, _minibatchSize]
121 | }


--------------------------------------------------------------------------------
/src/layer_kernels.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <assert.h>
 28 | 
 29 | #include <layer_kernels.cuh>
 30 | 
 31 | /*
 32 |  * E = -log(y_t)
 33 |  * probs:           (numOut, numCases)
 34 |  * labels:          (1, numCases)
 35 |  * maxProbs:        (1, numCases)
 36 |  * labelLogProbs:   (1, numCases)   (*out)
 37 |  * correctProbs:    (1, numCases)   (*out)
 38 |  *
 39 |  * One thread per case; there is no `target` parameter -- results go to labelLogProbs and correctProbs.
 40 |  */
 41 | __global__ void kLogregCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs,
 42 |                             const int numCases, const int numOut) {
 43 |     const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; // case index of this thread; assumes blockDim.x == LOGREG_ERR_THREADS_X
 44 | 
 45 |     if (tx < numCases) {
 46 |         const int label = int(labels[tx]); // labels arrive as floats and are converted to a class index
 47 |         const float maxp = maxProbs[tx];
 48 |         const float labelp = probs[label * numCases + tx]; // row `label`, column tx; label is not range-checked
 49 | 
 50 |         labelLogProbs[tx] = __logf(labelp); // log-probability of the true label (fast intrinsic); the sign flip of E is not applied here
 51 | 
 52 |         /*
 53 |          * Compute the probability of guessing the correct case if you take the most-probable label.
 54 |          *
 55 |          * This is done like this:
 56 |          *
 57 |          * - If the most probable label is not equal to the true label, then the probability is zero.
 58 |          * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum).
 59 |          *
 60 |          * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned
 61 |          * maximum probability. But it's a safety measure to prevent over-estimating your accuracy.
 62 |          * Though it could never happen in reality. Well it could. But it wouldn't. Cool?
 63 |          */
 64 |         if (labelp != maxp) {
 65 |             correctProbs[tx] = 0;
 66 |         } else {
 67 |             int numMax = 0;
 68 |             for (int i = 0; i < numOut; i++) {
 69 |                 numMax += probs[i * numCases + tx] == maxp; // counts how many outputs tie for the maximum
 70 |             }
 71 |             correctProbs[tx] = 1.0f / float(numMax);
 72 |         }
 73 |     }
 74 | }
 75 | 
 76 | /*
 77 |  * E = -log(y_t)
 78 |  * y_l:     (numOut, numCases)
 79 |  * labels:  (1, numCases)
 80 |  *
 81 |  * dE_dy_l: (numOut, numCases)   written when add == false, accumulated into when add == true
 82 |  */
 83 | template <bool add>
 84 | __global__ void kLogregCostGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases,
 85 |                                  const int numOut, const float gradCoeff) {
 86 |     const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; // case (column) index
 87 |     const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; // output (row) index
 88 |     const int tidx = ty * numCases + tx; // flat index into the (numOut, numCases) arrays
 89 | 
 90 |     if (ty < numOut && tx < numCases) {
 91 |         const int label = int(labels[tx]);
 92 |         float v = gradCoeff * (label == ty); // gradCoeff on the true label's row, 0 on every other row
 93 |         v = __fdividef(v, y_l[tidx]); // NOTE(review): if y_l[tidx] is 0 this divides by zero, including 0/0 on non-label rows
 94 |         if (add) {
 95 |             dE_dy_l[tidx] += v;
 96 |         } else {
 97 |             dE_dy_l[tidx] = v;
 98 |         }
 99 |     }
100 | }
101 | 
102 | /*
103 |  * dE_dy_l: (numOut, numCases)
104 |  * y_l:     (numOut, numCases)
105 |  *
106 |  * dE_dx_l: (numOut, numCases)   written when add == false, accumulated into when add == true
107 |  */
108 | template <bool add>
109 | __global__ void kSoftmaxGrad(float* dE_dy_l, float* y_l, float* dE_dx_l, const int numCases, const int numOut) {
110 |     const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; // case (column) index
111 |     const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; // output (row) index
112 |     const int tidx = ty * numCases + tx;
113 | 
114 |     if (ty < numOut && tx < numCases) {
115 |         float v = 0;
116 |         for (int j = 0; j < numOut; j++) {
117 |             v += dE_dy_l[j * numCases + tx] * ((j == ty) - y_l[j * numCases + tx]); // sums to dE_dy[ty] - sum_j dE_dy[j] * y[j] for this case
118 |         }
119 |         v *= y_l[tidx]; // final value: y[ty] * (dE_dy[ty] - sum_j dE_dy[j] * y[j])
120 | 
121 |         if (add) {
122 |             dE_dx_l[tidx] += v;
123 |         } else {
124 |             dE_dx_l[tidx] = v;
125 |         }
126 |     }
127 | }
128 | 
129 | /*
130 |  * E = -log(y_t)
131 |  * y_l:     (numOut, numCases)
132 |  * labels:  (1, numCases)
133 |  *
134 |  * dE_dx_l: (numOut, numCases)   written when add == false, accumulated into when add == true
135 |  */
136 | template <bool add>
137 | __global__ void kLogregSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases,
138 |                                  const int numOut, const float gradCoeff) {
139 |     const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; // case (column) index
140 |     const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; // output (row) index
141 |     const int tidx = ty * numCases + tx;
142 | 
143 |     if (ty < numOut && tx < numCases) {
144 |         const int label = int(labels[tx]);
145 |         float v = gradCoeff * ((label == ty) - y_l[tidx]); // gradCoeff * (one-hot label - y): no division by y, unlike kLogregCostGrad
146 |         if (add) {
147 |             dE_dx_l[tidx] += v;
148 |         } else {
149 |             dE_dx_l[tidx] = v;
150 |         }
151 |     }
152 | }
153 | 
154 | template <int B_X, bool add>
155 | __global__ void kEltwiseMaxGrad(float* actGrad, float* input, float* output, float* target,
156 |                                 const int numElements) { // passes actGrad through only where this input equals the output value
157 |     for (int i = B_X * blockIdx.x + threadIdx.x; i < numElements; i += B_X * gridDim.x) { // each thread strides over the array; assumes blockDim.x == B_X
158 |         if (add) {
159 |             target[i] += actGrad[i] * (output[i] == input[i]); // the comparison yields 1 or 0, acting as a mask
160 |         } else {
161 |             target[i] = actGrad[i] * (output[i] == input[i]);
162 |         }
163 |     }
164 | }
165 | 
166 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add) { // host wrapper launching kEltwiseMaxGrad over all elements
167 |     assert(actGrad.isContiguous());
168 |     assert(output.isContiguous());
169 |     assert(input.isContiguous());
170 |     assert(actGrad.isSameDims(input));
171 |     assert(actGrad.isSameDims(output));
172 | 
173 |     dim3 blocks(DIVUP(actGrad.getNumElements(), 128)); // one thread per element; NOTE(review): the block count is not capped although the kernel loops with a stride
174 |     dim3 threads(128); // must equal the kernel's B_X template argument (128 below)
175 |     if (add) {
176 |         assert(actGrad.isSameDims(target)); // accumulating: target must already have the right shape
177 |         cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, true>, cudaFuncCachePreferL1);
178 |         kEltwiseMaxGrad<128, true><<<blocks, threads>>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements());
179 |     } else {
180 |         target.resize(actGrad); // overwriting: target is resized to match actGrad first
181 |         cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, false>, cudaFuncCachePreferL1);
182 |         kEltwiseMaxGrad<128, false><<<blocks, threads>>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements());
183 |     }
184 | 
185 |     getLastCudaError("computeEltwiseMaxGrad: Kernel execution failed");
186 | }
187 | 
188 | /*
189 |  * E = -log(y_t)
190 |  * probs:           (numOut, numCases)
191 |  * labels:          (1, numCases)
192 |  * maxProbs:        (1, numCases)   (computed internally from probs)
193 |  * labelLogProbs:   (1, numCases)   (*out)
194 |  * correctProbs:    (1, numCases)   (*out)
195 |  *
196 |  * Both output matrices are resized to (1, numCases) here; there is no separate `target` argument.
197 |  */
198 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) {
199 |     int numCases = probs.getNumCols();
200 |     int numOut = probs.getNumRows();
201 | 
202 |     assert(labels.getNumElements() == numCases);
203 |     assert(!labels.isTrans());
204 |     assert(!probs.isTrans());
205 |     assert(labels.isContiguous());
206 |     assert(probs.isContiguous());
207 | 
208 |     NVMatrix& maxProbs = probs.max(0); // per-case maximum over outputs (presumably axis 0 = rows); the returned matrix is owned here and deleted below
209 | 
210 |     labelLogProbs_out.resize(1, numCases);
211 |     correctProbs_out.resize(1, numCases);
212 |     dim3 threads(LOGREG_ERR_THREADS_X, 1);
213 |     dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); // one thread per case
214 |     cudaFuncSetCacheConfig(kLogregCost, cudaFuncCachePreferL1);
215 |     kLogregCost<<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), maxProbs.getDevData(),
216 |                                      labelLogProbs_out.getDevData(), correctProbs_out.getDevData(),
217 |                                      numCases, numOut);
218 |     getLastCudaError("computeLogregCost: Kernel execution failed");
219 | //    cudaThreadSynchronize();
220 |     delete &maxProbs; // NOTE(review): freed right after the launch with no explicit synchronisation (see the commented-out call above) -- presumably safe with the NVMatrix destructor; confirm
221 | }
222 | 
223 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { // launches kLogregCostGrad: target (+)= coeff * [row == label] / probs
224 |     int numCases = probs.getLeadingDim();
225 |     int numOut = probs.getFollowingDim();
226 |     assert(labels.getNumElements() == numCases);
227 |     assert(probs.isContiguous());
228 |     assert(target.isContiguous());
229 |     assert(labels.isContiguous());
230 |     assert(!labels.isTrans());
231 |     assert(!probs.isTrans());
232 |     assert(!add || probs.isSameDims(target)); // accumulate mode never resizes target, so it must already match probs (same check as computeEltwiseMaxGrad)
233 |     dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
234 |     dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); // one thread per (output, case) element
235 |     if (!add) {
236 |         target.resize(probs); // overwrite mode: give target the shape of probs first
237 |         kLogregCostGrad<false><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
238 |                                                      numCases, numOut, coeff);
239 |     } else {
240 |         kLogregCostGrad<true><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
241 |                                                      numCases, numOut, coeff);
242 |     }
243 | 
244 |     getLastCudaError("computeLogregGrad: Kernel execution failed");
245 | }
246 | 
247 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add) { // launches kSoftmaxGrad: turns the gradient w.r.t. softmax outputs (actsGrad) into the gradient w.r.t. its inputs
248 |     int numCases = acts.getLeadingDim();
249 |     int numOut = acts.getFollowingDim();
250 | 
251 |     assert(acts.isSameDims(actsGrad));
252 |     assert(acts.isContiguous());
253 |     assert(actsGrad.isContiguous());
254 |     assert(target.isContiguous());
255 |     assert(acts.isTrans());
256 |     assert(actsGrad.isTrans());
257 |     assert(!add || acts.isSameDims(target)); // accumulate mode never resizes target, so it must already match acts (same check as computeEltwiseMaxGrad)
258 |     dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
259 |     dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); // one thread per (output, case) element
260 |     if (!add) {
261 |         target.resize(acts); // overwrite mode: give target the shape of acts first
262 |         kSoftmaxGrad<false><<<blocks, threads>>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut);
263 |     } else {
264 |         kSoftmaxGrad<true><<<blocks, threads>>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut);
265 |     }
266 |     getLastCudaError("computeSoftmaxGrad: Kernel execution failed");
267 | }
268 | 
269 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { // launches kLogregSoftmaxGrad: target (+)= coeff * (one-hot(labels) - probs)
270 |     int numCases = probs.getLeadingDim();
271 |     int numOut = probs.getFollowingDim();
272 |     assert(labels.getNumElements() == numCases);
273 |     assert(probs.isContiguous());
274 |     assert(target.isContiguous());
275 |     assert(labels.isContiguous());
276 |     assert(probs.isTrans());
277 |     assert(!add || probs.isSameDims(target)); // accumulate mode never resizes target, so it must already match probs (same check as computeEltwiseMaxGrad)
278 |     dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
279 |     dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); // one thread per (output, case) element
280 |     if (!add) {
281 |         target.resize(probs); // overwrite mode: give target the shape of probs first
282 |         kLogregSoftmaxGrad<false><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
283 |                                                      numCases, numOut, coeff);
284 |     } else {
285 |         kLogregSoftmaxGrad<true><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
286 |                                                      numCases, numOut, coeff);
287 |     }
288 | 
289 |     getLastCudaError("computeLogregSoftmaxGrad: Kernel execution failed");
290 | }
291 | 


--------------------------------------------------------------------------------
/src/neuron.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <neuron.cuh>
28 | #include <util.cuh>
29 | 
30 | using namespace std;
31 | 
32 | Neuron& Neuron::makeNeuron(PyObject* neuronDict) { // factory: builds the neuron named by neuronDict["type"]; the result is heap-allocated and returned by reference
33 |     string type = pyDictGetString(neuronDict, "type");
34 |     PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params"); // NULL if the key is absent (Python C API); only read by the brelu, tanh and linear branches
35 | 
36 |     if (type == "relu") {
37 |         return *new ReluNeuron();
38 |     }
39 | 
40 |     if (type == "softrelu") {
41 |         return *new SoftReluNeuron();
42 |     }
43 | 
44 |     if (type == "brelu") {
45 |         float a = pyDictGetFloat(neuronParamsDict, "a"); // single parameter "a", passed to the BoundedReluNeuron constructor
46 |         return *new BoundedReluNeuron(a);
47 |     }
48 | 
49 |     if (type == "abs") {
50 |         return *new AbsNeuron();
51 |     }
52 | 
53 |     if (type == "logistic") {
54 |         return *new LogisticNeuron();
55 |     }
56 | 
57 |     if (type == "tanh") {
58 |         float a = pyDictGetFloat(neuronParamsDict, "a"); // parameters "a" and "b" are forwarded to TanhNeuron unchanged
59 |         float b = pyDictGetFloat(neuronParamsDict, "b");
60 | 
61 |         return *new TanhNeuron(a, b);
62 |     }
63 | 
64 |     if (type == "square") {
65 |         return *new SquareNeuron();
66 |     }
67 | 
68 |     if (type == "sqrt") {
69 |         return *new SqrtNeuron();
70 |     }
71 | 
72 |     if (type == "linear") {
73 |         float a = pyDictGetFloat(neuronParamsDict, "a"); // parameters "a" and "b" are forwarded to LinearNeuron unchanged
74 |         float b = pyDictGetFloat(neuronParamsDict, "b");
75 |         return *new LinearNeuron(a, b);
76 |     }
77 | 
78 |     if (type == "ident") {
79 |         return *new Neuron(); // "ident" maps to the base Neuron class itself
80 |     }
81 | 
82 |     throw string("Unknown neuron type: ") + type; // the thrown object is a std::string, not a std::exception
83 | }
84 | 


--------------------------------------------------------------------------------
/src/nvmatrix/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cuda_add_library(nvmatrix  # GPU matrix class and its kernels, built through the FindCUDA macro
 2 |   nvmatrix.cu
 3 |   nvmatrix_kernels.cu
 4 | )
 5 | 
 6 | target_link_libraries(nvmatrix  # plain (keyword-less) signature kept: FindCUDA's cuda_add_library links with the plain form by default and the two forms cannot be mixed on one target
 7 |   common
 8 |   ${BLAS_LIBRARIES}
 9 |   ${CUDA_CUBLAS_LIBRARIES}
10 | )


--------------------------------------------------------------------------------
/src/nvmatrix/nvmatrix_kernels.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <stdio.h>
28 | #include <cuda_runtime.h>
29 | #include <nvmatrix_kernels.cuh>
30 | 
31 | __global__ void kTile(const float* src, float* tgt, const uint srcWidth, const uint srcHeight, const uint tgtWidth, const uint tgtHeight) { // fills tgt by repeating src periodically in both dimensions
32 |     const uint firstEl = blockIdx.x * blockDim.x + threadIdx.x;
33 |     const uint stride = blockDim.x * gridDim.x; // total number of launched threads
34 |     const uint numEls = tgtWidth * tgtHeight;
35 |     for (uint el = firstEl; el < numEls; el += stride) {
36 |         const uint tgtRow = el / tgtWidth;
37 |         const uint tgtCol = el % tgtWidth;
38 |         const uint srcRow = tgtRow % srcHeight; // wrap target coordinates back into the source
39 |         const uint srcCol = tgtCol % srcWidth;
40 |         tgt[el] = src[srcRow * srcWidth + srcCol];
41 |     }
42 | }
43 | 
44 | __global__ void kDotProduct_r(float* a, float* b, float* target, const uint numCols, const uint numElements) { // each block writes one partial sum of a[i] * b[i] to target[blockIdx.x]
45 |     __shared__ float shmem[DP_BLOCKSIZE]; // one accumulator per thread of the block
46 | 
47 |     uint eidx = DP_BLOCKSIZE * blockIdx.x + threadIdx.x;
48 |     shmem[threadIdx.x] = 0;
49 |     if (eidx < numCols) {
50 |         for (; eidx < numElements; eidx += numCols) { // this thread visits every numCols-th element starting at its own index
51 |             shmem[threadIdx.x] += a[eidx] * b[eidx];
52 |         }
53 |     }
54 |     __syncthreads();
55 |     if (threadIdx.x < 256) { // pairwise reduction; the +256 step assumes DP_BLOCKSIZE == blockDim.x == 512 -- TODO confirm
56 |         shmem[threadIdx.x] += shmem[threadIdx.x + 256];
57 |     }
58 |     __syncthreads();
59 |     if (threadIdx.x < 128) {
60 |         shmem[threadIdx.x] += shmem[threadIdx.x + 128];
61 |     }
62 |     __syncthreads();
63 |     if (threadIdx.x < 64) {
64 |         shmem[threadIdx.x] += shmem[threadIdx.x + 64];
65 |     }
66 |     __syncthreads();
67 |     if (threadIdx.x < 32) { // last 64 -> 1 steps run without __syncthreads
68 |         volatile float* mysh = &shmem[threadIdx.x]; // volatile forces each step to re-read shared memory
69 |         *mysh += mysh[32]; // NOTE(review): these unsynchronised steps presumably rely on the 32 threads executing in lockstep -- confirm for the target architecture
70 |         *mysh += mysh[16];
71 |         *mysh += mysh[8];
72 |         *mysh += mysh[4];
73 |         *mysh += mysh[2];
74 |         *mysh += mysh[1];
75 |         if (threadIdx.x == 0) {
76 |             target[blockIdx.x] = *mysh; // block total; combining the per-block values is left to the caller
77 |         }
78 |     }
79 | }
80 | 
81 | __global__ void kSetupCurand(curandState *state, unsigned long long seed) { // initialises one curandState per launched thread
82 |     const uint tidx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x; // global thread index; assumes blockDim.x == NUM_RND_THREADS_PER_BLOCK -- TODO confirm at the launch site
83 |     /* Each thread gets same seed, a different sequence number,
84 |      no offset */
85 |     curand_init(seed, tidx, 0, &state[tidx]); // no bounds check: state presumably has an entry for every launched thread -- TODO confirm
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/src/pyconvnet.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <Python.h>
 28 | #include <arrayobject.h>
 29 | #include <assert.h>
 30 | #include <helper_cuda.h>
 31 | #include <cublas.h>
 32 | #include <time.h>
 33 | #include <vector>
 34 | 
 35 | #include <matrix.h>
 36 | #include <queue.h>
 37 | #include <worker.cuh>
 38 | #include <util.cuh>
 39 | #include <cost.cuh>
 40 | 
 41 | #include <pyconvnet.cuh>
 42 | #include <convnet.cuh>
 43 | 
 44 | using namespace std;
// The ConvNet instance driven by this extension module; NULL until initModel() creates it.
static ConvNet* model = NULL;

// Method table handed to Py_InitModule(): maps each Python-visible function name to its
// C implementation. Every entry uses METH_VARARGS; the { NULL, NULL } entry ends the table.
static PyMethodDef _ConvNetMethods[] = {  { "initModel",          initModel,          METH_VARARGS },
                                              { "startBatch",         startBatch,         METH_VARARGS },
                                              { "finishBatch",        finishBatch,        METH_VARARGS },
                                              { "checkGradients",     checkGradients,     METH_VARARGS },
                                              { "startMultiviewTest", startMultiviewTest, METH_VARARGS },
                                              { "startFeatureWriter",  startFeatureWriter,         METH_VARARGS },
                                              { "syncWithHost",       syncWithHost,       METH_VARARGS },
                                              { NULL, NULL }
};
 56 | 
/*
 * Module initialisation entry point: registers the method table under the module
 * name "_convnet", then runs NumPy's import_array() so the array API can be used.
 */
PyMODINIT_FUNC
init_convnet() {
    (void) Py_InitModule("_convnet", _ConvNetMethods);
    import_array();
}
 62 | 
 63 | PyObject* initModel(PyObject *self, PyObject *args) {
 64 |     assert(model == NULL);
 65 | 
 66 |     PyListObject* pyLayerParams;
 67 |     int pyMinibatchSize;
 68 |     int pyDeviceID;
 69 | 
 70 |     if (!PyArg_ParseTuple(args, "O!ii",
 71 |                           &PyList_Type, &pyLayerParams,
 72 |                           &pyMinibatchSize,
 73 |                           &pyDeviceID)) {
 74 |         return NULL;
 75 |     }
 76 |     model = new ConvNet(pyLayerParams,
 77 |                         pyMinibatchSize,
 78 |                         pyDeviceID);
 79 | 
 80 |     model->start();
 81 |     return Py_BuildValue("i", 0);
 82 | }
 83 | 
 84 | /*
 85 |  * Starts training/testing on the given batch (asynchronous -- returns immediately).
 86 |  */
 87 | PyObject* startBatch(PyObject *self, PyObject *args) {
 88 |     assert(model != NULL);
 89 |     PyListObject* data;
 90 |     int test = 0;
 91 |     if (!PyArg_ParseTuple(args, "O!|i",
 92 |         &PyList_Type, &data,
 93 |         &test)) {
 94 |         return NULL;
 95 |     }
 96 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
 97 | 
 98 |     TrainingWorker* wr = new TrainingWorker(*model, *new CPUData(mvec), test);
 99 |     model->getWorkerQueue().enqueue(wr);
100 |     return Py_BuildValue("i", 0);
101 | }
102 | 
103 | /*
104 |  * Starts testing on the given batch (asynchronous -- returns immediately).
105 |  */
106 | PyObject* startMultiviewTest(PyObject *self, PyObject *args) {
107 |     assert(model != NULL);
108 |     PyListObject* data;
109 |     int numViews, logregIdx;
110 |     if (!PyArg_ParseTuple(args, "O!ii",
111 |         &PyList_Type, &data,
112 |         &numViews,
113 |         &logregIdx)) {
114 |         return NULL;
115 |     }
116 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
117 | 
118 |     MultiviewTestWorker* wr = new MultiviewTestWorker(*model, *new CPUData(mvec), numViews, logregIdx);
119 |     model->getWorkerQueue().enqueue(wr);
120 |     return Py_BuildValue("i", 0);
121 | }
122 | 
123 | PyObject* startFeatureWriter(PyObject *self, PyObject *args) {
124 |     assert(model != NULL);
125 |     PyListObject* data;
126 |     int layerIdx;
127 |     if (!PyArg_ParseTuple(args, "O!i",
128 |         &PyList_Type, &data,
129 |         &layerIdx)) {
130 |         return NULL;
131 |     }
132 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
133 |     Matrix& ftrs = *mvec.back();
134 |     mvec.pop_back();
135 | 
136 |     FeatureWorker* wr = new FeatureWorker(*model, *new CPUData(mvec), ftrs, layerIdx);
137 |     model->getWorkerQueue().enqueue(wr);
138 |     return Py_BuildValue("i", 0);
139 | }
140 | 
141 | /*
142 |  * Waits for the trainer to finish training on the batch given to startBatch.
143 |  */
144 | PyObject* finishBatch(PyObject *self, PyObject *args) {
145 |     assert(model != NULL);
146 |     WorkResult* res = model->getResultQueue().dequeue();
147 |     assert(res != NULL);
148 |     assert(res->getResultType() == WorkResult::BATCH_DONE);
149 | 
150 |     Cost& cost = res->getResults();
151 |     PyObject* dict = PyDict_New();
152 |     CostMap& costMap = cost.getCostMap();
153 |     for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) {
154 |         PyObject* v = PyList_New(0);
155 |         for (vector<double>::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) {
156 |             PyObject* f = PyFloat_FromDouble(*iv);
157 |             PyList_Append(v, f);
158 |         }
159 |         PyDict_SetItemString(dict, it->first.c_str(), v);
160 |     }
161 | 
162 |     PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases());
163 |     delete res; // Deletes cost too
164 |     return retVal;
165 | }
166 | 
167 | PyObject* checkGradients(PyObject *self, PyObject *args) {
168 |     assert(model != NULL);
169 |     PyListObject* data;
170 |     if (!PyArg_ParseTuple(args, "O!",
171 |         &PyList_Type, &data)) {
172 |         return NULL;
173 |     }
174 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
175 | 
176 |     GradCheckWorker* wr = new GradCheckWorker(*model, *new CPUData(mvec));
177 |     model->getWorkerQueue().enqueue(wr);
178 |     WorkResult* res = model->getResultQueue().dequeue();
179 |     assert(res != NULL);
180 |     assert(res->getResultType() == WorkResult::BATCH_DONE);
181 |     delete res;
182 |     return Py_BuildValue("i", 0);
183 | }
184 | 
185 | /*
186 |  * Copies weight matrices from GPU to system memory.
187 |  */
188 | PyObject* syncWithHost(PyObject *self, PyObject *args) {
189 |     assert(model != NULL);
190 |     SyncWorker* wr = new SyncWorker(*model);
191 |     model->getWorkerQueue().enqueue(wr);
192 |     WorkResult* res = model->getResultQueue().dequeue();
193 |     assert(res != NULL);
194 |     assert(res->getResultType() == WorkResult::SYNC_DONE);
195 | 
196 |     delete res;
197 |     return Py_BuildValue("i", 0);
198 | }
199 | 


--------------------------------------------------------------------------------
/src/util.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <util.cuh>
 28 | 
 29 | using namespace std;
 30 | 
 31 | floatv* getFloatV(PyObject* pyList) {
 32 |     if (pyList == NULL) {
 33 |         return NULL;
 34 |     }
 35 |     floatv* vec = new floatv();
 36 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 37 |         vec->push_back(PyFloat_AS_DOUBLE(PyList_GET_ITEM(pyList, i)));
 38 |     }
 39 |     return vec;
 40 | }
 41 | 
 42 | intv* getIntV(PyObject* pyList) {
 43 |     if (pyList == NULL) {
 44 |         return NULL;
 45 |     }
 46 |     intv* vec = new intv();
 47 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 48 |         vec->push_back(PyInt_AS_LONG(PyList_GET_ITEM(pyList, i)));
 49 |     }
 50 |     return vec;
 51 | }
 52 | 
 53 | int* getIntA(PyObject* pyList) {
 54 |     if (pyList == NULL) {
 55 |         return NULL;
 56 |     }
 57 |     int* arr = new int[PyList_GET_SIZE(pyList)];
 58 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 59 |         arr[i] = PyInt_AS_LONG(PyList_GET_ITEM(pyList, i));
 60 |     }
 61 |     return arr;
 62 | }
 63 | MatrixV* getMatrixV(PyObject* pyList) {
 64 |     if (pyList == NULL) {
 65 |         return NULL;
 66 |     }
 67 |     MatrixV* vec = new MatrixV();
 68 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 69 |         vec->push_back(new Matrix((PyArrayObject*)PyList_GET_ITEM(pyList, i)));
 70 |     }
 71 |     return vec;
 72 | }
 73 | 
 74 | int pyDictGetInt(PyObject* dict, const char* key) {
 75 |     return PyInt_AS_LONG(PyDict_GetItemString(dict, key));
 76 | }
 77 | 
 78 | intv* pyDictGetIntV(PyObject* dict, const char* key) {
 79 |     return getIntV(PyDict_GetItemString(dict, key));
 80 | }
 81 | 
 82 | int* pyDictGetIntA(PyObject* dict, const char* key) {
 83 |     return getIntA(PyDict_GetItemString(dict, key));
 84 | }
 85 | 
 86 | string pyDictGetString(PyObject* dict, const char* key) {
 87 |     return string(PyString_AS_STRING(PyDict_GetItemString(dict, key)));
 88 | }
 89 | 
 90 | float pyDictGetFloat(PyObject* dict, const char* key) {
 91 |     return PyFloat_AS_DOUBLE(PyDict_GetItemString(dict, key));
 92 | }
 93 | 
 94 | floatv* pyDictGetFloatV(PyObject* dict, const char* key) {
 95 |     return getFloatV(PyDict_GetItemString(dict, key));
 96 | }
 97 | 
 98 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key) {
 99 |     return new Matrix((PyArrayObject*)PyDict_GetItemString(dict, key));
100 | }
101 | 
102 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key) {
103 |     return getMatrixV(PyDict_GetItemString(dict, key));
104 | }


--------------------------------------------------------------------------------
/src/weights.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  *
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <weights.cuh>
28 | 
// Out-of-class definition of the static member Weights::_autoCopyToGPU; starts out false.
bool Weights::_autoCopyToGPU = false;


--------------------------------------------------------------------------------
/src/worker.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  *
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <algorithm>
 28 | #include <util.cuh>
 29 | #include <worker.cuh>
 30 | 
 31 | using namespace std;
 32 | 
 33 | /*
 34 |  * ====================
 35 |  * WorkResult
 36 |  * ====================
 37 |  */
// Builds a result of the given type that carries a Cost. Only the address of
// `results` is stored; the destructor later deletes it.
WorkResult::WorkResult(WorkResult::RESULTS resultType, Cost& results) : _resultType(resultType), _results(&results) {
}
 40 | 
// Builds a result of the given type with no attached Cost (_results is NULL).
WorkResult::WorkResult(WorkResult::RESULTS resultType) : _resultType(resultType), _results(NULL) {
}
 43 | 
// Frees the attached Cost, if any.
WorkResult::~WorkResult() {
    delete _results; // delete NULL is ok
}
 47 | 
// Returns the attached Cost by reference. _results is dereferenced unchecked, so this
// must not be called on a result built without a Cost.
Cost& WorkResult::getResults() const {
    return *_results;
}
 51 | 
// Returns the result type given at construction.
WorkResult::RESULTS WorkResult::getResultType() const {
    return _resultType;
}
 55 | 
 56 | /*
 57 |  * ====================
 58 |  * Worker
 59 |  * ====================
 60 |  */
// Stores a pointer to the ConvNet this worker operates on.
Worker::Worker(ConvNet& convNet) : _convNet(&convNet) {
}
 63 | 
 64 | /*
 65 |  * ====================
 66 |  * DataWorker
 67 |  * ====================
 68 |  */
// Stores a pointer to the batch data and caches a pointer to the ConvNet's data provider.
DataWorker::DataWorker(ConvNet& convNet, CPUData& data) : Worker(convNet), _data(&data) {
    _dp = &convNet.getDataProvider();
}
 72 | 
// Calls clearData() on the data provider when the worker is destroyed.
DataWorker::~DataWorker() {
    _dp->clearData();
}
 76 | 
 77 | /*
 78 |  * ====================
 79 |  * TrainingWorker
 80 |  * ====================
 81 |  */
// `test` selects the mode used by run(): true skips backprop and weight updates.
TrainingWorker::TrainingWorker(ConvNet& convNet, CPUData& data, bool test)
    : DataWorker(convNet, data), _test(test) {
}
 85 | 
 86 | // Need to setData here (as opposed to the constructor) because the constructor executes in
 87 | // the original CPU thread, which is not the one with GPU access.
 88 | void TrainingWorker::run() {
 89 |     _dp->setData(*_data);
 90 |     Cost& batchCost = *new Cost(0);
 91 |     for (int i = 0; i < _dp->getNumMinibatches(); i++) {
 92 |         _convNet->fprop(i, _test ? PASS_TEST : PASS_TRAIN);
 93 |         _convNet->getCost(batchCost);
 94 | 
 95 |         if (!_test) {
 96 |             _convNet->bprop(PASS_TRAIN);
 97 |             _convNet->updateWeights();
 98 |         }
 99 |     }
100 |     cudaThreadSynchronize();
101 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
102 | }
103 | 
104 | /*
105 |  * ====================
106 |  * SyncWorker
107 |  * ====================
108 |  */
// A SyncWorker needs nothing beyond the ConvNet it operates on.
SyncWorker::SyncWorker(ConvNet& convNet) : Worker(convNet) {
}
111 | 
112 | void SyncWorker::run() {
113 |     _convNet->copyToCPU();
114 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::SYNC_DONE));
115 | }
116 | 
117 | /*
118 |  * ====================
119 |  * GradCheckWorker
120 |  * ====================
121 |  */
// Forwards the ConvNet and batch data to DataWorker; no extra state is kept.
GradCheckWorker::GradCheckWorker(ConvNet& convNet, CPUData& data)
    : DataWorker(convNet, data) {
}
125 | 
// Loads the batch into the data provider and runs the ConvNet's gradient check.
void GradCheckWorker::run() {
    _dp->setData(*_data);
    _convNet->checkGradients();
    // Terminates the whole process with status 0; no WorkResult is ever enqueued.
    exit(0);
}
131 | 
132 | /*
133 |  * ====================
134 |  * MultiviewTestWorker
135 |  * ====================
136 |  */
// numViews:  number of views packed into the batch; must divide the number of cases evenly.
// logregIdx: index passed to ConvNet::getLayer() in run() to fetch the layer whose inputs are averaged.
MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx)
    : DataWorker(convNet, data), _numViews(numViews), _logregIdx(logregIdx) {
    assert(_data->getNumCases() % _numViews == 0);
}
141 | 
// Tests a batch that holds _numViews views of the same cases. For every minibatch the
// activations feeding the selected layer are averaged over all views, the layer is
// re-run on that average, and the resulting cost is accumulated. Posts a BATCH_DONE
// result carrying the accumulated cost.
void MultiviewTestWorker::run() {
    _dp->setData(*_data);
    Layer& logregLayer = _convNet->getLayer(_logregIdx);

    // The batch stores the views back to back, so each view covers this many cases.
    int numCasesReal = _dp->getNumCases() / _numViews;
    int numMiniReal = DIVUP(numCasesReal, _dp->getMinibatchSize());

    Cost& batchCost = *new Cost(0);
    for (int i = 0; i < numMiniReal; i++) {
        NVMatrix softmaxActs;
        for (int v = 0; v < _numViews; v++) {
            // Minibatch i of view v; the end index is clamped to the end of that view.
            GPUData& mini = _dp->getDataSlice(v * numCasesReal + i * _dp->getMinibatchSize(),
                                              min((v + 1) * numCasesReal, v * numCasesReal + (i + 1) * _dp->getMinibatchSize()));
            _convNet->fprop(mini, PASS_TEST);
            // Sum the activations of the layer's second input over the views
            // (presumably the softmax output, going by the variable name -- confirm).
            if (v == 0) {
                logregLayer.getPrev()[1]->getActs().copy(softmaxActs);
            } else {
                softmaxActs.add(logregLayer.getPrev()[1]->getActs());
            }
        }
        // Turn the sum into a mean over the views.
        softmaxActs.scale(1.0 / _numViews);
        // Re-run only the selected layer: first input as left by the last view's
        // forward pass, second input replaced by the averaged activations.
        NVMatrixV logregInput;
        logregInput.push_back(&logregLayer.getPrev()[0]->getActs());
        logregInput.push_back(&softmaxActs);

        logregLayer.fprop(logregInput, PASS_TEST);

        _convNet->getCost(batchCost);
    }
    cudaThreadSynchronize();

    _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
}
175 | 
176 | /*
177 |  * ====================
178 |  * FeatureWorker
179 |  * ====================
180 |  */
// ftrs:     host matrix that run() fills; must have one row per case and must not be transposed.
// layerIdx: index passed to ConvNet::getLayer() in run() to fetch the layer whose activations are written.
FeatureWorker::FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx)
    : DataWorker(convNet, data), _ftrs(&ftrs), _layerIdx(layerIdx) {
    assert(ftrs.getNumRows() == data.getNumCases());
    assert(!ftrs.isTrans());
}
186 | 
// Deletes the feature matrix that was handed to the constructor.
FeatureWorker::~FeatureWorker() {
    delete _ftrs;
}
190 | 
// Runs every minibatch forward in test mode and copies the selected layer's
// activations into the matching rows of _ftrs. Posts a BATCH_DONE result carrying
// the accumulated cost.
void FeatureWorker::run() {
    _dp->setData(*_data);
    Layer& ftrLayer = _convNet->getLayer(_layerIdx);
    Cost& batchCost = *new Cost(0);
    for (int i = 0; i < _dp->getNumMinibatches(); i++) {
        _convNet->fprop(i, PASS_TEST);
        _convNet->getCost(batchCost);
        // Rows of the output matrix belonging to this minibatch; the end is clamped
        // to the total number of cases.
        Matrix& miniFtrs = _ftrs->sliceRows(i * _dp->getMinibatchSize(),
                                            min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize()));
        NVMatrix& acts = ftrLayer.getActs();
        NVMatrix acts_T;
        // Build a transposed copy of the activations in acts_T. When acts is flagged
        // as transposed, getTranspose() is taken first and the temporary it returns
        // is deleted afterwards.
        if (acts.isTrans()) {
            NVMatrix& soft_T = acts.getTranspose();
            soft_T.transpose(acts_T);
            delete &soft_T;
        } else {
            acts.transpose(acts_T);
        }
        acts_T.copyToHost(miniFtrs);
        // Only the slice object returned by sliceRows() is freed here.
        delete &miniFtrs;
    }
    cudaThreadSynchronize();
    _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
}


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | #
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import re
 26 | import cPickle
 27 | import os
 28 | import numpy as n
 29 | from math import sqrt
 30 | 
 31 | import gzip
 32 | import zipfile
 33 | 
 34 | class UnpickleError(Exception):
 35 |     pass
 36 | 
 37 | VENDOR_ID_REGEX = re.compile('^vendor_id\s+: (\S+)')
 38 | GPU_LOCK_NO_SCRIPT = -2
 39 | GPU_LOCK_NO_LOCK = -1
 40 | 
 41 | try:
 42 |     import magic
 43 |     ms = magic.open(magic.MAGIC_NONE)
 44 |     ms.load()
 45 | except ImportError: # no magic module
 46 |     ms = None
 47 | 
 48 | def get_gpu_lock(id=-1):
 49 |     import imp
 50 |     lock_script_path = '/u/tang/bin/gpu_lock2.py'
 51 |     if os.path.exists(lock_script_path):
 52 |         locker = imp.load_source("", lock_script_path)
 53 |         if id == -1:
 54 |             return locker.obtain_lock_id()
 55 |         print id
 56 |         got_id = locker._obtain_lock(id)
 57 |         return id if got_id else GPU_LOCK_NO_LOCK
 58 |     return GPU_LOCK_NO_SCRIPT if id < 0 else id
 59 | 
 60 | def pickle(filename, data, compress=False):
 61 |     if compress:
 62 |         fo = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
 63 |         fo.writestr('data', cPickle.dumps(data, -1))
 64 |     else:
 65 |         fo = open(filename, "wb")
 66 |         cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL)
 67 |     fo.close()
 68 | 
 69 | def unpickle(filename):
 70 |     if not os.path.exists(filename):
 71 |         raise UnpickleError("Path '%s' does not exist." % filename)
 72 |     if ms is not None and ms.file(filename).startswith('gzip'):
 73 |         fo = gzip.open(filename, 'rb')
 74 |         dict = cPickle.load(fo)
 75 |     elif ms is not None and ms.file(filename).startswith('Zip'):
 76 |         fo = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED)
 77 |         dict = cPickle.loads(fo.read('data'))
 78 |     else:
 79 |         fo = open(filename, 'rb')
 80 |         dict = cPickle.load(fo)
 81 | 
 82 |     fo.close()
 83 |     return dict
 84 | 
 85 | def tryint(s):
 86 |     try:
 87 |         return int(s)
 88 |     except:
 89 |         return s
 90 | 
 91 | def alphanum_key(s):
 92 |     return [tryint(c) for c in re.split('([0-9]+)', s)]
 93 | 
 94 | def is_intel_machine():
 95 |     f = open('/proc/cpuinfo')
 96 |     for line in f:
 97 |         m = VENDOR_ID_REGEX.match(line)
 98 |         if m:
 99 |             f.close()
100 |             return m.group(1) == 'GenuineIntel'
101 |     f.close()
102 |     return False
103 | 
def get_cpu():
    """Return 'intel' when is_intel_machine() is true, and 'amd' otherwise."""
    return 'intel' if is_intel_machine() else 'amd'
108 | 
def is_windows_machine():
    """Return True when os.name reports 'nt'."""
    platform_name = os.name
    return platform_name == 'nt'
111 | 


--------------------------------------------------------------------------------