├── .gitignore ├── CMakeLists.txt ├── README.rst ├── cmake ├── findMKL.cmake └── findThreads.cmake ├── convdata.py ├── convnet.py ├── data.py ├── example-layers ├── layer-params-18pct.cfg ├── layer-params-19pct.cfg ├── layer-params-80sec.cfg ├── layer-params-conv-local-11pct.cfg ├── layer-params-conv-local-13pct.cfg ├── layer-params-example.cfg ├── layer-params.gc.cfg ├── layers-18pct.cfg ├── layers-19pct.cfg ├── layers-80sec.cfg ├── layers-conv-local-11pct.cfg ├── layers-conv-local-13pct.cfg ├── layers-example.cfg └── layers.gc.cfg ├── gpumodel.py ├── include ├── common │ ├── matrix.h │ ├── matrix_funcs.h │ ├── queue.h │ └── thread.h ├── convnet.cuh ├── cost.cuh ├── cudaconv2 │ ├── conv_util.cuh │ └── cudaconv2.cuh ├── data.cuh ├── layer.cuh ├── layer_kernels.cuh ├── neuron.cuh ├── nvmatrix │ ├── nvmatrix.cuh │ ├── nvmatrix_kernels.cuh │ └── nvmatrix_operators.cuh ├── pyconvnet.cuh ├── util.cuh ├── weights.cuh └── worker.cuh ├── layer.py ├── options.py ├── ordereddict.py ├── shownet.py ├── src ├── common │ ├── CMakeLists.txt │ └── matrix.cpp ├── convnet.cu ├── cost.cu ├── cudaconv2 │ ├── CMakeLists.txt │ ├── conv_util.cu │ ├── filter_acts.cu │ ├── img_acts.cu │ └── weight_acts.cu ├── data.cu ├── layer.cu ├── layer_kernels.cu ├── neuron.cu ├── nvmatrix │ ├── CMakeLists.txt │ ├── nvmatrix.cu │ └── nvmatrix_kernels.cu ├── pyconvnet.cu ├── util.cu ├── weights.cu └── worker.cu └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # CMake 2 | CMakeCache.txt 3 | CMakeFiles 4 | Makefile 5 | cmake_install.cmake 6 | install_manifest.txt 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | 12 | # C extensions 13 | *.so 14 | *.a 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | bin/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Installer logs 34 | 
pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Top-level build script for the cuda-convnet Python extension.
#
# This first part locates the toolchain and third-party dependencies (CUDA,
# BLAS or MKL, pthreads, Python, NumPy), sets compiler/nvcc flags and the
# include path, and starts adding the helper-library subdirectories.
#
# User-overridable cache variables (pass with -D<name>=<value>):
#   CONVNET_CUDA_ARCH          compute capability to build for (default 30)
#   THREADS_PTHREAD_WIN32_INC  MSVC only: directory containing pthread.h
#   MKL_ROOT                   MSVC only: folder that contains "mkl/"
cmake_minimum_required(VERSION 2.8)
project(cuda-convnet)

if(MSVC)
  # The bundled find-modules live in ./cmake. NOTE(review): the files are
  # named findMKL.cmake / findThreads.cmake (lower-case "f"), which only
  # resolves for find_package() on a case-insensitive file system.
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
  set(THREADS_USE_PTHREADS_WIN32 TRUE)

  # Location of pthread.h (pthreads-win32). Cached so it can be overridden
  # on the command line instead of editing this file.
  set(THREADS_PTHREAD_WIN32_INC
    "C:/cuda-convnet/pthreads-w32-2-9-1-release/Pre-built.2/include"
    CACHE PATH "Directory containing the pthreads-win32 pthread.h")

  # Location of the MKL installation (the folder containing "./mkl").
  set(MKL_ROOT "C:/Program Files (x86)/Intel/Composer XE 2013"
    CACHE PATH "Folder containing the Intel MKL 'mkl' directory")

  # Replace whatever debug-information switch is present with /Z7.
  string(REGEX REPLACE "/Z[iI7]" ""
    CMAKE_CXX_FLAGS_RELEASE
    "${CMAKE_CXX_FLAGS_RELEASE}")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Z7")
endif()

find_package(CUDA REQUIRED)
if(MSVC)
  find_package(MKL REQUIRED)
  find_package(Threads REQUIRED)
else()
  find_package(BLAS REQUIRED)
  find_path(BLAS_INCLUDE_DIRS cblas.h
    HINTS
      "/System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/"
      "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/"
  )
endif()

# Look for the interpreter first: FindPythonLibs then selects libraries that
# match the interpreter that was found, rather than an unrelated install.
find_package(PythonInterp 2.7 REQUIRED)
find_package(PythonLibs 2.7 REQUIRED)

# helper_cuda.h ships with the CUDA samples, not with the toolkit proper.
find_path(CUDA_COMMON_INCLUDE_DIRS
  helper_cuda.h
  PATHS ${CUDA_SDK_SEARCH_PATH}
    "/usr/local/cuda"
    "/Developer/NVIDIA/CUDA-6.0"
    "C:/ProgramData/NVIDIA Corporation/CUDA Samples/v6.0/common/inc"
  PATH_SUFFIXES "samples/common/inc"
  DOC "Location of helper_cuda.h"
  NO_DEFAULT_PATH
)

if(APPLE)
  list(APPEND CUDA_NVCC_FLAGS -ccbin /usr/bin/clang)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libstdc++")
endif()

# GPU architecture to generate code for. Previously this was chosen by
# (un)commenting one of several hard-coded -gencode lines; the default keeps
# the formerly active choice (sm_30).
set(CONVNET_CUDA_ARCH "30" CACHE STRING
  "CUDA compute capability to build for, without the dot (e.g. 20, 30, 32, 35, 50)")
list(APPEND CUDA_NVCC_FLAGS
  -gencode arch=compute_${CONVNET_CUDA_ARCH},code=sm_${CONVNET_CUDA_ARCH})

if(CMAKE_SYSTEM_NAME MATCHES "Linux")
  # CMAKE_CXX_FLAGS is a space-separated command-line string, not a list.
  # list(APPEND) would insert a ';' whenever flags are already set and
  # corrupt the compiler command line, so append as a string.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
endif()

# Ask the selected interpreter where the NumPy headers are installed.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
  "import numpy as n; print(n.get_include());"
  RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
  OUTPUT_VARIABLE _NUMPY_VALUES
  ERROR_VARIABLE _NUMPY_ERROR_VALUE
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
# EQUAL compares numerically; the previous `MATCHES 0` was a regex test that
# also accepted any exit status merely containing the digit 0 (10, 20, ...).
if(_NUMPY_SEARCH_SUCCESS EQUAL 0)
  # Quote the expansions so an empty or ';'-containing value stays one argument.
  string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES "${_NUMPY_VALUES}")
  string(REGEX REPLACE "\n" ";" _NUMPY_VALUES "${_NUMPY_VALUES}")
  list(GET _NUMPY_VALUES 0 NUMPY_INCLUDE_DIRS)
  # Normalise Windows back-slashes to forward slashes.
  string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS "${NUMPY_INCLUDE_DIRS}")
else()
  message(FATAL_ERROR "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
endif()

# Directory-scoped on purpose: subdirectories added with add_subdirectory()
# inherit these include paths. The two branches differ only in the BLAS
# provider (MKL + pthreads-win32 on MSVC, system BLAS elsewhere).
if(MSVC)
  include_directories(
    ${CUDA_INCLUDE_DIRS}
    ${CUDA_COMMON_INCLUDE_DIRS}
    ${PYTHON_INCLUDE_DIRS}
    ${NUMPY_INCLUDE_DIRS}/numpy
    ${MKL_INCLUDE_DIR}
    ${THREADS_PTHREADS_INCLUDE_DIR}
    include/common
    include/nvmatrix
    include/cudaconv2
    include/
  )
else()
  include_directories(
    ${BLAS_INCLUDE_DIRS}
    ${CUDA_INCLUDE_DIRS}
    ${CUDA_COMMON_INCLUDE_DIRS}
    ${PYTHON_INCLUDE_DIRS}
    ${NUMPY_INCLUDE_DIRS}/numpy
    include/common
    include/nvmatrix
    include/cudaconv2
    include/
  )
endif()

add_subdirectory(src/common)
# Remaining helper libraries. The `nvmatrix` and `cudaconv2` names linked
# below are presumably the targets these subdirectories define (their
# CMakeLists.txt files are not shown here).
add_subdirectory(src/nvmatrix)
add_subdirectory(src/cudaconv2)

# The Python extension module itself.
cuda_add_library(convnet SHARED
  src/convnet.cu
  src/cost.cu
  src/data.cu
  src/layer.cu
  src/layer_kernels.cu
  src/neuron.cu
  src/pyconvnet.cu
  src/util.cu
  src/weights.cu
  src/worker.cu
)

# The module file is named "_convnet" plus a platform-specific suffix;
# convnet.py imports it under the name "_convnet".
if(MSVC)
  set(_convnet_module_suffix ".pyd")
else()
  set(_convnet_module_suffix ".so")
endif()
set_target_properties(convnet
  PROPERTIES
    PREFIX "_"
    SUFFIX "${_convnet_module_suffix}"
)

# Libraries common to every platform first, then the platform-specific BLAS
# stack; the resulting link order is the same as listing them in one call.
# The keyword-less signature is kept as in the original call.
target_link_libraries(convnet
  common
  nvmatrix
  cudaconv2
  ${PYTHON_LIBRARIES}
)
if(MSVC)
  target_link_libraries(convnet
    ${CUDA_CUBLAS_LIBRARIES}
    ${MKL_LIBRARIES}
    ${CMAKE_THREAD_LIBS_INIT}
  )
else()
  target_link_libraries(convnet
    ${BLAS_LIBRARIES}
    ${CUDA_CUBLAS_LIBRARIES}
  )
endif()

cuda_build_clean_target()
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | This is my fork of the ``cuda-convnet`` convolutional neural network 2 | implementation written by Alex Krizhevsky. 3 | 4 | ``cuda-convnet`` has quite extensive documentation itself. Find the 5 | `MAIN DOCUMENTATION HERE `_. 6 | 7 | **Update**: A newer version, `cuda-convnet 2 8 | `_, has been released by 9 | Alex. This fork is still based on the original cuda-convnet. 10 | 11 | =================== 12 | Additional features 13 | =================== 14 | 15 | This document will only describe the small differences between 16 | ``cuda-convnet`` as hosted on Google Code and this version. 17 | 18 | Dropout 19 | ======= 20 | 21 | Dropout is a relatively new regularization technique for neural 22 | networks.
See the `Improving neural networks by preventing 23 | co-adaptation of feature detectors `_ 24 | and `Improving Neural Networks with Dropout 25 | `_ papers for 26 | details. 27 | 28 | To set a dropout rate for one of our layers, we use the ``dropout`` 29 | parameter in our model's ``layer-params`` configuration file. For 30 | example, we could use dropout for the last layer in the CIFAR example 31 | by modifying the section for the fc10 layer to look like so:: 32 | 33 | [fc10] 34 | epsW=0.001 35 | epsB=0.002 36 | # ... 37 | dropout=0.5 38 | 39 | In practice, you'll probably also want to double the number of 40 | ``outputs`` in that layer. 41 | 42 | 43 | CURAND random seeding 44 | ===================== 45 | 46 | An environment variable ``CONVNET_RANDOM_SEED``, if set, will be used 47 | to set the CURAND library's random seed. This is important in order 48 | to get reproducible results. 49 | 50 | 51 | Updated to work with CUDA via CMake 52 | =================================== 53 | 54 | The build configuration and code have been updated to work with CUDA 55 | via CMake. Run ``cmake .`` and then ``make``. If you have an alternative 56 | BLAS library, set it with, for example, ``cmake -DBLAS_LIBRARIES=/usr/lib/libcblas.so .``. 57 | -------------------------------------------------------------------------------- /cmake/findMKL.cmake: -------------------------------------------------------------------------------- 1 | # - Find the MKL libraries 2 | # Modified from Armadillo's ARMA_FindMKL.cmake 3 | # This module defines 4 | # MKL_INCLUDE_DIR, the directory for the MKL headers 5 | # MKL_LIB_DIR, the directory for the MKL library files 6 | # MKL_COMPILER_LIB_DIR, the directory for the MKL compiler library files 7 | # MKL_LIBRARIES, the libraries needed to use Intel's implementation of BLAS & LAPACK. 8 | # MKL_FOUND, If false, do not try to use MKL; if true, the macro definition USE_MKL is added.

# Installation root that is searched for an "mkl" sub-directory.
# TODO: what if MKL is not installed in /opt/intel/mkl?
#   - non-Windows: /opt/intel is assumed
#   - Windows: the including project supplies MKL_ROOT
#     (e.g. C:/Program Files (x86)/Intel/Composer XE, which contains "mkl")
if(WIN32)
  set(MKLROOT_PATH ${MKL_ROOT} CACHE PATH "Where the MKL are stored")
else()
  set(MKLROOT_PATH "/opt/intel" CACHE PATH "Where the MKL are stored")
endif()

# MKL counts as found when <root>/mkl exists. On 64-bit builds the ILP64
# interface is selected only when Armadillo was found and reports a
# long-long BLAS integer type.
if(EXISTS ${MKLROOT_PATH}/mkl)
  set(MKL_FOUND TRUE)
  message("MKL is found at ${MKLROOT_PATH}/mkl")
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(USE_MKL_64BIT On)
    if(ARMADILLO_FOUND AND ARMADILLO_BLAS_LONG_LONG)
      set(USE_MKL_64BIT_LIB On)
      add_definitions(-DMKL_ILP64)
      message("MKL is linked against ILP64 interface ... ")
    endif()
  else()
    set(USE_MKL_64BIT Off)
  endif()
else()
  set(MKL_FOUND FALSE)
  message("MKL is NOT found ... ")
endif()

if(MKL_FOUND)
  set(MKL_INCLUDE_DIR "${MKLROOT_PATH}/mkl/include")
  add_definitions(-DUSE_MKL)

  # Pick the architecture sub-directory and the interface-layer library.
  # Windows uses bare import-library names; other platforms use static
  # archive file names.
  if(USE_MKL_64BIT)
    set(_mkl_arch intel64)
    if(USE_MKL_64BIT_LIB)
      set(_mkl_iface_win mkl_intel_ilp64)
    else()
      set(_mkl_iface_win mkl_intel_lp64)
    endif()
    set(_mkl_iface_other lib${_mkl_iface_win}.a)
  else()
    set(_mkl_arch ia32)
    set(_mkl_iface_win mkl_intel_c)
    set(_mkl_iface_other libmkl_intel.a)
  endif()

  set(MKL_LIB_DIR "${MKLROOT_PATH}/mkl/lib/${_mkl_arch}")
  set(MKL_COMPILER_LIB_DIR
    "${MKLROOT_PATH}/compiler/lib/${_mkl_arch}"
    "${MKLROOT_PATH}/lib/${_mkl_arch}")

  # Interface layer, threading layer, core, then the OpenMP runtime.
  if(WIN32)
    list(APPEND MKL_LIBRARIES
      ${_mkl_iface_win} mkl_intel_thread mkl_core libiomp5md)
  else()
    list(APPEND MKL_LIBRARIES
      ${_mkl_iface_other} libmkl_intel_thread.a libmkl_core.a iomp5)
  endif()

  # Do not leak the scratch variables into the including scope.
  unset(_mkl_arch)
  unset(_mkl_iface_win)
  unset(_mkl_iface_other)

  if(NOT MKL_FIND_QUIETLY)
    message(STATUS "Found MKL libraries: ${MKL_LIBRARIES}")
    message(STATUS "MKL_INCLUDE_DIR: ${MKL_INCLUDE_DIR}")
    message(STATUS "MKL_LIB_DIR: ${MKL_LIB_DIR}")
    message(STATUS "MKL_COMPILER_LIB_DIR: ${MKL_COMPILER_LIB_DIR}")
  endif()

  # MKL_LIBRARIES holds bare names, so the search directories are
  # registered at directory scope for the including project.
  include_directories(${MKL_INCLUDE_DIR})
  link_directories(${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR})
elseif(MKL_FIND_REQUIRED)
  message(FATAL_ERROR "Could not find MKL libraries")
endif()

# MARK_AS_ADVANCED(MKL_LIBRARY)
-------------------------------------------------------------------------------- /cmake/findThreads.cmake: -------------------------------------------------------------------------------- 1 | # Updated FindThreads.cmake that supports pthread-win32 2 | # Downloaded from http://www.vtk.org/Bug/bug_view_advanced_page.php?bug_id=6399 3 | 4 | # - This module determines the thread library of the system. 5 | # 6 | # The following variables are set 7 | # CMAKE_THREAD_LIBS_INIT - the thread library 8 | # CMAKE_USE_SPROC_INIT - are we using sproc? 9 | # CMAKE_USE_WIN32_THREADS_INIT - using WIN32 threads? 10 | # CMAKE_USE_PTHREADS_INIT - are we using pthreads 11 | # CMAKE_HP_PTHREADS_INIT - are we using hp pthreads 12 | # 13 | # If use of pthreads-win32 is desired, the following variables 14 | # can be set. 15 | # 16 | # THREADS_USE_PTHREADS_WIN32 - 17 | # Setting this to true searches for the pthreads-win32 18 | # port (since CMake 2.8.0) 19 | # 20 | # THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME 21 | # C = no exceptions (default) 22 | # (NOTE: Changing this option from the default may affect 23 | # implementations and what you should probably be using) 24 | # CE = C++ Exception Handling 25 | # SE = Structure Exception Handling (MSVC only) 26 | # (NOTE: Changing this option from the default may affect 27 | # the portability of your application. See pthreads-win32 28 | # documentation for more details.)
#
#======================================================
# Example usage where threading library
# is provided by the system:
#
#   find_package(Threads REQUIRED)
#   add_executable(foo foo.cc)
#   target_link_libraries(foo ${CMAKE_THREAD_LIBS_INIT})
#
# Example usage if pthreads-win32 is desired on Windows
# or a system provided thread library:
#
#   set(THREADS_USE_PTHREADS_WIN32 true)
#   find_package(Threads REQUIRED)
#   include_directories(${THREADS_PTHREADS_INCLUDE_DIR})
#
#   add_executable(foo foo.cc)
#   target_link_libraries(foo ${CMAKE_THREAD_LIBS_INIT})
#
# Additional input used by the pthreads-win32 branch below:
#
#   THREADS_PTHREAD_WIN32_INC
#     Directory searched for pthread.h. The library is then looked up in
#     <parent of that directory>/lib/x64 (64-bit builds) or
#     <parent of that directory>/lib/x86 (32-bit builds).
#

INCLUDE (CheckIncludeFiles)
INCLUDE (CheckLibraryExists)
SET(Threads_FOUND FALSE)

IF(WIN32 AND NOT CYGWIN AND THREADS_USE_PTHREADS_WIN32)
  SET(_Threads_ptwin32 true)
ENDIF()

# Do we have sproc?
IF(CMAKE_SYSTEM MATCHES IRIX)
  CHECK_INCLUDE_FILES("sys/types.h;sys/prctl.h" CMAKE_HAVE_SPROC_H)
ENDIF()

IF(CMAKE_HAVE_SPROC_H)
  # We have sproc
  SET(CMAKE_USE_SPROC_INIT 1)

ELSEIF(_Threads_ptwin32)

  IF(NOT DEFINED THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME)
    # Assign the default scheme
    SET(THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME "C")
  ELSE()
    # Validate the scheme specified by the user
    IF(NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "C" AND
       NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "CE" AND
       NOT THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "SE")
      MESSAGE(FATAL_ERROR "See documentation for FindPthreads.cmake, only C, CE, and SE modes are allowed")
    ENDIF()
    IF(NOT MSVC AND THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "SE")
      MESSAGE(FATAL_ERROR "Structured Exception Handling is only allowed for MSVC")
    ENDIF(NOT MSVC AND THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME STREQUAL "SE")
  ENDIF()

  FIND_PATH(THREADS_PTHREADS_INCLUDE_DIR
    pthread.h
    PATHS
    ${THREADS_PTHREAD_WIN32_INC}
    )

  # Determine the library filename
  IF(MSVC)
    SET(_Threads_pthreads_libname
      pthreadV${THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME}2)
  ELSEIF(MINGW)
    SET(_Threads_pthreads_libname
      pthreadG${THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME}2)
  ELSE()
    SET(_Threads_pthreads_libname
      pthreadV${THREADS_PTHREADS_WIN32_EXCEPTION_SCHEME}2)
  ENDIF()

  # Use the include path to help find the library if possible.
  # The library directory depends on the target word size; this used to be
  # hard-coded to lib/x64, so a 32-bit build could not locate the library.
  # x64 remains the choice whenever the pointer size is not known to be 4.
  IF(CMAKE_SIZEOF_VOID_P EQUAL 4)
    SET(_Threads_lib_arch x86)
  ELSE()
    SET(_Threads_lib_arch x64)
  ENDIF()
  SET(_Threads_lib_paths "")
  IF(THREADS_PTHREADS_INCLUDE_DIR)
    GET_FILENAME_COMPONENT(_Threads_root_dir
      ${THREADS_PTHREADS_INCLUDE_DIR} PATH)
    SET(_Threads_lib_paths ${_Threads_root_dir}/lib/${_Threads_lib_arch})
  ENDIF()
  FIND_LIBRARY(THREADS_PTHREADS_WIN32_LIBRARY
    NAMES ${_Threads_pthreads_libname}
    PATHS ${_Threads_lib_paths}
    DOC "The Portable Threads Library for Win32"
    NO_SYSTEM_PATH
    )

  IF(THREADS_PTHREADS_INCLUDE_DIR AND THREADS_PTHREADS_WIN32_LIBRARY)
    MARK_AS_ADVANCED(THREADS_PTHREADS_INCLUDE_DIR)
    SET(CMAKE_THREAD_LIBS_INIT ${THREADS_PTHREADS_WIN32_LIBRARY})
    SET(CMAKE_HAVE_THREADS_LIBRARY 1)
    SET(Threads_FOUND TRUE)
  ENDIF()

  MARK_AS_ADVANCED(THREADS_PTHREADS_WIN32_LIBRARY)

ELSE()
  # Do we have pthreads?
  CHECK_INCLUDE_FILES("pthread.h" CMAKE_HAVE_PTHREAD_H)
  IF(CMAKE_HAVE_PTHREAD_H)

    #
    # We have pthread.h
    # Let's check for the library now.
    #
    SET(CMAKE_HAVE_THREADS_LIBRARY)
    IF(NOT THREADS_HAVE_PTHREAD_ARG)

      # Do we have -lpthreads
      CHECK_LIBRARY_EXISTS(pthreads pthread_create "" CMAKE_HAVE_PTHREADS_CREATE)
      IF(CMAKE_HAVE_PTHREADS_CREATE)
        SET(CMAKE_THREAD_LIBS_INIT "-lpthreads")
        SET(CMAKE_HAVE_THREADS_LIBRARY 1)
        SET(Threads_FOUND TRUE)
      ENDIF()

      # Ok, how about -lpthread
      CHECK_LIBRARY_EXISTS(pthread pthread_create "" CMAKE_HAVE_PTHREAD_CREATE)
      IF(CMAKE_HAVE_PTHREAD_CREATE)
        SET(CMAKE_THREAD_LIBS_INIT "-lpthread")
        SET(Threads_FOUND TRUE)
        SET(CMAKE_HAVE_THREADS_LIBRARY 1)
      ENDIF()

      IF(CMAKE_SYSTEM MATCHES "SunOS.*")
        # On sun also check for -lthread
        CHECK_LIBRARY_EXISTS(thread thr_create "" CMAKE_HAVE_THR_CREATE)
        IF(CMAKE_HAVE_THR_CREATE)
          SET(CMAKE_THREAD_LIBS_INIT "-lthread")
          SET(CMAKE_HAVE_THREADS_LIBRARY 1)
          SET(Threads_FOUND TRUE)
        ENDIF()
      ENDIF(CMAKE_SYSTEM MATCHES "SunOS.*")

    ENDIF(NOT THREADS_HAVE_PTHREAD_ARG)

    IF(NOT CMAKE_HAVE_THREADS_LIBRARY)
      # If we did not find -lpthreads, -lpthread, or -lthread, look for -pthread.
      # The quoted-name MATCHES test below is the legacy idiom for "this
      # variable has not been set yet": an unset name is compared as the
      # literal string and therefore matches itself.
      IF("THREADS_HAVE_PTHREAD_ARG" MATCHES "^THREADS_HAVE_PTHREAD_ARG")
        MESSAGE(STATUS "Check if compiler accepts -pthread")
        TRY_RUN(THREADS_PTHREAD_ARG THREADS_HAVE_PTHREAD_ARG
          ${CMAKE_BINARY_DIR}
          ${CMAKE_ROOT}/Modules/CheckForPthreads.c
          CMAKE_FLAGS -DLINK_LIBRARIES:STRING=-pthread
          COMPILE_OUTPUT_VARIABLE OUTPUT)

        IF(THREADS_HAVE_PTHREAD_ARG)
          IF(THREADS_PTHREAD_ARG MATCHES "^2$")
            SET(Threads_FOUND TRUE)
            MESSAGE(STATUS "Check if compiler accepts -pthread - yes")
          ELSE()
            MESSAGE(STATUS "Check if compiler accepts -pthread - no")
            FILE(APPEND
              ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
              "Determining if compiler accepts -pthread returned ${THREADS_PTHREAD_ARG} instead of 2. The compiler had the following output:\n${OUTPUT}\n\n")
          ENDIF()
        ELSE()
          MESSAGE(STATUS "Check if compiler accepts -pthread - no")
          FILE(APPEND
            ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
            "Determining if compiler accepts -pthread failed with the following output:\n${OUTPUT}\n\n")
        ENDIF()

      ENDIF("THREADS_HAVE_PTHREAD_ARG" MATCHES "^THREADS_HAVE_PTHREAD_ARG")

      IF(THREADS_HAVE_PTHREAD_ARG)
        SET(Threads_FOUND TRUE)
        SET(CMAKE_THREAD_LIBS_INIT "-pthread")
      ENDIF()

    ENDIF(NOT CMAKE_HAVE_THREADS_LIBRARY)
  ENDIF(CMAKE_HAVE_PTHREAD_H)
ENDIF()

IF(CMAKE_THREAD_LIBS_INIT)
  SET(CMAKE_USE_PTHREADS_INIT 1)
  SET(Threads_FOUND TRUE)
ENDIF()

IF(CMAKE_SYSTEM MATCHES "Windows"
   AND NOT THREADS_USE_PTHREADS_WIN32)
  SET(CMAKE_USE_WIN32_THREADS_INIT 1)
  SET(Threads_FOUND TRUE)
ENDIF()

IF(CMAKE_USE_PTHREADS_INIT)
  IF(CMAKE_SYSTEM MATCHES "HP-UX-*")
    # Use libcma if it exists and can be used. It provides more
    # symbols than the plain pthread library. CMA threads
    # have actually been deprecated:
    #   http://docs.hp.com/en/B3920-90091/ch12s03.html#d0e11395
    #   http://docs.hp.com/en/947/d8.html
    # but we need to maintain compatibility here.
    # The CMAKE_HP_PTHREADS setting actually indicates whether CMA threads
    # are available.
    CHECK_LIBRARY_EXISTS(cma pthread_attr_create "" CMAKE_HAVE_HP_CMA)
    IF(CMAKE_HAVE_HP_CMA)
      SET(CMAKE_THREAD_LIBS_INIT "-lcma")
      SET(CMAKE_HP_PTHREADS_INIT 1)
      SET(Threads_FOUND TRUE)
    ENDIF(CMAKE_HAVE_HP_CMA)
    SET(CMAKE_USE_PTHREADS_INIT 1)
  ENDIF()

  IF(CMAKE_SYSTEM MATCHES "OSF1-V*")
    SET(CMAKE_USE_PTHREADS_INIT 0)
    SET(CMAKE_THREAD_LIBS_INIT )
  ENDIF()

  IF(CMAKE_SYSTEM MATCHES "CYGWIN_NT*")
    SET(CMAKE_USE_PTHREADS_INIT 1)
    SET(Threads_FOUND TRUE)
    SET(CMAKE_THREAD_LIBS_INIT )
    SET(CMAKE_USE_WIN32_THREADS_INIT 0)
  ENDIF()
ENDIF(CMAKE_USE_PTHREADS_INIT)

# Report the result (and honour REQUIRED/QUIET) the standard way. With
# pthreads-win32 both the library and the header directory must be found.
INCLUDE(FindPackageHandleStandardArgs)
IF(_Threads_ptwin32)
  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Threads DEFAULT_MSG
    THREADS_PTHREADS_WIN32_LIBRARY THREADS_PTHREADS_INCLUDE_DIR)
ELSE()
  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Threads DEFAULT_MSG Threads_FOUND)
ENDIF()
-------------------------------------------------------------------------------- /convdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution.
13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | from data import * 26 | import numpy.random as nr 27 | import numpy as n 28 | import random as r 29 | 30 | class CIFARDataProvider(LabeledMemoryDataProvider): 31 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 32 | LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 33 | self.data_mean = self.batch_meta['data_mean'] 34 | self.num_colors = 3 35 | self.img_size = 32 36 | # Subtract the mean from the data and make sure that both data and 37 | # labels are in single-precision floating point. 
38 | for d in self.data_dic: 39 | # This converts the data matrix to single precision and makes sure that it is C-ordered 40 | d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C') 41 | d['labels'] = n.require(d['labels'].reshape((1, d['data'].shape[1])), dtype=n.single, requirements='C') 42 | 43 | def get_next_batch(self): 44 | epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) 45 | return epoch, batchnum, [datadic['data'], datadic['labels']] 46 | 47 | # Returns the dimensionality of the two data matrices returned by get_next_batch 48 | # idx is the index of the matrix. 49 | def get_data_dims(self, idx=0): 50 | return self.img_size**2 * self.num_colors if idx == 0 else 1 51 | 52 | # Takes as input an array returned by get_next_batch 53 | # Returns a (numCases, imgSize, imgSize, 3) array which can be 54 | # fed to pylab for plotting. 55 | # This is used by shownet.py to plot test case predictions. 56 | def get_plottable_data(self, data): 57 | return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) 58 | 59 | class CroppedCIFARDataProvider(LabeledMemoryDataProvider): 60 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False): 61 | LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 62 | 63 | self.border_size = dp_params['crop_border'] 64 | self.inner_size = 32 - self.border_size*2 65 | self.multiview = dp_params['multiview_test'] and test 66 | self.num_views = 5*2 67 | self.data_mult = self.num_views if self.multiview else 1 68 | self.num_colors = 3 69 | 70 | for d in self.data_dic: 71 | d['data'] = n.require(d['data'], requirements='C') 72 | d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C') 73 | 74 | self.cropped_data = 
[n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)] 75 | 76 | self.batches_generated = 0 77 | self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1)) 78 | 79 | def get_next_batch(self): 80 | epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) 81 | 82 | cropped = self.cropped_data[self.batches_generated % 2] 83 | 84 | self.__trim_borders(datadic['data'], cropped) 85 | cropped -= self.data_mean 86 | self.batches_generated += 1 87 | return epoch, batchnum, [cropped, datadic['labels']] 88 | 89 | def get_data_dims(self, idx=0): 90 | return self.inner_size**2 * 3 if idx == 0 else 1 91 | 92 | # Takes as input an array returned by get_next_batch 93 | # Returns a (numCases, imgSize, imgSize, 3) array which can be 94 | # fed to pylab for plotting. 95 | # This is used by shownet.py to plot test case predictions. 
96 | def get_plottable_data(self, data): 97 | return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) 98 | 99 | def __trim_borders(self, x, target): 100 | y = x.reshape(3, 32, 32, x.shape[1]) 101 | 102 | if self.test: # don't need to loop over cases 103 | if self.multiview: 104 | start_positions = [(0,0), (0, self.border_size*2), 105 | (self.border_size, self.border_size), 106 | (self.border_size*2, 0), (self.border_size*2, self.border_size*2)] 107 | end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] 108 | for i in xrange(self.num_views/2): 109 | pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:] 110 | target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1])) 111 | target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1])) 112 | else: 113 | pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now 114 | target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1])) 115 | else: 116 | for c in xrange(x.shape[1]): # loop over cases 117 | startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) 118 | endY, endX = startY + self.inner_size, startX + self.inner_size 119 | pic = y[:,startY:endY,startX:endX, c] 120 | if nr.randint(2) == 0: # also flip the image with 50% probability 121 | pic = pic[:,:,::-1] 122 | target[:,c] = pic.reshape((self.get_data_dims(),)) 123 | 124 | class DummyConvNetDataProvider(LabeledDummyDataProvider): 125 | def __init__(self, data_dim): 126 | LabeledDummyDataProvider.__init__(self, data_dim) 127 | 128 | def get_next_batch(self): 129 | epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self) 130 | 131 | dic['data'] = 
n.require(dic['data'].T, requirements='C') 132 | dic['labels'] = n.require(dic['labels'].T, requirements='C') 133 | 134 | return epoch, batchnum, [dic['data'], dic['labels']] 135 | 136 | # Returns the dimensionality of the two data matrices returned by get_next_batch 137 | def get_data_dims(self, idx=0): 138 | return self.batch_meta['num_vis'] if idx == 0 else 1 139 | -------------------------------------------------------------------------------- /convnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | import numpy as n 26 | import numpy.random as nr 27 | from util import * 28 | from data import * 29 | from options import * 30 | from gpumodel import * 31 | import sys 32 | import math as m 33 | import layer as lay 34 | from convdata import * 35 | from os import linesep as NL 36 | #import pylab as pl 37 | 38 | class ConvNet(IGPUModel): 39 | def __init__(self, op, load_dic, dp_params={}): 40 | filename_options = [] 41 | dp_params['multiview_test'] = op.get_value('multiview_test') 42 | dp_params['crop_border'] = op.get_value('crop_border') 43 | IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params) 44 | 45 | def import_model(self): 46 | lib_name = "_convnet" 47 | print "=========================" 48 | print "Importing %s C++ module" % lib_name 49 | self.libmodel = __import__(lib_name) 50 | 51 | def init_model_lib(self): 52 | self.libmodel.initModel(self.layers, self.minibatch_size, self.device_ids[0]) 53 | 54 | def init_model_state(self): 55 | ms = self.model_state 56 | if self.load_file: 57 | ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers']) 58 | else: 59 | ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self) 60 | self.layers_dic = dict(zip([l['name'] for l in ms['layers']], ms['layers'])) 61 | 62 | logreg_name = self.op.get_value('logreg_name') 63 | if logreg_name: 64 | self.logreg_idx = 
self.get_layer_idx(logreg_name, check_type='cost.logreg') 65 | 66 | # Convert convolutional layers to local 67 | if len(self.op.get_value('conv_to_local')) > 0: 68 | for i, layer in enumerate(ms['layers']): 69 | if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'): 70 | lay.LocalLayerParser.conv_to_local(ms['layers'], i) 71 | # Decouple weight matrices 72 | if len(self.op.get_value('unshare_weights')) > 0: 73 | for name_str in self.op.get_value('unshare_weights'): 74 | if name_str: 75 | name = lay.WeightLayerParser.get_layer_name(name_str) 76 | if name is not None: 77 | name, idx = name[0], name[1] 78 | if name not in self.layers_dic: 79 | raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name) 80 | layer = self.layers_dic[name] 81 | lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx) 82 | else: 83 | raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str) 84 | self.op.set_value('conv_to_local', [], parse=False) 85 | self.op.set_value('unshare_weights', [], parse=False) 86 | 87 | def get_layer_idx(self, layer_name, check_type=None): 88 | try: 89 | layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name) 90 | if check_type: 91 | layer_type = self.model_state['layers'][layer_idx]['type'] 92 | if layer_type != check_type: 93 | raise ModelStateException("Layer with name '%s' has type '%s'; should be '%s'." % (layer_name, layer_type, check_type)) 94 | return layer_idx 95 | except ValueError: 96 | raise ModelStateException("Layer with name '%s' not defined." 
% layer_name) 97 | 98 | def fill_excused_options(self): 99 | if self.op.get_value('check_grads'): 100 | self.op.set_value('save_path', '') 101 | self.op.set_value('train_batch_range', '0') 102 | self.op.set_value('test_batch_range', '0') 103 | self.op.set_value('data_path', '') 104 | 105 | # Make sure the data provider returned data in proper format 106 | def parse_batch_data(self, batch_data, train=True): 107 | if max(d.dtype != n.single for d in batch_data[2]): 108 | raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.") 109 | return batch_data 110 | 111 | def start_batch(self, batch_data, train=True): 112 | data = batch_data[2] 113 | if self.check_grads: 114 | self.libmodel.checkGradients(data) 115 | elif not train and self.multiview_test: 116 | self.libmodel.startMultiviewTest(data, self.train_data_provider.num_views, self.logreg_idx) 117 | else: 118 | self.libmodel.startBatch(data, not train) 119 | 120 | def print_iteration(self): 121 | print "%d.%d..." % (self.epoch, self.batchnum), 122 | 123 | def print_train_time(self, compute_time_py): 124 | print "(%.3f sec)" % (compute_time_py) 125 | 126 | def print_costs(self, cost_outputs): 127 | costs, num_cases = cost_outputs[0], cost_outputs[1] 128 | for errname in costs.keys(): 129 | costs[errname] = [(v/num_cases) for v in costs[errname]] 130 | print "%s: " % errname, 131 | print ", ".join("%6f" % v for v in costs[errname]), 132 | if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]): 133 | print "^ got nan or inf!" 
134 | sys.exit(1) 135 | 136 | def print_train_results(self): 137 | self.print_costs(self.train_outputs[-1]) 138 | 139 | def print_test_status(self): 140 | pass 141 | 142 | def print_test_results(self): 143 | print "" 144 | print "======================Test output======================" 145 | self.print_costs(self.test_outputs[-1]) 146 | print "" 147 | print "-------------------------------------------------------", 148 | for i,l in enumerate(self.layers): # This is kind of hacky but will do for now. 149 | if 'weights' in l: 150 | if type(l['weights']) == n.ndarray: 151 | print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))), 152 | elif type(l['weights']) == list: 153 | print "" 154 | print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))), 155 | print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))), 156 | print "" 157 | 158 | def conditional_save(self): 159 | self.save_state() 160 | print "-------------------------------------------------------" 161 | print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file) 162 | print "=======================================================", 163 | 164 | def aggregate_test_outputs(self, test_outputs): 165 | num_cases = sum(t[1] for t in test_outputs) 166 | for i in xrange(1 ,len(test_outputs)): 167 | for k,v in test_outputs[i][0].items(): 168 | for j in xrange(len(v)): 169 | test_outputs[0][0][k][j] += test_outputs[i][0][k][j] 170 | return (test_outputs[0][0], num_cases) 171 | 172 | @classmethod 173 | def get_options_parser(cls): 174 | op = IGPUModel.get_options_parser() 175 | op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128) 176 | op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True) 177 | 
op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file") 178 | op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range']) 179 | op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0, requires=['logreg_name']) 180 | op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True) 181 | op.add_option("logreg-name", "logreg_name", StringOptionParser, "Cropped DP: logreg layer name (for --multiview-test)", default="") 182 | op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[]) 183 | op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[]) 184 | op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0) 185 | 186 | op.delete_option('max_test_err') 187 | op.options["max_filesize_mb"].default = 0 188 | op.options["testing_freq"].default = 50 189 | op.options["num_epochs"].default = 50000 190 | op.options['dp_type'].default = None 191 | 192 | DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider) 193 | DataProvider.register_data_provider('dummy-cn-n', 'Dummy ConvNet', DummyConvNetDataProvider) 194 | DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider) 195 | 196 | return op 197 | 198 | if __name__ == "__main__": 199 | #nr.seed(5) 200 | op = ConvNet.get_options_parser() 201 | 202 | op, load_dic = IGPUModel.parse_options(op) 203 | model = ConvNet(op, load_dic) 204 | model.start() 205 | -------------------------------------------------------------------------------- /data.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | import numpy as n 26 | from numpy.random import randn, rand, random_integers 27 | import os 28 | from util import * 29 | 30 | BATCH_META_FILE = "batches.meta" 31 | 32 | class DataProvider: 33 | BATCH_REGEX = re.compile('^data_batch_(\d+)(\.\d+)?$') 34 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 35 | if batch_range == None: 36 | batch_range = DataProvider.get_batch_nums(data_dir) 37 | if init_batchnum is None or init_batchnum not in batch_range: 38 | init_batchnum = batch_range[0] 39 | 40 | self.data_dir = data_dir 41 | self.batch_range = batch_range 42 | self.curr_epoch = init_epoch 43 | self.curr_batchnum = init_batchnum 44 | self.dp_params = dp_params 45 | self.batch_meta = self.get_batch_meta(data_dir) 46 | self.data_dic = None 47 | self.test = test 48 | self.batch_idx = batch_range.index(init_batchnum) 49 | 50 | def get_next_batch(self): 51 | if self.data_dic is None or len(self.batch_range) > 1: 52 | self.data_dic = self.get_batch(self.curr_batchnum) 53 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 54 | self.advance_batch() 55 | 56 | return epoch, batchnum, self.data_dic 57 | 58 | def __add_subbatch(self, batch_num, sub_batchnum, batch_dic): 59 | subbatch_path = "%s.%d" % (os.path.join(self.data_dir, self.get_data_file_name(batch_num)), sub_batchnum) 60 | if os.path.exists(subbatch_path): 61 | sub_dic = unpickle(subbatch_path) 62 | self._join_batches(batch_dic, sub_dic) 63 | else: 64 | raise IndexError("Sub-batch %d.%d does not exist in %s" % (batch_num,sub_batchnum, self.data_dir)) 65 | 66 | def _join_batches(self, main_batch, sub_batch): 67 | main_batch['data'] = n.r_[main_batch['data'], sub_batch['data']] 68 | 69 | def get_batch(self, batch_num): 70 | if os.path.exists(self.get_data_file_name(batch_num) + '.1'): # batch in sub-batches 71 | dic = unpickle(self.get_data_file_name(batch_num) + '.1') 72 | sb_idx = 2 73 | while True: 74 | try: 75 | 
self.__add_subbatch(batch_num, sb_idx, dic) 76 | sb_idx += 1 77 | except IndexError: 78 | break 79 | else: 80 | dic = unpickle(self.get_data_file_name(batch_num)) 81 | return dic 82 | 83 | def get_data_dims(self): 84 | return self.batch_meta['num_vis'] 85 | 86 | def advance_batch(self): 87 | self.batch_idx = self.get_next_batch_idx() 88 | self.curr_batchnum = self.batch_range[self.batch_idx] 89 | if self.batch_idx == 0: # we wrapped 90 | self.curr_epoch += 1 91 | 92 | def get_next_batch_idx(self): 93 | return (self.batch_idx + 1) % len(self.batch_range) 94 | 95 | def get_next_batch_num(self): 96 | return self.batch_range[self.get_next_batch_idx()] 97 | 98 | # get filename of current batch 99 | def get_data_file_name(self, batchnum=None): 100 | if batchnum is None: 101 | batchnum = self.curr_batchnum 102 | return os.path.join(self.data_dir, 'data_batch_%d' % batchnum) 103 | 104 | @classmethod 105 | def get_instance(cls, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, type="default", dp_params={}, test=False): 106 | # why the fuck can't i reference DataProvider in the original definition? 
107 | #cls.dp_classes['default'] = DataProvider 108 | type = type or DataProvider.get_batch_meta(data_dir)['dp_type'] # allow data to decide data provider 109 | if type.startswith("dummy-"): 110 | name = "-".join(type.split('-')[:-1]) + "-n" 111 | if name not in dp_types: 112 | raise DataProviderException("No such data provider: %s" % type) 113 | _class = dp_classes[name] 114 | dims = int(type.split('-')[-1]) 115 | return _class(dims) 116 | elif type in dp_types: 117 | _class = dp_classes[type] 118 | return _class(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 119 | 120 | raise DataProviderException("No such data provider: %s" % type) 121 | 122 | @classmethod 123 | def register_data_provider(cls, name, desc, _class): 124 | if name in dp_types: 125 | raise DataProviderException("Data provider %s already registered" % name) 126 | dp_types[name] = desc 127 | dp_classes[name] = _class 128 | 129 | @staticmethod 130 | def get_batch_meta(data_dir): 131 | return unpickle(os.path.join(data_dir, BATCH_META_FILE)) 132 | 133 | @staticmethod 134 | def get_batch_filenames(srcdir): 135 | return sorted([f for f in os.listdir(srcdir) if DataProvider.BATCH_REGEX.match(f)], key=alphanum_key) 136 | 137 | @staticmethod 138 | def get_batch_nums(srcdir): 139 | names = DataProvider.get_batch_filenames(srcdir) 140 | return sorted(list(set(int(DataProvider.BATCH_REGEX.match(n).group(1)) for n in names))) 141 | 142 | @staticmethod 143 | def get_num_batches(srcdir): 144 | return len(DataProvider.get_batch_nums(srcdir)) 145 | 146 | class DummyDataProvider(DataProvider): 147 | def __init__(self, data_dim): 148 | #self.data_dim = data_dim 149 | self.batch_range = [1] 150 | self.batch_meta = {'num_vis': data_dim, 'data_in_rows':True} 151 | self.curr_epoch = 1 152 | self.curr_batchnum = 1 153 | self.batch_idx = 0 154 | 155 | def get_next_batch(self): 156 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 157 | self.advance_batch() 158 | data = rand(512, 
self.get_data_dims()).astype(n.single) 159 | return self.curr_epoch, self.curr_batchnum, {'data':data} 160 | 161 | 162 | class LabeledDummyDataProvider(DummyDataProvider): 163 | def __init__(self, data_dim, num_classes=10, num_cases=512): 164 | #self.data_dim = data_dim 165 | self.batch_range = [1] 166 | self.batch_meta = {'num_vis': data_dim, 167 | 'label_names': [str(x) for x in range(num_classes)], 168 | 'data_in_rows':True} 169 | self.num_cases = num_cases 170 | self.num_classes = num_classes 171 | self.curr_epoch = 1 172 | self.curr_batchnum = 1 173 | self.batch_idx=0 174 | 175 | def get_num_classes(self): 176 | return self.num_classes 177 | 178 | def get_next_batch(self): 179 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 180 | self.advance_batch() 181 | data = rand(self.num_cases, self.get_data_dims()).astype(n.single) # <--changed to rand 182 | labels = n.require(n.c_[random_integers(0,self.num_classes-1,self.num_cases)], requirements='C', dtype=n.single) 183 | 184 | return self.curr_epoch, self.curr_batchnum, {'data':data, 'labels':labels} 185 | 186 | class MemoryDataProvider(DataProvider): 187 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params=None, test=False): 188 | DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 189 | self.data_dic = [] 190 | for i in self.batch_range: 191 | self.data_dic += [self.get_batch(i)] 192 | 193 | def get_next_batch(self): 194 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 195 | self.advance_batch() 196 | 197 | return epoch, batchnum, self.data_dic[batchnum - self.batch_range[0]] 198 | 199 | class LabeledDataProvider(DataProvider): 200 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 201 | DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 202 | 203 | def get_num_classes(self): 204 | return 
len(self.batch_meta['label_names']) 205 | 206 | class LabeledMemoryDataProvider(LabeledDataProvider): 207 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 208 | LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 209 | self.data_dic = [] 210 | for i in batch_range: 211 | self.data_dic += [unpickle(self.get_data_file_name(i))] 212 | self.data_dic[-1]["labels"] = n.c_[n.require(self.data_dic[-1]['labels'], dtype=n.single)] 213 | 214 | def get_next_batch(self): 215 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 216 | self.advance_batch() 217 | bidx = batchnum - self.batch_range[0] 218 | return epoch, batchnum, self.data_dic[bidx] 219 | 220 | dp_types = {"default": "The default data provider; loads one batch into memory at a time", 221 | "memory": "Loads the entire dataset into memory", 222 | "labeled": "Returns data and labels (used by classifiers)", 223 | "labeled-memory": "Combination labeled + memory", 224 | "dummy-n": "Dummy data provider for n-dimensional data", 225 | "dummy-labeled-n": "Labeled dummy data provider for n-dimensional data"} 226 | dp_classes = {"default": DataProvider, 227 | "memory": MemoryDataProvider, 228 | "labeled": LabeledDataProvider, 229 | "labeled-memory": LabeledMemoryDataProvider, 230 | "dummy-n": DummyDataProvider, 231 | "dummy-labeled-n": LabeledDummyDataProvider} 232 | 233 | class DataProviderException(Exception): 234 | pass 235 | -------------------------------------------------------------------------------- /example-layers/layer-params-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | # Reduce all learning rates by factor of 10 after 120 epochs. 4 | # Then another factor of 10 after 10 more epochs. 
5 | 6 | [conv1] 7 | epsW=0.001 8 | epsB=0.002 9 | momW=0.9 10 | momB=0.9 11 | wc=0.004 12 | 13 | [conv2] 14 | epsW=0.001 15 | epsB=0.002 16 | momW=0.9 17 | momB=0.9 18 | wc=0.004 19 | 20 | [conv3] 21 | epsW=0.001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.004 26 | 27 | [fc10] 28 | epsW=0.001 29 | epsB=0.002 30 | momW=0.9 31 | momB=0.9 32 | wc=1 33 | 34 | [logprob] 35 | coeff=1 36 | 37 | [rnorm1] 38 | scale=0.00005 39 | pow=.75 40 | 41 | [rnorm2] 42 | scale=0.00005 43 | pow=.75 44 | -------------------------------------------------------------------------------- /example-layers/layer-params-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer parameter file 2 | # Set wc to 0 for translations -- 14.2% 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.004 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.004 17 | 18 | [conv3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [fc10] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=3 31 | 32 | [logprob] 33 | coeff=1 34 | -------------------------------------------------------------------------------- /example-layers/layer-params-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer parameter file 2 | # You should reduce the learning rate after 8 epochs by a factor of 10. 
3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.004 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.004 17 | 18 | [conv3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [fc64] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=.03 31 | 32 | [fc10] 33 | epsW=0.001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=.03 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-11pct.cfg: -------------------------------------------------------------------------------- 1 | # 11% error on CIFAR-10 - layer parameter file 2 | # Methodology: 3 | # 1. Train on batches 1-4, use batch 5 for validation. 4 | # 2. After about 350 epochs, validation error no longer making improvements. 5 | # 3. Fold in batch 5. 6 | # 4. Train on batches 1-5 for about 150 more epochs, until the batch 5 error is near the errors for batches 1-4. It takes forever to actually get there but after 150 epochs it's close enough. 7 | # 5. Lower learning rates (epsW) by a factor of 10 to 0.0001, train for 10 more epochs. 8 | # 6. Lower learning rates (epsW) by another factor of 10 to 0.00001, train for 10 more epochs. 9 | # 7. Stop. 
Test on batch 6 with --test-range=6 --multiview-test=1 --logreg-name=logprob (read more about what this does here: http://code.google.com/p/cuda-convnet/wiki/TrainingNet#Training_on_image_translations ) 10 | 11 | # More details about methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 12 | 13 | [conv1] 14 | epsW=0.001 15 | epsB=0.002 16 | momW=0.9 17 | momB=0.9 18 | wc=0.000 19 | 20 | [conv2] 21 | epsW=0.001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.000 26 | 27 | [local3] 28 | epsW=0.001 29 | epsB=0.002 30 | momW=0.9 31 | momB=0.9 32 | wc=0.004 33 | 34 | [local4] 35 | epsW=0.001 36 | epsB=0.002 37 | momW=0.9 38 | momB=0.9 39 | wc=0.004 40 | 41 | [fc10] 42 | epsW=0.001 43 | epsB=0.002 44 | momW=0.9 45 | momB=0.9 46 | wc=0.01 47 | 48 | [logprob] 49 | coeff=1 50 | 51 | [rnorm1] 52 | scale=0.001 53 | pow=0.75 54 | 55 | [rnorm2] 56 | scale=0.001 57 | pow=0.75 58 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 - layer parameter file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.00 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.00 17 | 18 | [local3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [local4] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=0.004 31 | 32 | [fc10] 33 | epsW=0.001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=0.004 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-example.cfg: -------------------------------------------------------------------------------- 1 | [conv32] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | 
momB=0.9 6 | wc=0 7 | 8 | [local32] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [fc1024] 16 | momW=0.9 17 | momB=0.9 18 | epsW=0.00001 19 | epsB=0.00002 20 | wc=0 21 | 22 | [conv32-2] 23 | epsW=0.001 24 | epsB=0.002 25 | momW=0.9 26 | momB=0.9 27 | wc=0 28 | 29 | [conv32-3] 30 | epsW=0.001 31 | epsB=0.002 32 | momW=0.9 33 | momB=0.9 34 | wc=0 35 | 36 | [fc10] 37 | epsW=0.0001,0.001 38 | epsB=0.002 39 | momW=0.5,0.9 40 | momB=0.9 41 | wc=0,0 42 | 43 | [logprob] 44 | coeff=1 45 | -------------------------------------------------------------------------------- /example-layers/layer-params.gc.cfg: -------------------------------------------------------------------------------- 1 | [conv32] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0 7 | 8 | [local32] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [fc10] 16 | wc=0,0 17 | momB=0 18 | momW=0,0 19 | epsW=0.00001,0.00001 20 | epsB=0.00002 21 | 22 | [logprob] 23 | coeff=1 24 | -------------------------------------------------------------------------------- /example-layers/layers-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=4 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [rnorm1] 35 | type=rnorm 36 | inputs=pool1 37 | channels=32 38 | size=3 39 | 40 | [conv2] 41 | type=conv 42 | inputs=rnorm1 43 | filters=32 44 | padding=2 45 | stride=1 46 | filterSize=5 47 | channels=32 48 | neuron=relu 49 | initW=0.01 50 | partialSum=4 51 | sharedBiases=1 52 | 
53 | [pool2] 54 | type=pool 55 | pool=avg 56 | inputs=conv2 57 | start=0 58 | sizeX=3 59 | stride=2 60 | outputsX=0 61 | channels=32 62 | 63 | [rnorm2] 64 | type=rnorm 65 | inputs=pool2 66 | channels=32 67 | size=3 68 | 69 | [conv3] 70 | type=conv 71 | inputs=rnorm2 72 | filters=64 73 | padding=2 74 | stride=1 75 | filterSize=5 76 | channels=32 77 | neuron=relu 78 | initW=0.01 79 | partialSum=4 80 | sharedBiases=1 81 | 82 | [pool3] 83 | type=pool 84 | pool=avg 85 | inputs=conv3 86 | start=0 87 | sizeX=3 88 | stride=2 89 | outputsX=0 90 | channels=64 91 | 92 | [fc10] 93 | type=fc 94 | outputs=10 95 | inputs=pool3 96 | initW=0.01 97 | 98 | [probs] 99 | type=softmax 100 | inputs=fc10 101 | 102 | [logprob] 103 | type=cost.logreg 104 | inputs=labels,probs 105 | -------------------------------------------------------------------------------- /example-layers/layers-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=1 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=1 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=1 68 | sharedBiases=1 
69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=pool3 84 | initW=0.01 85 | 86 | [probs] 87 | type=softmax 88 | inputs=fc10 89 | 90 | [logprob] 91 | type=cost.logreg 92 | inputs=labels,probs 93 | -------------------------------------------------------------------------------- /example-layers/layers-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=4 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=4 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=4 68 | sharedBiases=1 69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc64] 81 | type=fc 82 | outputs=64 83 | inputs=pool3 84 | initW=0.1 85 | neuron=relu 86 | 87 | [fc10] 88 | type=fc 89 | outputs=10 90 | inputs=fc64 91 | initW=0.1 92 | 93 | [probs] 94 | type=softmax 95 | inputs=fc10 96 | 97 | [logprob] 98 | type=cost.logreg 99 | 
inputs=labels,probs 100 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-11pct.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv1] 10 | type=conv 11 | inputs=data 12 | channels=3 13 | filters=64 14 | padding=2 15 | stride=1 16 | filterSize=5 17 | neuron=relu 18 | initW=0.0001 19 | partialSum=4 20 | sharedBiases=1 21 | 22 | [pool1] 23 | type=pool 24 | pool=max 25 | inputs=conv1 26 | start=0 27 | sizeX=3 28 | stride=2 29 | outputsX=0 30 | channels=64 31 | 32 | [rnorm1] 33 | type=cmrnorm 34 | inputs=pool1 35 | channels=64 36 | size=9 37 | 38 | [conv2] 39 | type=conv 40 | inputs=rnorm1 41 | filters=64 42 | padding=2 43 | stride=1 44 | filterSize=5 45 | channels=64 46 | neuron=relu 47 | initW=0.01 48 | partialSum=8 49 | sharedBiases=1 50 | 51 | [rnorm2] 52 | type=cmrnorm 53 | inputs=conv2 54 | channels=64 55 | size=9 56 | 57 | [pool2] 58 | type=pool 59 | pool=max 60 | inputs=rnorm2 61 | start=0 62 | sizeX=3 63 | stride=2 64 | outputsX=0 65 | channels=64 66 | 67 | [local3] 68 | type=local 69 | inputs=pool2 70 | filters=64 71 | padding=1 72 | stride=1 73 | filterSize=3 74 | channels=64 75 | neuron=relu 76 | initW=0.04 77 | 78 | [local4] 79 | type=local 80 | inputs=local3 81 | filters=32 82 | padding=1 83 | stride=1 84 | filterSize=3 85 | channels=64 86 | neuron=relu 87 | initW=0.04 88 | 89 | [fc10] 90 | type=fc 91 | outputs=10 92 | inputs=local4 93 | initW=0.01 94 | 95 | [probs] 96 | type=softmax 97 | inputs=fc10 98 | 99 | [logprob] 100 | type=cost.logreg 101 | inputs=labels,probs 102 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file 2 | # See 
methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [data] 5 | type=data 6 | dataIdx=0 7 | 8 | [labels] 9 | type=data 10 | dataIdx=1 11 | 12 | [conv1] 13 | type=conv 14 | inputs=data 15 | channels=3 16 | filters=64 17 | padding=2 18 | stride=1 19 | filterSize=5 20 | neuron=relu 21 | initW=0.0001 22 | partialSum=4 23 | sharedBiases=1 24 | 25 | [pool1] 26 | type=pool 27 | pool=max 28 | inputs=conv1 29 | start=0 30 | sizeX=3 31 | stride=2 32 | outputsX=0 33 | channels=64 34 | 35 | [conv2] 36 | type=conv 37 | inputs=pool1 38 | filters=64 39 | padding=2 40 | stride=1 41 | filterSize=5 42 | channels=64 43 | neuron=relu 44 | initW=0.01 45 | partialSum=8 46 | sharedBiases=1 47 | 48 | [pool2] 49 | type=pool 50 | pool=max 51 | inputs=conv2 52 | start=0 53 | sizeX=3 54 | stride=2 55 | outputsX=0 56 | channels=64 57 | 58 | [local3] 59 | type=local 60 | inputs=pool2 61 | filters=32 62 | padding=1 63 | stride=1 64 | filterSize=3 65 | channels=64 66 | neuron=relu 67 | initW=0.04 68 | 69 | [local4] 70 | type=local 71 | inputs=local3 72 | filters=32 73 | padding=1 74 | stride=1 75 | filterSize=3 76 | channels=32 77 | neuron=relu 78 | initW=0.04 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=local4 84 | initW=0.01 85 | neuron=ident 86 | 87 | [probs] 88 | type=softmax 89 | inputs=fc10 90 | 91 | [logprob] 92 | type=cost.logreg 93 | inputs=labels,probs 94 | -------------------------------------------------------------------------------- /example-layers/layers-example.cfg: -------------------------------------------------------------------------------- 1 | # This is a layer configuration file that contains all the 2 | # layer types supported by this code. It's not actually good for anything 3 | # other than demonstrating how layers are specified and connected to one another. 4 | 5 | # Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580. 
6 | # But there's no particular reason to run the net specified by this file. It's not actually good. 7 | 8 | [data] 9 | type=data 10 | dataIdx=0 11 | 12 | [labels] 13 | type=data 14 | dataIdx=1 15 | 16 | [conv32] 17 | type=conv 18 | inputs=data 19 | channels=3 20 | filters=32 21 | padding=4 22 | stride=1 23 | filterSize=9 24 | neuron=logistic 25 | initW=0.00001 26 | partialSum=1 27 | sharedBiases=true 28 | 29 | [local32] 30 | type=local 31 | inputs=conv32 32 | channels=32 33 | filters=32 34 | padding=4 35 | stride=1 36 | filterSize=9 37 | neuron=logistic 38 | initW=0.00001 39 | 40 | [fc1024] 41 | type=fc 42 | outputs=1024 43 | inputs=data 44 | initW=0.001 45 | neuron=relu 46 | 47 | [maxpool] 48 | type=pool 49 | pool=max 50 | inputs=local32 51 | start=0 52 | sizeX=4 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [rnorm1] 58 | type=rnorm 59 | inputs=maxpool 60 | channels=32 61 | sizeX=5 62 | scale=0.0000125 63 | pow=0.75 64 | 65 | [cnorm1] 66 | type=cnorm 67 | inputs=rnorm1 68 | channels=32 69 | sizeX=7 70 | scale=0.001 71 | pow=0.5 72 | 73 | [conv32-2] 74 | type=conv 75 | inputs=cnorm1 76 | groups=4 77 | channels=32 78 | filters=32 79 | padding=2 80 | stride=1 81 | filterSize=5 82 | neuron=relu 83 | initW=0.0001 84 | partialSum=1 85 | sharedBiases=false 86 | 87 | [conv32-3] 88 | type=conv 89 | inputs=conv32-2 90 | groups=4 91 | channels=128 92 | filters=32 93 | padding=2 94 | stride=2 95 | filterSize=5 96 | neuron=relu 97 | initW=0.0001 98 | partialSum=1 99 | randSparse=true 100 | filterChannels=64 101 | 102 | [fc10] 103 | type=fc 104 | outputs=10 105 | inputs=conv32-3,fc1024 106 | initW=0.0001,0.0001 107 | neuron=ident 108 | 109 | [probs] 110 | type=softmax 111 | inputs=fc10 112 | 113 | [logprob] 114 | type=cost.logreg 115 | inputs=labels,probs 116 | -------------------------------------------------------------------------------- /example-layers/layers.gc.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | 
type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv32] 10 | type=conv 11 | inputs=data 12 | filters=16 13 | padding=0 14 | stride=1 15 | filterSize=3 16 | channels=3 17 | neuron=linear[3,2.2] 18 | initW=0.8 19 | partialSum=1 20 | sharedBiases=true 21 | 22 | [avgpool] 23 | type=pool 24 | pool=avg 25 | inputs=conv32 26 | start=-2 27 | sizeX=4 28 | stride=4 29 | outputsX=0 30 | channels=16 31 | 32 | [local32] 33 | type=local 34 | inputs=avgpool 35 | filters=32 36 | padding=2 37 | stride=3 38 | filterSize=5 39 | channels=16 40 | neuron=tanh[1.79,-0.66] 41 | initW=0.4 42 | #partialSum=1 43 | #sharedBiases=true 44 | groups=2 45 | randSparse=true 46 | 47 | [fc10] 48 | type=fc 49 | outputs=10 50 | inputs=local32,conv32 51 | initW=0.8,0.008 52 | 53 | [probs] 54 | type=softmax 55 | inputs=fc10 56 | 57 | [logprob] 58 | type=cost.logreg 59 | inputs=labels,probs 60 | -------------------------------------------------------------------------------- /include/common/matrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef MATRIX_H_ 28 | #define MATRIX_H_ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #if defined(_WIN64) || defined(_WIN32) 40 | #include 41 | #define isnan(_X) (_isnan(_X)) 42 | #define isinf(_X) (!_finite(_X)) 43 | #define uint unsigned int 44 | double sqrt(int _X); 45 | double log(int _X); 46 | #endif 47 | 48 | #ifdef USE_MKL 49 | #include 50 | #include 51 | #include 52 | #include 53 | 54 | #define IS_MKL true 55 | 56 | #ifdef DOUBLE_PRECISION 57 | #define MKL_UNIFORM vdRngUniform 58 | #define MKL_NORMAL vdRngGaussian 59 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_DUNIFORM_STD_ACCURATE 60 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_DGAUSSIAN_BOXMULLER 61 | #define MKL_EXP vdExp 62 | #define MKL_RECIP vdInv 63 | #define MKL_SQUARE vdSqr 64 | #define MKL_TANH vdTanh 65 | #define MKL_LOG vdLn 66 | #define MKL_VECMUL vdMul 67 | #define MKL_VECDIV vdDiv 68 | #else 69 | #define MKL_UNIFORM vsRngUniform 70 | #define MKL_NORMAL vsRngGaussian 71 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_SUNIFORM_STD_ACCURATE 72 | #define MKL_GAUSSIAN_RND_METHOD 
VSL_METHOD_SGAUSSIAN_BOXMULLER 73 | #define MKL_EXP vsExp 74 | #define MKL_RECIP vsInv 75 | #define MKL_SQUARE vsSqr 76 | #define MKL_TANH vsTanh 77 | #define MKL_LOG vsLn 78 | #define MKL_VECMUL vsMul 79 | #define MKL_VECDIV vsDiv 80 | #endif /* DOUBLE_PRECISION */ 81 | 82 | #else 83 | extern "C" { 84 | #include 85 | } 86 | #define IS_MKL false 87 | #endif /* USE_MKL */ 88 | 89 | #ifdef DOUBLE_PRECISION 90 | #define CBLAS_GEMM cblas_dgemm 91 | #define CBLAS_SCAL cblas_dscal 92 | #define CBLAS_AXPY cblas_daxpy 93 | #else 94 | #define CBLAS_GEMM cblas_sgemm 95 | #define CBLAS_SCAL cblas_sscal 96 | #define CBLAS_AXPY cblas_saxpy 97 | #endif /* DOUBLE_PRECISION */ 98 | 99 | #define MTYPE_MAX numeric_limits::max() 100 | 101 | class Matrix { 102 | private: 103 | MTYPE* _data; 104 | bool _ownsData; 105 | long int _numRows, _numCols; 106 | long int _numElements; 107 | CBLAS_TRANSPOSE _trans; 108 | 109 | void _init(MTYPE* data, long int numRows, long int numCols, bool transpose, bool ownsData); 110 | void _tileTo2(Matrix& target) const; 111 | void _copyAllTo(Matrix& target) const; 112 | MTYPE _sum_column(long int col) const; 113 | MTYPE _sum_row(long int row) const; 114 | MTYPE _aggregate(MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 115 | void _aggregate(long int axis, Matrix& target, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 116 | MTYPE _aggregateRow(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 117 | MTYPE _aggregateCol(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 118 | void _updateDims(long int numRows, long int numCols); 119 | void _applyLoop(MTYPE(*func)(MTYPE)); 120 | void _applyLoop(MTYPE (*func)(MTYPE), Matrix& target); 121 | void _applyLoop2(const Matrix& a, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const; 122 | void _applyLoop2(const Matrix& a, MTYPE (*func)(MTYPE,MTYPE, MTYPE), MTYPE scalar, Matrix& target) const; 123 | void _applyLoopScalar(const MTYPE scalar, 
MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const; 124 | void _checkBounds(long int startRow, long int endRow, long int startCol, long int endCol) const; 125 | void _divideByVector(const Matrix& vec, Matrix& target); 126 | inline long int _getNumColsBackEnd() const { 127 | return _trans == CblasNoTrans ? _numCols : _numRows; 128 | } 129 | public: 130 | enum FUNCTION { 131 | TANH, RECIPROCAL, SQUARE, ABS, EXP, LOG, ZERO, ONE, LOGISTIC1, LOGISTIC2, SIGN 132 | }; 133 | Matrix(); 134 | Matrix(long int numRows, long int numCols); 135 | Matrix(const PyArrayObject *src); 136 | Matrix(const Matrix &like); 137 | Matrix(MTYPE* data, long int numRows, long int numCols); 138 | Matrix(MTYPE* data, long int numRows, long int numCols, bool transpose); 139 | ~Matrix(); 140 | 141 | inline MTYPE& getCell(long int i, long int j) const { 142 | assert(i >= 0 && i < _numRows); 143 | assert(j >= 0 && j < _numCols); 144 | if (_trans == CblasTrans) { 145 | return _data[j * _numRows + i]; 146 | } 147 | return _data[i * _numCols + j]; 148 | } 149 | 150 | MTYPE& operator()(long int i, long int j) const { 151 | return getCell(i, j); 152 | } 153 | 154 | inline MTYPE* getData() const { 155 | return _data; 156 | } 157 | 158 | inline bool isView() const { 159 | return !_ownsData; 160 | } 161 | 162 | inline long int getNumRows() const { 163 | return _numRows; 164 | } 165 | 166 | inline long int getNumCols() const { 167 | return _numCols; 168 | } 169 | 170 | inline long int getNumDataBytes() const { 171 | return _numElements * sizeof(MTYPE); 172 | } 173 | 174 | inline long int getNumElements() const { 175 | return _numElements; 176 | } 177 | 178 | inline long int getLeadingDim() const { 179 | return _trans == CblasTrans ? _numRows : _numCols; 180 | } 181 | 182 | inline long int getFollowingDim() const { 183 | return _trans == CblasTrans ? 
_numCols : _numRows; 184 | } 185 | 186 | inline CBLAS_TRANSPOSE getBLASTrans() const { 187 | return _trans; 188 | } 189 | 190 | inline bool isSameDims(const Matrix& a) const { 191 | return a.getNumRows() == getNumRows() && a.getNumCols() == getNumCols(); 192 | } 193 | 194 | inline bool isTrans() const { 195 | return _trans == CblasTrans; 196 | } 197 | 198 | /* 199 | * Only use if you know what you're doing! 200 | * Does not update any dimensions. Just flips the _trans flag. 201 | * 202 | * Use transpose() if you want to get the transpose of this matrix. 203 | */ 204 | inline void setTrans(bool trans) { 205 | assert(isTrans() == trans || !isView()); 206 | _trans = trans ? CblasTrans : CblasNoTrans; 207 | } 208 | 209 | void apply(FUNCTION f); 210 | void apply(Matrix::FUNCTION f, Matrix& target); 211 | void subtractFromScalar(MTYPE scalar); 212 | void subtractFromScalar(MTYPE scalar, Matrix &target) const; 213 | void biggerThanScalar(MTYPE scalar); 214 | void smallerThanScalar(MTYPE scalar); 215 | void equalsScalar(MTYPE scalar); 216 | void biggerThanScalar(MTYPE scalar, Matrix& target) const; 217 | void smallerThanScalar(MTYPE scalar, Matrix& target) const; 218 | void equalsScalar(MTYPE scalar, Matrix& target) const; 219 | void biggerThan(Matrix& a); 220 | void biggerThan(Matrix& a, Matrix& target) const; 221 | void smallerThan(Matrix& a); 222 | void smallerThan(Matrix& a, Matrix& target) const; 223 | void minWith(Matrix &a); 224 | void minWith(Matrix &a, Matrix &target) const; 225 | void maxWith(Matrix &a); 226 | void maxWith(Matrix &a, Matrix &target) const; 227 | void equals(Matrix& a); 228 | void equals(Matrix& a, Matrix& target) const; 229 | void notEquals(Matrix& a) ; 230 | void notEquals(Matrix& a, Matrix& target) const; 231 | void add(const Matrix &m); 232 | void add(const Matrix &m, MTYPE scale); 233 | void add(const Matrix &m, Matrix& target); 234 | void add(const Matrix &m, MTYPE scale, Matrix& target); 235 | void subtract(const Matrix &m); 236 | void 
subtract(const Matrix &m, Matrix& target); 237 | void subtract(const Matrix &m, MTYPE scale); 238 | void subtract(const Matrix &m, MTYPE scale, Matrix& target); 239 | void addVector(const Matrix& vec, MTYPE scale); 240 | void addVector(const Matrix& vec, MTYPE scale, Matrix& target); 241 | void addVector(const Matrix& vec); 242 | void addVector(const Matrix& vec, Matrix& target); 243 | void addScalar(MTYPE scalar); 244 | void addScalar(MTYPE scalar, Matrix& target) const; 245 | void maxWithScalar(MTYPE scalar); 246 | void maxWithScalar(MTYPE scalar, Matrix &target) const; 247 | void minWithScalar(MTYPE scalar); 248 | void minWithScalar(MTYPE scalar, Matrix &target) const; 249 | void eltWiseMultByVector(const Matrix& vec); 250 | void eltWiseMultByVector(const Matrix& vec, Matrix& target); 251 | void eltWiseDivideByVector(const Matrix& vec); 252 | void eltWiseDivideByVector(const Matrix& vec, Matrix& target); 253 | void resize(long int newNumRows, long int newNumCols); 254 | void resize(const Matrix& like); 255 | Matrix& slice(long int startRow, long int endRow, long int startCol, long int endCol) const; 256 | void slice(long int startRow, long int endRow, long int startCol, long int endCol, Matrix &target) const; 257 | Matrix& sliceRows(long int startRow, long int endRow) const; 258 | void sliceRows(long int startRow, long int endRow, Matrix& target) const; 259 | Matrix& sliceCols(long int startCol, long int endCol) const; 260 | void sliceCols(long int startCol, long int endCol, Matrix& target) const; 261 | void rightMult(const Matrix &b, MTYPE scale); 262 | void rightMult(const Matrix &b, Matrix &target) const; 263 | void rightMult(const Matrix &b); 264 | void rightMult(const Matrix &b, MTYPE scaleAB, Matrix &target) const; 265 | void addProduct(const Matrix &a, const Matrix &b, MTYPE scaleAB, MTYPE scaleThis); 266 | void addProduct(const Matrix& a, const Matrix& b); 267 | void eltWiseMult(const Matrix& a); 268 | void eltWiseMult(const Matrix& a, Matrix& target) 
const; 269 | void eltWiseDivide(const Matrix& a); 270 | void eltWiseDivide(const Matrix& a, Matrix &target) const; 271 | Matrix& transpose() const; 272 | Matrix& transpose(bool hard) const; 273 | Matrix& tile(long int timesY, long int timesX) const; 274 | void tile(long int timesY, long int timesX, Matrix& target) const; 275 | void copy(Matrix &dest, long int srcStartRow, long int srcEndRow, long int srcStartCol, long int srcEndCol, long int destStartRow, long int destStartCol) const; 276 | Matrix& copy() const; 277 | void copy(Matrix& target) const; 278 | Matrix& sum(long int axis) const; 279 | void sum(long int axis, Matrix &target) const; 280 | MTYPE sum() const; 281 | MTYPE max() const; 282 | Matrix& max(long int axis) const; 283 | void max(long int axis, Matrix& target) const; 284 | MTYPE min() const; 285 | Matrix& min(long int axis) const; 286 | void min(long int axis, Matrix& target) const; 287 | MTYPE norm() const; 288 | MTYPE norm2() const; 289 | void scale(MTYPE scale); 290 | void scale(MTYPE alpha, Matrix& target); 291 | void reshape(long int numRows, long int numCols); 292 | Matrix& reshaped(long int numRows, long int numCols); 293 | void printShape(const char* name) const; 294 | bool hasNan() const; 295 | bool hasInf() const; 296 | #ifdef USE_MKL 297 | void randomizeNormal(VSLStreamStatePtr stream, MTYPE mean, MTYPE stdev); 298 | void randomizeUniform(VSLStreamStatePtr stream); 299 | void randomizeNormal(VSLStreamStatePtr stream); 300 | #else 301 | void randomizeNormal(MTYPE mean, MTYPE stdev); 302 | void randomizeUniform(); 303 | void randomizeNormal(); 304 | #endif 305 | void print() const; 306 | void print(long int startRow,long int rows, long int startCol,long int cols) const; 307 | void print(long int rows, long int cols) const; 308 | }; 309 | 310 | #endif /* MATRIX_H_ */ 311 | -------------------------------------------------------------------------------- /include/common/matrix_funcs.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef MATRIX_FUNCS_H_ 28 | #define MATRIX_FUNCS_H_ 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #ifdef DOUBLE_PRECISION 35 | #define MTYPE double 36 | #else 37 | #define MTYPE float 38 | #endif 39 | 40 | #define MYRAND ((double)rand() / ((double)RAND_MAX + 1)) 41 | 42 | inline MTYPE _zero(MTYPE x) { 43 | return 0; 44 | } 45 | 46 | inline MTYPE _one(MTYPE x) { 47 | return 1; 48 | } 49 | 50 | inline MTYPE _abs(MTYPE x) { 51 | return x > 0 ? x : -x; 52 | } 53 | 54 | inline MTYPE _square(MTYPE x) { 55 | return x * x; 56 | } 57 | 58 | inline MTYPE _sigma1(MTYPE x) { 59 | return (tanh(x / 2) + 1) / 2; 60 | } 61 | 62 | inline MTYPE _sigma2(MTYPE x) { 63 | return 1 / (1 + exp(-x)); 64 | } 65 | 66 | inline MTYPE _recip(MTYPE x) { 67 | return 1 / x; 68 | } 69 | 70 | inline MTYPE _exp(MTYPE x) { 71 | return exp(x); 72 | } 73 | 74 | inline MTYPE _log(MTYPE x) { 75 | return log(x); 76 | } 77 | 78 | inline MTYPE _tanh(MTYPE x) { 79 | return tanh(x); 80 | } 81 | 82 | inline MTYPE _sign(MTYPE x) { 83 | return x > 0 ? 
1 : -1; 84 | } 85 | 86 | inline MTYPE _rand(MTYPE x) { 87 | return MYRAND; 88 | } 89 | 90 | inline MTYPE _divide(MTYPE x, MTYPE y) { 91 | return x / y; 92 | } 93 | 94 | inline MTYPE _mult(MTYPE x, MTYPE y) { 95 | return x * y; 96 | } 97 | 98 | inline MTYPE _add(MTYPE x, MTYPE y) { 99 | return x + y; 100 | } 101 | 102 | inline MTYPE _addSquare(MTYPE x, MTYPE y) { 103 | return x*x + y; 104 | } 105 | 106 | inline MTYPE _addWithScale(MTYPE x, MTYPE y, MTYPE scale) { 107 | return x + scale*y; 108 | } 109 | 110 | inline MTYPE _max(MTYPE x, MTYPE y) { 111 | return std::max(x, y); 112 | } 113 | 114 | inline MTYPE _min(MTYPE x, MTYPE y) { 115 | return std::min(x, y); 116 | } 117 | 118 | inline MTYPE _bigger(MTYPE x, MTYPE y) { 119 | return x > y; 120 | } 121 | 122 | inline MTYPE _smaller(MTYPE x, MTYPE y) { 123 | return x < y; 124 | } 125 | 126 | inline MTYPE _equal(MTYPE x, MTYPE y) { 127 | return x == y; 128 | } 129 | 130 | inline MTYPE _notEqual(MTYPE x, MTYPE y) { 131 | return x != y; 132 | } 133 | 134 | #endif /* MATRIX_FUNCS_H_ */ 135 | -------------------------------------------------------------------------------- /include/common/queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef QUEUE_H_ 28 | #define QUEUE_H_ 29 | #include 30 | #include 31 | 32 | /* 33 | * A thread-safe circular queue that automatically grows but never shrinks. 
34 | */ 35 | template 36 | class Queue { 37 | private: 38 | T *_elements; 39 | int _numElements; 40 | int _head, _tail; 41 | int _maxSize; 42 | pthread_mutex_t *_queueMutex; 43 | pthread_cond_t *_queueCV; 44 | 45 | void _init(int initialSize) { 46 | _numElements = 0; 47 | _head = 0; 48 | _tail = 0; 49 | _maxSize = initialSize; 50 | _elements = new T[initialSize]; 51 | _queueCV = (pthread_cond_t*)(malloc(sizeof (pthread_cond_t))); 52 | _queueMutex = (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t))); 53 | pthread_mutex_init(_queueMutex, NULL); 54 | pthread_cond_init(_queueCV, NULL); 55 | } 56 | 57 | void expand() { 58 | T *newStorage = new T[_maxSize * 2]; 59 | memcpy(newStorage, _elements + _head, (_maxSize - _head) * sizeof(T)); 60 | memcpy(newStorage + _maxSize - _head, _elements, _tail * sizeof(T)); 61 | delete[] _elements; 62 | _elements = newStorage; 63 | _head = 0; 64 | _tail = _numElements; 65 | _maxSize *= 2; 66 | } 67 | public: 68 | Queue(int initialSize) { 69 | _init(initialSize); 70 | } 71 | 72 | Queue() { 73 | _init(1); 74 | } 75 | 76 | ~Queue() { 77 | pthread_mutex_destroy(_queueMutex); 78 | pthread_cond_destroy(_queueCV); 79 | delete[] _elements; 80 | free(_queueMutex); 81 | free(_queueCV); 82 | } 83 | 84 | void enqueue(T el) { 85 | pthread_mutex_lock(_queueMutex); 86 | if(_numElements == _maxSize) { 87 | expand(); 88 | } 89 | _elements[_tail] = el; 90 | _tail = (_tail + 1) % _maxSize; 91 | _numElements++; 92 | 93 | pthread_cond_signal(_queueCV); 94 | pthread_mutex_unlock(_queueMutex); 95 | } 96 | 97 | /* 98 | * Blocks until not empty. 99 | */ 100 | T dequeue() { 101 | pthread_mutex_lock(_queueMutex); 102 | if(_numElements == 0) { 103 | pthread_cond_wait(_queueCV, _queueMutex); 104 | } 105 | T el = _elements[_head]; 106 | _head = (_head + 1) % _maxSize; 107 | _numElements--; 108 | pthread_mutex_unlock(_queueMutex); 109 | return el; 110 | } 111 | 112 | /* 113 | * Obviously this number can change by the time you actually look at it. 
114 | */ 115 | inline int getNumElements() const { 116 | return _numElements; 117 | } 118 | }; 119 | 120 | #endif /* QUEUE_H_ */ 121 | -------------------------------------------------------------------------------- /include/common/thread.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef THREAD_H_ 28 | #define THREAD_H_ 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | /* 35 | * Abstract joinable thread class. 
36 | * The only thing the implementer has to fill in is the run method. 37 | */ 38 | class Thread { 39 | private: 40 | pthread_attr_t _pthread_attr; 41 | pthread_t _threadID; 42 | bool _joinable, _startable; 43 | 44 | static void* start_pthread_func(void *obj) { 45 | void* retval = reinterpret_cast(obj)->run(); 46 | pthread_exit(retval); 47 | return retval; 48 | } 49 | protected: 50 | virtual void* run() = 0; 51 | public: 52 | Thread(bool joinable) : _joinable(joinable), _startable(true) { 53 | pthread_attr_init(&_pthread_attr); 54 | pthread_attr_setdetachstate(&_pthread_attr, joinable ? PTHREAD_CREATE_JOINABLE : PTHREAD_CREATE_DETACHED); 55 | } 56 | 57 | virtual ~Thread() { 58 | } 59 | 60 | pthread_t start() { 61 | assert(_startable); 62 | _startable = false; 63 | int n; 64 | if ((n = pthread_create(&_threadID, &_pthread_attr, &Thread::start_pthread_func, (void*)this))) { 65 | errno = n; 66 | perror("pthread_create error"); 67 | } 68 | return _threadID; 69 | } 70 | 71 | void join(void **status) { 72 | assert(_joinable); 73 | int n; 74 | if((n = pthread_join(_threadID, status))) { 75 | errno = n; 76 | perror("pthread_join error"); 77 | } 78 | } 79 | 80 | void join() { 81 | join(NULL); 82 | } 83 | 84 | pthread_t getThreadID() const { 85 | return _threadID; 86 | } 87 | }; 88 | 89 | #endif /* THREAD_H_ */ 90 | -------------------------------------------------------------------------------- /include/convnet.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 
10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef CONVNET3 28 | #define CONVNET3 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "layer.cuh" 39 | #include "data.cuh" 40 | #include "worker.cuh" 41 | #include "weights.cuh" 42 | 43 | class Worker; 44 | class WorkResult; 45 | class Layer; 46 | class DataLayer; 47 | class CostLayer; 48 | 49 | class ConvNet : public Thread { 50 | protected: 51 | std::vector _layers; 52 | std::vector _dataLayers; 53 | std::vector _costs; 54 | GPUData* _data; 55 | 56 | DataProvider* _dp; 57 | int _deviceID; 58 | 59 | Queue _workerQueue; 60 | Queue _resultQueue; 61 | 62 | // For gradient checking 63 | int _numFailures; 64 | int _numTests; 65 | double _baseErr; 66 | 67 | virtual Layer* initLayer(string& layerType, PyObject* paramsDict); 68 | void initCuda(); 69 | void* run(); 70 | public: 71 | ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID); 72 | 73 | Queue& getWorkerQueue(); 74 | Queue& getResultQueue(); 75 | DataProvider& getDataProvider(); 76 | 77 | Layer& operator[](int idx); 78 | Layer& getLayer(int idx); 79 | void copyToCPU(); 80 | void copyToGPU(); 81 | void updateWeights(); 82 | void reset(); 83 | int getNumLayers(); 84 | 85 | void bprop(PASS_TYPE passType); 86 | void fprop(PASS_TYPE passType); 87 | void fprop(int miniIdx, PASS_TYPE passType); 88 | void fprop(GPUData& data, PASS_TYPE passType); 89 | 90 | bool checkGradient(const std::string& name, float eps, Weights& weights); 91 | void checkGradients(); 92 | Cost& getCost(); 93 | Cost& getCost(Cost& cost); 94 | double getCostValue(); 95 | }; 96 | 97 | #endif /* CONVNET3 */ 98 | 99 | -------------------------------------------------------------------------------- /include/cost.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef COST_CUH 28 | #define COST_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include "layer.cuh" 35 | #include "util.cuh" 36 | 37 | class CostLayer; 38 | 39 | /* 40 | * Wrapper for dictionary mapping cost name to vector of returned values. 
41 | */ 42 | class Cost { 43 | private: 44 | int _numCases; 45 | CostMap _costMap; 46 | CostCoeffMap _costCoeffMap; 47 | public: 48 | Cost(int numCases); 49 | Cost(int numCases, std::vector& costs); 50 | doublev& operator [](const std::string s); 51 | CostMap& getCostMap(); 52 | CostCoeffMap& getCostCoeffMap(); 53 | int getNumCases(); 54 | /* 55 | * Returns sum of first values returned by all the costs, weighted by the cost coefficients. 56 | */ 57 | double getValue(); 58 | Cost& operator += (Cost& er); 59 | Cost& operator /= (const double v); 60 | virtual ~Cost(); 61 | }; 62 | 63 | 64 | #endif /* COST_CUH */ 65 | 66 | -------------------------------------------------------------------------------- /include/cudaconv2/cudaconv2.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef COMMON_CUH 28 | #define COMMON_CUH 29 | 30 | #include 31 | #include 32 | #include "conv_util.cuh" 33 | 34 | enum FILTER_OUTPUT_ORDER {MODULE_FILTER_IMAGE, FILTER_MODULE_IMAGE}; 35 | 36 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 37 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 38 | int numImgColors, int numGroups); 39 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 40 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 41 | int numImgColors, int numGroups, 42 | float scaleTargets, float scaleOutput); 43 | 44 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 45 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 46 | int numImgColors, int numGroups); 47 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 48 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 49 | int numImgColors, int numGroups, 50 | float scaleTargets, float scaleOutput); 51 | 52 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 53 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups); 54 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 55 | int imgSizeY, 
int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups, 56 | float scaleTargets, float scaleOutput); 57 | 58 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 59 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups); 60 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 61 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups, 62 | float scaleTargets, float scaleOutput); 63 | 64 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 65 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, 66 | int moduleStride, int numImgColors, int numGroups, int partialSum); 67 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 68 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 69 | int numImgColors, int numGroups, int partialSum, 70 | float scaleTargets, float scaleOutput); 71 | 72 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 73 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, 74 | int moduleStride, int numImgColors, int numGroups); 75 | 76 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 77 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 78 | int numImgColors, int numGroups, float scaleTargets, float scaleOutput); 79 | 80 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 81 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 82 | int numImgColors, int numFilterColors, int numGroups); 83 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 84 | 
int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 85 | int numImgColors, int numFilterColors, int numGroups, 86 | float scaleTargets, float scaleOutput); 87 | 88 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 89 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 90 | int numImgColors, int numFilterColors, int numGroups, 91 | float scaleTargets, float scaleOutput); 92 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 93 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 94 | int numImgColors, int numFilterColors, int numGroups); 95 | 96 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 97 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 98 | int numImgColors, int numFilterColors, int numGroups); 99 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 100 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, 101 | int numGroups, int partialSum, float scaleTargets, float scaleOutput); 102 | 103 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 104 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 105 | int numImgColors, int numFilterColors, int numGroups); 106 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 107 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, 108 | int numGroups, float scaleTargets, float scaleOutput); 109 | 110 | void convImgActsSparse(NVMatrix& 
hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 111 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups); 112 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 113 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups, 114 | float scaleTargets, float scaleOutput); 115 | 116 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 117 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups); 118 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 119 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups, 120 | float scaleTargets, float scaleOutput); 121 | 122 | 123 | #endif /* COMMON_CUH */ 124 | 125 | -------------------------------------------------------------------------------- /include/data.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef DATA_CUH 28 | #define DATA_CUH 29 | 30 | #include 31 | #include 32 | #include "util.cuh" 33 | 34 | /* Owning wrapper around a vector of pointers to T. OWNERSHIP: the constructor stores the address of the vector it receives by reference, and the destructor deletes every element and then the vector itself. Both the vector and its elements must therefore be allocated with new and must not be freed (or live on the stack) on the caller's side. NOTE(review): the template parameter list and the vector element types are missing in this copy of the file; confirm them against the original header. */ template 35 | class Data { 36 | protected: 37 | std::vector* _data; 38 | public: 39 | typedef typename std::vector::iterator T_iter; 40 | 41 | /* Takes ownership of data (see the class comment). The checks below are asserts, so they only run when NDEBUG is not defined: the vector must be non-empty, consecutive elements must report the same getLeadingDim(), and that dimension must be positive. */ Data(std::vector& data) : _data(&data) { 42 | assert(_data->size() > 0); 43 | for (int i = 1; i < data.size(); i++) { 44 | assert(data[i-1]->getLeadingDim() == data[i]->getLeadingDim()); 45 | } 46 | assert(data[0]->getLeadingDim() > 0); 47 | } 48 | 49 | /* Deletes each element, then the vector that was handed to the constructor. */ ~Data() { 50 | for (T_iter it = _data->begin(); it != _data->end(); ++it) { 51 | delete *it; 52 | } 53 | delete _data; 54 | } 55 | 56 | /* Bounds-checked access (vector::at); returns the idx-th element by reference. */ T& operator [](int idx) { 57 | return *_data->at(idx); 58 | } 59 | 60 | /* Number of elements held. */ int getSize() { 61 | return _data->size(); 62 | } 63 | 64 | /* Direct access to the owned vector. */ std::vector& getData() { 65 | return *_data; 66 | } 67 | 68 | /* Number of cases, taken from the leading dimension of element 0. */ int getNumCases() { 69 | return _data->at(0)->getLeadingDim(); 70 | } 71 | }; 72 | 73 | typedef Data GPUData; 74 | typedef Data CPUData; 75 | 76 | /* Only declared here; the member functions are defined outside this header. Holds a CPUData pointer (_hData), an NVMatrixV (_data), a minibatch size and a data size. Presumably hands out minibatches or case ranges of the CPU data as GPUData - TODO confirm against the implementation file. */ class DataProvider { 77 | protected: 78 | CPUData* _hData; 79 | NVMatrixV _data; 80 | int _minibatchSize; 81 | long int _dataSize; 82 | public: 83 | DataProvider(int
minibatchSize); 84 | GPUData& operator[](int idx); 85 | void setData(CPUData&); 86 | void clearData(); 87 | GPUData& getMinibatch(int idx); 88 | GPUData& getDataSlice(int startCase, int endCase); 89 | int getNumMinibatches(); 90 | int getMinibatchSize(); 91 | int getNumCases(); 92 | int getNumCasesInMinibatch(int idx); 93 | }; 94 | 95 | #endif /* DATA_CUH */ 96 | 97 | -------------------------------------------------------------------------------- /include/layer.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef LAYER_CUH 28 | #define LAYER_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "convnet.cuh" 37 | #include "cost.cuh" 38 | #include "weights.cuh" 39 | #include "neuron.cuh" 40 | 41 | class Cost; 42 | class ConvNet; 43 | class CostLayer; 44 | class DataLayer; 45 | 46 | /* 47 | * Abstract layer. 48 | */ 49 | class Layer { 50 | protected: 51 | ConvNet* _convNet; 52 | std::vector _prev, _next; 53 | int _rcvdFInputs, _rcvdBInputs; 54 | 55 | NVMatrixV _inputs; 56 | NVMatrix *_outputs; // Already a pointer (the former TODO asked for this); intended to let a layer reuse previous layers' matrices 57 | NVMatrix *_actsGrad; // Layer activity gradients 58 | bool _gradConsumer, _foundGradConsumers, _trans; 59 | bool _conserveMem; 60 | int _numGradProducersNext; 61 | int _actsTarget, _actsGradTarget; 62 | std::string _name, _type; 63 | 64 | NVMatrix _dropout_mask; 65 | float _dropout; 66 | 67 | void fpropNext(PASS_TYPE passType); 68 | virtual void truncBwdActs(); 69 | virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0; 70 | 71 | virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) { 72 | // Do nothing by default 73 | } 74 | virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { 75 | assert(!isGradProducer()); // Only do nothing if not grad producer 76 | } 77 | public: 78 | static bool _saveActsGrad, _saveActs; 79 | 80 |
Layer(ConvNet* convNet, PyObject* paramsDict, bool trans); 81 | 82 | virtual void fprop(PASS_TYPE passType); 83 | void fprop(NVMatrix& v, PASS_TYPE passType); 84 | virtual void fprop(NVMatrixV& v, PASS_TYPE passType); 85 | virtual void bprop(PASS_TYPE passType); 86 | void bprop(NVMatrix& v, PASS_TYPE passType); 87 | virtual void reset(); 88 | int incRcvdBInputs(); 89 | int getRcvdFInputs(); 90 | int getRcvdBInputs(); 91 | bool isGradConsumer(); 92 | virtual bool isGradProducer(); 93 | std::string& getName(); 94 | std::string& getType(); 95 | void addNext(Layer* l); 96 | void addPrev(Layer* l); 97 | std::vector& getPrev(); 98 | std::vector& getNext(); 99 | virtual NVMatrix& getActs(); 100 | virtual NVMatrix& getActsGrad(); 101 | virtual void postInit(); 102 | 103 | // Do nothing if this layer has no weights 104 | virtual void updateWeights() { 105 | } 106 | virtual void checkGradients() { 107 | } 108 | virtual void copyToCPU() { 109 | } 110 | virtual void copyToGPU() { 111 | } 112 | }; 113 | 114 | class NeuronLayer : public Layer { 115 | protected: 116 | Neuron* _neuron; 117 | 118 | virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 119 | virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 120 | public: 121 | NeuronLayer(ConvNet* convNet, PyObject* paramsDict); 122 | }; 123 | 124 | class WeightLayer : public Layer { 125 | protected: 126 | WeightList _weights; 127 | Weights *_biases; 128 | float _wStep, _bStep; 129 | 130 | void bpropCommon(NVMatrix& v, PASS_TYPE passType); 131 | virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0; 132 | virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0; 133 | public: 134 | WeightLayer(ConvNet* convNet, PyObject* paramsDict, bool trans, bool useGrad); 135 | virtual void updateWeights(); 136 | virtual void copyToCPU(); 137 | virtual void copyToGPU(); 138 | void checkGradients(); 139 | Weights& getWeights(int idx); 140 | }; 141 | 142 | class 
FCLayer : public WeightLayer { 143 | protected: 144 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 145 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 146 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 147 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 148 | public: 149 | FCLayer(ConvNet* convNet, PyObject* paramsDict); 150 | }; 151 | 152 | class SoftmaxLayer : public Layer { 153 | protected: 154 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 155 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 156 | public: 157 | SoftmaxLayer(ConvNet* convNet, PyObject* paramsDict); 158 | }; 159 | 160 | class EltwiseSumLayer : public Layer { 161 | protected: 162 | vector* _coeffs; 163 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 164 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 165 | public: 166 | EltwiseSumLayer(ConvNet* convNet, PyObject* paramsDict); 167 | }; 168 | 169 | class EltwiseMaxLayer : public Layer { 170 | protected: 171 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 172 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 173 | public: 174 | EltwiseMaxLayer(ConvNet* convNet, PyObject* paramsDict); 175 | }; 176 | 177 | class DataLayer : public Layer { 178 | private: 179 | int _dataIdx; 180 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 181 | public: 182 | DataLayer(ConvNet* convNet, PyObject* paramsDict); 183 | 184 | bool isGradProducer(); 185 | void fprop(PASS_TYPE passType); 186 | void fprop(NVMatrixV& data, PASS_TYPE passType); 187 | }; 188 | 189 | class LocalLayer : public WeightLayer { 190 | protected: 191 | struct FilterConns { 192 | int* hFilterConns; 193 | int* dFilterConns; 194 | }; 195 | vector* _filterConns; 196 | 197 | intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, 
*_groups; 198 | intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse; 199 | int _modulesX, _modules, _numFilters; 200 | 201 | void copyToGPU(); 202 | 203 | public: 204 | LocalLayer(ConvNet* convNet, PyObject* paramsDict, bool useGrad); 205 | }; 206 | 207 | class ConvLayer : public LocalLayer { 208 | protected: 209 | int _partialSum; 210 | bool _sharedBiases; 211 | 212 | NVMatrix _weightGradTmp, _actGradTmp; 213 | 214 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 215 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 216 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 217 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 218 | void truncBwdActs(); 219 | 220 | public: 221 | ConvLayer(ConvNet* convNet, PyObject* paramsDict); 222 | }; 223 | 224 | class LocalUnsharedLayer : public LocalLayer { 225 | protected: 226 | NVMatrix _sexMask; 227 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 228 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 229 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 230 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 231 | public: 232 | LocalUnsharedLayer(ConvNet* convNet, PyObject* paramsDict); 233 | }; 234 | 235 | class PoolLayer : public Layer { 236 | protected: 237 | int _channels, _sizeX, _start, _stride, _outputsX; 238 | int _imgSize; 239 | string _pool; 240 | public: 241 | PoolLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); 242 | 243 | static PoolLayer& makePoolLayer(ConvNet* convNet, PyObject* paramsDict); 244 | }; 245 | 246 | class AvgPoolLayer : public PoolLayer { 247 | protected: 248 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 249 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 250 | public: 251 | AvgPoolLayer(ConvNet* convNet, PyObject* paramsDict); 252 | }; 253 | 254 | class 
MaxPoolLayer : public PoolLayer { 255 | protected: 256 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 257 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 258 | public: 259 | MaxPoolLayer(ConvNet* convNet, PyObject* paramsDict); 260 | }; 261 | 262 | class NailbedLayer : public Layer { 263 | protected: 264 | int _channels, _start, _stride, _outputsX; 265 | int _imgSize; 266 | public: 267 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 268 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 269 | 270 | NailbedLayer(ConvNet* convNet, PyObject* paramsDict); 271 | }; 272 | 273 | class GaussianBlurLayer : public Layer { 274 | protected: 275 | int _channels; 276 | Matrix* _hFilter; 277 | NVMatrix _filter; 278 | NVMatrix _actGradsTmp; 279 | public: 280 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 281 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 282 | void copyToGPU(); 283 | 284 | GaussianBlurLayer(ConvNet* convNet, PyObject* paramsDict); 285 | }; 286 | 287 | class ResizeLayer : public Layer { 288 | protected: 289 | int _channels; 290 | float _scale; 291 | int _imgSize, _tgtSize; 292 | public: 293 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 294 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 295 | 296 | ResizeLayer(ConvNet* convNet, PyObject* paramsDict); 297 | }; 298 | 299 | class RGBToYUVLayer : public Layer { 300 | public: 301 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 302 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 303 | 304 | RGBToYUVLayer(ConvNet* convNet, PyObject* paramsDict); 305 | }; 306 | 307 | class RGBToLABLayer : public Layer { 308 | protected: 309 | bool _center; 310 | public: 311 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 312 | void 
bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 313 | 314 | RGBToLABLayer(ConvNet* convNet, PyObject* paramsDict); 315 | }; 316 | 317 | class ResponseNormLayer : public Layer { 318 | protected: 319 | int _channels, _size; 320 | float _scale, _pow; 321 | NVMatrix _denoms; 322 | 323 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 324 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 325 | void truncBwdActs(); 326 | public: 327 | ResponseNormLayer(ConvNet* convNet, PyObject* paramsDict); 328 | }; 329 | 330 | class CrossMapResponseNormLayer : public ResponseNormLayer { 331 | protected: 332 | bool _blocked; 333 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 334 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 335 | public: 336 | CrossMapResponseNormLayer(ConvNet* convNet, PyObject* paramsDict); 337 | }; 338 | 339 | class ContrastNormLayer : public ResponseNormLayer { 340 | protected: 341 | int _imgSize; 342 | NVMatrix _meanDiffs; 343 | 344 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 345 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 346 | void truncBwdActs(); 347 | public: 348 | ContrastNormLayer(ConvNet* convNet, PyObject* paramsDict); 349 | }; 350 | 351 | class CostLayer : public Layer { 352 | protected: 353 | float _coeff; 354 | doublev _costv; 355 | public: 356 | CostLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); 357 | void bprop(PASS_TYPE passType); 358 | virtual doublev& getCost(); 359 | float getCoeff(); 360 | bool isGradProducer(); 361 | 362 | static CostLayer& makeCostLayer(ConvNet* convNet, string& type, PyObject* paramsDict); 363 | }; 364 | 365 | /* 366 | * Input 0: labels 367 | * Input 1: softmax outputs 368 | */ 369 | class LogregCostLayer : public CostLayer { 370 | protected: 371 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE 
passType); 372 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 373 | public: 374 | LogregCostLayer(ConvNet* convNet, PyObject* paramsDict); 375 | }; 376 | 377 | class SumOfSquaresCostLayer : public CostLayer { 378 | protected: 379 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 380 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 381 | public: 382 | SumOfSquaresCostLayer(ConvNet* convNet, PyObject* paramsDict); 383 | }; 384 | 385 | #endif /* LAYER_CUH */ 386 | 387 | -------------------------------------------------------------------------------- /include/layer_kernels.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef LAYER_KERNELS_CUH 28 | #define LAYER_KERNELS_CUH 29 | 30 | #include 31 | #include 32 | 33 | #define LOGREG_GRAD_THREADS_X 32 34 | #define LOGREG_GRAD_THREADS_Y 4 35 | 36 | #define LOGREG_ERR_THREADS_X 128 37 | #define LOGREG_ERR_THREADS_Y 1 38 | 39 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out); 40 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); 41 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add); 42 | 43 | // Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad 44 | // to avoid dividing and then multiplying by quantities that may be near zero. 45 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); 46 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add); 47 | 48 | #endif /* LAYER_KERNELS_CUH */ 49 | 50 | -------------------------------------------------------------------------------- /include/nvmatrix/nvmatrix_operators.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved.
4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef NVMATRIX_OPERATORS_CUH 28 | #define NVMATRIX_OPERATORS_CUH 29 | 30 | #include 31 | 32 | /* Groups the unary element-wise functors. Every nested class exposes __device__ float operator()(float) const; functors that need a parameter (scalar, weight, exponent, range bounds) receive it through their constructor and keep it in const members. Exp, Logistic, Log and Pow call the CUDA fast-math intrinsics (__expf, __fdividef, __logf, __powf), which per the CUDA math API trade accuracy for speed. */ class NVMatrixOps { 33 | public: 34 | /* Exponential via the __expf intrinsic. */ class Exp { 35 | public: 36 | __device__ inline float operator()(const float a) const { 37 | return __expf(a); 38 | } 39 | }; 40 | 41 | /* 1 / (1 + exp(-a)), evaluated with the __fdividef and __expf intrinsics. */ class Logistic { 42 | public: 43 | __device__ inline float operator()(const float a) const { 44 | return __fdividef(1.0f, 1.0f + __expf(-a)); 45 | } 46 | }; 47 | 48 | /* Logarithm via the __logf intrinsic. */ class Log { 49 | public: 50 | __device__ inline float operator()(const float a) const { 51 | return __logf(a); 52 | } 53 | }; 54 | 55 | class Square { 56 | public: 57 | __device__ inline float operator()(const float a) const { 58 | return a * a; 59 | } 60 | }; 61 | 62 | class Sqrt { 63 | public: 64 | __device__ inline float operator()(const float a) const { 65 | return sqrtf(a); 66 | } 67 | }; 68 | 69 | class Reciprocal { 70 | public: 71 | __device__ inline float operator()(const float a) const { 72 | return 1.0f / a; 73 | } 74 | }; 75 | 76 | /* Absolute value: a when a > 0, otherwise -a. */ class Abs { 77 | public: 78 | __device__ inline float operator()(const float a) const { 79 | return a > 0 ?
a : -a; 80 | } 81 | }; 82 | 83 | /* Returns 1 for a > 0, -1 for a < 0 and 0 otherwise (difference of the two comparison results). */ class Sign { 84 | public: 85 | __device__ inline float operator()(const float a) const { 86 | return (a > 0) - (a < 0); 87 | } 88 | }; 89 | 90 | class Identity { 91 | public: 92 | __device__ inline float operator()(const float a) const { 93 | return a; 94 | } 95 | }; 96 | 97 | /* Ignores its argument and returns 0. */ class Zero { 98 | public: 99 | __device__ inline float operator()(const float a) const { 100 | return 0; 101 | } 102 | }; 103 | 104 | /* Ignores its argument and returns 1. */ class One { 105 | public: 106 | __device__ inline float operator()(const float a) const { 107 | return 1; 108 | } 109 | }; 110 | 111 | /* Returns 1 when a < scalar, else 0 (the comparison result is converted to float). */ class SmallerThanScalar { 112 | private: 113 | const float scalar; 114 | public: 115 | SmallerThanScalar(const float _scalar) : scalar(_scalar) { 116 | } 117 | __device__ inline float operator()(const float a) const { 118 | return a < scalar; 119 | } 120 | }; 121 | 122 | /* Returns 1 when a > scalar, else 0 (the comparison result is converted to float). */ class BiggerThanScalar { 123 | private: 124 | const float scalar; 125 | public: 126 | BiggerThanScalar(const float _scalar) : scalar(_scalar) { 127 | } 128 | __device__ inline float operator()(const float a) const { 129 | return a > scalar; 130 | } 131 | }; 132 | 133 | class AddScalar { 134 | private: 135 | const float scalar; 136 | public: 137 | AddScalar(const float _scalar) : scalar(_scalar) { 138 | } 139 | __device__ inline float operator()(const float a) const { 140 | return a + scalar; 141 | } 142 | }; 143 | 144 | /* Returns weight * a + scalar. */ class WeightedAddScalar { 145 | private: 146 | const float weight, scalar; 147 | public: 148 | WeightedAddScalar(const float _weight, const float _scalar) : weight(_weight), scalar(_scalar) { 149 | } 150 | __device__ inline float operator()(const float a) const { 151 | return weight * a + scalar; 152 | } 153 | }; 154 | 155 | class MultByScalar { 156 | private: 157 | const float scalar; 158 | public: 159 | MultByScalar(const float _scalar) : scalar(_scalar) { 160 | } 161 | __device__ inline float operator()(const float a) const { 162 | return a * scalar; 163 | } 164 | }; 165 | 166 | /* Raises a to the power p via the __powf intrinsic. */ class Pow { 167 | private: 168 |
const float p; 169 | public: 170 | Pow(const float _p) : p(_p) { 171 | } 172 | __device__ inline float operator()(const float a) const { 173 | return __powf(a, p); 174 | } 175 | }; 176 | 177 | /* Returns 1 when a lies between lower and upper, else 0. `exclusive` selects the open interval (lower, upper); otherwise the closed interval [lower, upper] is used. NOTE(review): the template parameter list is missing in this copy of the file; `exclusive` is presumably a bool template parameter - confirm against the original header. */ template 178 | class InRange { 179 | private: 180 | const float lower, upper; 181 | public: 182 | InRange(const float _lower, const float _upper) : lower(_lower), upper(_upper) { 183 | } 184 | __device__ inline float operator()(const float a) const { 185 | return exclusive ? a > lower && a < upper : a >= lower && a <= upper; 186 | } 187 | }; 188 | 189 | /* Returns the smaller of a and scalar. */ class MinWithScalar { 190 | private: 191 | const float scalar; 192 | public: 193 | MinWithScalar(const float _scalar) : scalar(_scalar) { 194 | } 195 | __device__ inline float operator()(const float a) const { 196 | return a > scalar ? scalar : a; 197 | } 198 | }; 199 | 200 | /* Returns the larger of a and scalar. */ class MaxWithScalar { 201 | private: 202 | const float scalar; 203 | public: 204 | MaxWithScalar(const float _scalar) : scalar(_scalar) { 205 | } 206 | __device__ inline float operator()(const float a) const { 207 | return a > scalar ?
a : scalar; 208 | } 209 | }; 210 | }; 211 | 212 | /* Groups the binary element-wise functors. Every nested class exposes __device__ float operator()(float a, float b) const; WeightedAdd and SecondScaled take their scale factors through the constructor. */ class NVMatrixBinaryOps { 213 | public: 214 | /* Returns 1 when a == b, else 0 (the comparison result is converted to float). */ class Equals { 215 | public: 216 | __device__ inline float operator()(const float a, const float b) const { 217 | return a == b; 218 | } 219 | }; 220 | 221 | /* Returns 1 when a > b, else 0 (the comparison result is converted to float). */ class BiggerThan { 222 | public: 223 | __device__ inline float operator()(const float a, const float b) const { 224 | return a > b; 225 | } 226 | }; 227 | 228 | /* a / b via the __fdividef fast-math intrinsic. */ class Divide { 229 | public: 230 | __device__ inline float operator()(const float a, const float b) const { 231 | return __fdividef(a, b); 232 | } 233 | }; 234 | 235 | class Multiply { 236 | public: 237 | __device__ inline float operator()(const float a, const float b) const { 238 | return a * b; 239 | } 240 | }; 241 | 242 | /* Returns (a - b) squared. */ class SquaredDiff { 243 | public: 244 | __device__ inline float operator()(const float a, const float b) const { 245 | return (a - b) * (a - b); 246 | } 247 | }; 248 | 249 | /* Returns a * scaleA + b * scaleB. */ class WeightedAdd { 250 | private: 251 | const float scaleA, scaleB; 252 | public: 253 | WeightedAdd(const float _scaleA, const float _scaleB) : scaleA(_scaleA), scaleB(_scaleB) { 254 | } 255 | __device__ inline float operator()(const float a, const float b) const { 256 | return a * scaleA + b * scaleB; 257 | } 258 | }; 259 | 260 | class Add { 261 | public: 262 | __device__ inline float operator()(const float a, const float b) const { 263 | return a + b; 264 | } 265 | }; 266 | 267 | /* Returns a and ignores b. */ class First { 268 | public: 269 | __device__ inline float operator()(const float a, const float b) const { 270 | return a; 271 | } 272 | }; 273 | 274 | /* Returns b and ignores a. */ class Second { 275 | public: 276 | __device__ inline float operator()(const float a, const float b) const { 277 | return b; 278 | } 279 | }; 280 | 281 | /* Returns scale * b and ignores a. */ class SecondScaled { 282 | private: 283 | const float scale; 284 | public: 285 | SecondScaled(const float _scale) : scale(_scale) { 286 | } 287 | __device__ inline float operator()(const float a, const float b) const { 288 | return scale * b; 289 | } 290 | }; 291 | }; 292 | 293 | class
NVMatrixAggs { 294 | public: 295 | class Sum { 296 | public: 297 | __device__ inline float operator()(const float a, const float b) const { 298 | return a + b; 299 | } 300 | __device__ inline float getBaseValue() { 301 | return 0; 302 | } 303 | }; 304 | 305 | class Max { 306 | public: 307 | __device__ inline float operator()(const float a, const float b) const { 308 | return a > b ? a : b; 309 | } 310 | __device__ inline float getBaseValue() { 311 | return -2e38; 312 | } 313 | }; 314 | 315 | class Min { 316 | public: 317 | __device__ inline float operator()(const float a, const float b) const { 318 | return a > b ? b : a; 319 | } 320 | __device__ inline float getBaseValue() { 321 | return 2e38; 322 | } 323 | }; 324 | 325 | template 326 | class ArgMax { 327 | private: 328 | UnaryOperator u; 329 | public: 330 | ArgMax(UnaryOperator _u) : u(_u) { 331 | } 332 | __device__ inline float operator()(const float a, const float b) const { 333 | return u(a) > u(b) ? a : b; 334 | } 335 | __device__ inline float getBaseValue() { 336 | return u.getArgMin(); 337 | } 338 | }; 339 | }; 340 | 341 | class NVMatrixTernaryOps { 342 | public: 343 | class Add { 344 | public: 345 | __device__ inline float operator()(const float a, const float b, const float c) const { 346 | return a + b + c; 347 | } 348 | }; 349 | }; 350 | 351 | #endif /* NVMATRIX_OPERATORS_CUH */ 352 | 353 | -------------------------------------------------------------------------------- /include/pyconvnet.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 
10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef PYCONVNET3_CUH 28 | #define PYCONVNET3_CUH 29 | 30 | #define _QUOTEME(x) #x 31 | #define QUOTEME(x) _QUOTEME(x) 32 | 33 | extern "C" PyMODINIT_FUNC init_convnet(); 34 | 35 | PyObject* initModel(PyObject *self, PyObject *args); 36 | PyObject* startBatch(PyObject *self, PyObject *args); 37 | PyObject* finishBatch(PyObject *self, PyObject *args); 38 | PyObject* checkGradients(PyObject *self, PyObject *args); 39 | PyObject* syncWithHost(PyObject *self, PyObject *args); 40 | PyObject* startMultiviewTest(PyObject *self, PyObject *args); 41 | PyObject* startFeatureWriter(PyObject *self, PyObject *args); 42 | 43 | #endif /* PYCONVNET3_CUH */ 44 | -------------------------------------------------------------------------------- /include/util.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef UTIL_H 28 | #define UTIL_H 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | 38 | #include 39 | 40 | /* 41 | * The types of passes that the convnet supports. Used in the fprop and bprop functions in 42 | * ConvNet class. Most of the layers ignore the pass type, but some make use of it. 43 | */ 44 | enum PASS_TYPE {PASS_TRAIN, PASS_TEST, PASS_GC}; 45 | 46 | // For gradient checking 47 | #define GC_SUPPRESS_PASSES true 48 | #define GC_REL_ERR_THRESH 0.02 49 | 50 | /* 51 | * Store entire data matrix on GPU if its size does not exceed this many MB. 52 | * Otherwise store only one minibatch at a time. 
/*
 * Formats any value that supports stream insertion (operator<<) as a
 * std::string, using the default ostream formatting for that type.
 */
template <class T>
std::string tostr(T n) {
    std::ostringstream formatted;
    formatted << n;
    return formatted.str();
}
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef WEIGHTS_CUH 28 | #define WEIGHTS_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "util.cuh" 38 | 39 | using namespace std; 40 | 41 | class Weights { 42 | private: 43 | Matrix* _hWeights, *_hWeightsInc; 44 | NVMatrix* _weights, *_weightsInc, *_weightsGrad; 45 | 46 | float _epsW, _wc, _mom; 47 | bool _onGPU, _useGrad; 48 | int _numUpdates; 49 | static bool _autoCopyToGPU; 50 | 51 | // Non-NULL if these weights are really shared from some other layer 52 | Weights* _srcWeights; 53 | 54 | public: 55 | NVMatrix& operator*() { 56 | return getW(); 57 | } 58 | 59 | Weights(Weights& srcWeights, float epsW) : _srcWeights(&srcWeights), _epsW(epsW), _wc(0), _onGPU(false), _numUpdates(0), 60 | _weights(NULL), _weightsInc(NULL), _weightsGrad(NULL){ 61 | _hWeights = &srcWeights.getCPUW(); 62 | _hWeightsInc = &srcWeights.getCPUWInc(); 63 | _mom = srcWeights.getMom(); 64 | _useGrad = srcWeights.isUseGrad(); 65 | if (_autoCopyToGPU) { 66 | copyToGPU(); 67 | } 68 | } 69 | 70 | Weights(Matrix& hWeights, Matrix& hWeightsInc, float epsW, float wc, float mom, bool useGrad) 
71 | : _srcWeights(NULL), _hWeights(&hWeights), _hWeightsInc(&hWeightsInc), _numUpdates(0), 72 | _epsW(epsW), _wc(wc), _mom(mom), _useGrad(useGrad), _onGPU(false), _weights(NULL), 73 | _weightsInc(NULL), _weightsGrad(NULL) { 74 | if (_autoCopyToGPU) { 75 | copyToGPU(); 76 | } 77 | } 78 | 79 | ~Weights() { 80 | delete _hWeights; 81 | delete _hWeightsInc; 82 | if (_srcWeights == NULL) { 83 | delete _weights; 84 | delete _weightsInc; 85 | delete _weightsGrad; 86 | } 87 | } 88 | 89 | static void setAutoCopyToGPU(bool autoCopyToGPU) { 90 | _autoCopyToGPU = autoCopyToGPU; 91 | } 92 | 93 | NVMatrix& getW() { 94 | assert(_onGPU); 95 | return *_weights; 96 | } 97 | 98 | NVMatrix& getInc() { 99 | assert(_onGPU); 100 | return *_weightsInc; 101 | } 102 | 103 | NVMatrix& getGrad() { 104 | assert(_onGPU); 105 | return _useGrad ? *_weightsGrad : *_weightsInc; 106 | } 107 | 108 | Matrix& getCPUW() { 109 | return *_hWeights; 110 | } 111 | 112 | Matrix& getCPUWInc() { 113 | return *_hWeightsInc; 114 | } 115 | 116 | int getNumRows() const { 117 | return _hWeights->getNumRows(); 118 | } 119 | 120 | int getNumCols() const { 121 | return _hWeights->getNumCols(); 122 | } 123 | 124 | void copyToCPU() { 125 | if (_srcWeights == NULL) { 126 | assert(_onGPU); 127 | _weights->copyToHost(*_hWeights); 128 | _weightsInc->copyToHost(*_hWeightsInc); 129 | } 130 | } 131 | 132 | // This function is assumed to be called in the order in which the layers 133 | // were defined 134 | void copyToGPU() { 135 | if (_srcWeights == NULL) { 136 | _weights = new NVMatrix(); 137 | _weightsInc = new NVMatrix(); 138 | _weights->copyFromHost(*_hWeights, true); 139 | _weightsInc->copyFromHost(*_hWeightsInc, true); 140 | _weightsGrad = _useGrad ? new NVMatrix() : NULL; 141 | } else { 142 | _weights = _srcWeights->_weights; 143 | _weightsInc = _srcWeights->_weightsInc; 144 | _weightsGrad = _srcWeights->_weightsGrad; 145 | } 146 | _onGPU = true; 147 | } 148 | 149 | // Scale your gradient by epsW / numCases! 
150 | void update() { 151 | // Only true owner of weights updates 152 | if (_srcWeights == NULL && _epsW > 0) { 153 | assert(_onGPU); 154 | if (_useGrad) { 155 | _weightsInc->add(*_weightsGrad, _mom, 1); 156 | } 157 | if (_wc > 0) { 158 | _weightsInc->add(*_weights, -_wc * _epsW); 159 | } 160 | _weights->add(*_weightsInc); 161 | _numUpdates = 0; 162 | } 163 | } 164 | 165 | int incNumUpdates() { 166 | if (_srcWeights != NULL) { 167 | return _srcWeights->incNumUpdates(); 168 | } 169 | return _numUpdates++; 170 | } 171 | 172 | // Returns the number of times a gradient has been computed for this 173 | // weight matrix during the current pass (interval between two calls of update()) 174 | // through the net. This number will only be greater than 1 if this weight matrix 175 | // is *shared* by multiple layers in the net. 176 | int getNumUpdates() const { 177 | if (_srcWeights != NULL) { 178 | return _srcWeights->getNumUpdates(); 179 | } 180 | return _numUpdates; 181 | } 182 | 183 | float getEps() const { 184 | return _epsW; 185 | } 186 | 187 | float getMom() const { 188 | return _mom; 189 | } 190 | 191 | float getWC() const { 192 | return _wc; 193 | } 194 | 195 | bool isUseGrad() const { // is good grammar 196 | return _useGrad; 197 | } 198 | }; 199 | 200 | class WeightList { 201 | private: 202 | std::vector _weightList; 203 | 204 | public: 205 | Weights& operator[](const int idx) const { 206 | return *_weightList[idx]; 207 | } 208 | 209 | ~WeightList() { 210 | for (int i = 0; i < _weightList.size(); i++) { 211 | delete _weightList[i]; 212 | } 213 | } 214 | 215 | // WeightList(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) : _initialized(false) { 216 | // initialize(hWeights, hWeightsInc, epsW, wc, mom, useGrads); 217 | // } 218 | 219 | WeightList() { 220 | } 221 | 222 | // void initialize(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) { 223 | // for (int i = 0; i < 
hWeights.size(); i++) { 224 | // _weightList.push_back(new Weights(*hWeights[i], *hWeightsInc[i], epsW[i], wc[i], mom[i], useGrads)); 225 | // } 226 | // _initialized = true; 227 | // delete &hWeights; 228 | // delete &hWeightsInc; 229 | // delete &epsW; 230 | // delete &wc; 231 | // delete &mom; 232 | // } 233 | 234 | void addWeights(Weights& w) { 235 | _weightList.push_back(&w); 236 | } 237 | 238 | // void addWeights(WeightList& wl) { 239 | // for (int i = 0; i < wl.getSize(); i++) { 240 | // addWeights(wl[i]); 241 | // } 242 | // } 243 | 244 | void update() { 245 | for (int i = 0; i < getSize(); i++) { 246 | _weightList[i]->update(); 247 | } 248 | } 249 | 250 | void copyToCPU() { 251 | for (int i = 0; i < getSize(); i++) { 252 | _weightList[i]->copyToCPU(); 253 | } 254 | } 255 | 256 | void copyToGPU() { 257 | for (int i = 0; i < getSize(); i++) { 258 | _weightList[i]->copyToGPU(); 259 | } 260 | } 261 | 262 | int getSize() { 263 | return _weightList.size(); 264 | } 265 | }; 266 | 267 | #endif /* WEIGHTS_CUH */ -------------------------------------------------------------------------------- /include/worker.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef WORKER_CUH 28 | #define WORKER_CUH 29 | 30 | #include "convnet.cuh" 31 | #include "cost.cuh" 32 | #include "data.cuh" 33 | 34 | class ConvNet; 35 | class Cost; 36 | 37 | class WorkResult { 38 | public: 39 | enum RESULTS {BATCH_DONE, SYNC_DONE}; 40 | protected: 41 | WorkResult::RESULTS _resultType; 42 | Cost* _results; 43 | public: 44 | WorkResult(WorkResult::RESULTS resultType, Cost& results); 45 | WorkResult(WorkResult::RESULTS resultType); 46 | virtual ~WorkResult(); 47 | Cost& getResults() const; 48 | WorkResult::RESULTS getResultType() const; 49 | }; 50 | 51 | class Worker { 52 | protected: 53 | ConvNet* _convNet; 54 | public: 55 | Worker(ConvNet& convNet); 56 | virtual void run() = 0; 57 | }; 58 | 59 | class DataWorker : public Worker { 60 | protected: 61 | CPUData* _data; 62 | DataProvider* _dp; 63 | public: 64 | DataWorker(ConvNet& convNet, CPUData& data); 65 | virtual ~DataWorker(); 66 | }; 67 | 68 | class TrainingWorker : public DataWorker { 69 | protected: 70 | bool _test; 71 | public: 72 | TrainingWorker(ConvNet& convNet, CPUData& data, bool test); 73 | void run(); 74 | }; 75 | 76 | class SyncWorker : public 
class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        # Create the sentinel and the key->link map only if this instance does
        # not have them yet, so calling __init__ again on an existing instance
        # keeps its current links.
        try:
            self.__root
        except AttributeError:
            self.__root = root = []                     # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        # Uses the class-private alias so a subclass overriding update() does
        # not change how construction works.
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            # One chained assignment splices the new link [last, root, key]
            # between the current last link and the sentinel and records it in
            # the map. Existing keys skip this branch and keep their position.
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        # The dict deletion runs first, so a missing key raises KeyError
        # before the linked list is touched.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Follow NEXT pointers from the sentinel until we are back at it.
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        # Follow PREV pointers from the sentinel until we are back at it.
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        try:
            # Empty every link list so the links stop referencing each other,
            # then reset the sentinel to point at itself.
            for node in self.__map.itervalues():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            # NOTE(review): presumably covers clear() being called before
            # __init__ created __map/__root. It would also hide a missing
            # itervalues() on a non-Python-2 dict -- confirm the target version.
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            # Unlink the link just before the sentinel (most recently added).
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            # Unlink the link just after the sentinel (oldest).
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        # dict.pop is called directly so the order-aware pop() defined below
        # is bypassed; the link was already removed by hand above.
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    # update() declares no explicit `self`: the instance is taken from
    # args[0]. NOTE(review): presumably done so that 'self' can be used as a
    # keyword item without clashing with a parameter name.
    def update(*args, **kwds):
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        # Keyword items are applied last, in whatever order kwds.items() yields.
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    # Unique sentinel used by pop() to tell "no default given" apart from any
    # real default value, including None.
    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    # _repr_running is a deliberately shared mutable default: it records the
    # (object id, thread ident) pairs whose repr is in progress, so a
    # dictionary that contains itself prints '...' instead of recursing.
    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            # Always unregister, even if an item's repr raised.
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop every attribute a freshly constructed OrderedDict also has (the
        # internal root/map); only extra instance attributes are pickled as state.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            # items() returns lists, so comparing them also compares order.
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    # Defined as the negation of __eq__ so both operators always agree.
    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --
    # KeysView / ValuesView / ItemsView are only defined when the `_abcoll`
    # import at the top of this file succeeded; otherwise these raise NameError.

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
# Library target `common`, built from matrix.cpp in this directory.
add_library(common
  matrix.cpp
)

# Link the BLAS libraries held in BLAS_LIBRARIES (set by find_package(BLAS) in
# the top-level CMakeLists.txt on non-MSVC builds).
# The plain, keyword-less signature is kept on purpose: the project only
# requires CMake 2.8, and PUBLIC/PRIVATE keywords need 2.8.12 or newer.
target_link_libraries(common
  ${BLAS_LIBRARIES}
)
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | using namespace std; 38 | 39 | /* 40 | * ======================= 41 | * ConvNet 42 | * ======================= 43 | */ 44 | ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID) : Thread(false), _deviceID(deviceID), _data(NULL) { 45 | try { 46 | int numLayers = PyList_GET_SIZE(layerParams); 47 | 48 | for (int i = 0; i < numLayers; i++) { 49 | PyObject* paramsDict = PyList_GET_ITEM(layerParams, i); 50 | string layerType = pyDictGetString(paramsDict, "type"); 51 | 52 | Layer* l = initLayer(layerType, paramsDict); 53 | // Connect backward links in graph for this layer 54 | intv* inputLayers = pyDictGetIntV(paramsDict, "inputs"); 55 | if (inputLayers != NULL) { 56 | for (int i = 0; i < inputLayers->size(); i++) { 57 | l->addPrev(&getLayer(inputLayers->at(i))); 58 | } 59 | } 60 | delete inputLayers; 61 | } 62 | 63 | // Connect the forward links in the graph 64 | for (int i = 0; i < _layers.size(); i++) { 65 | vector& prev = _layers[i]->getPrev(); 66 | for (int j = 0; j < prev.size(); j++) { 67 
| prev[j]->addNext(_layers[i]); 68 | } 69 | } 70 | 71 | // Execute post-initialization stuff 72 | for (int i = 0; i < _layers.size(); i++) { 73 | _layers[i]->postInit(); 74 | } 75 | 76 | _dp = new DataProvider(minibatchSize); 77 | } catch (string& s) { 78 | cout << "Error creating ConvNet: " << s << endl; 79 | exit(1); 80 | } 81 | } 82 | 83 | /* 84 | * Override this in derived classes 85 | */ 86 | Layer* ConvNet::initLayer(string& layerType, PyObject* paramsDict) { 87 | if (layerType == "fc") { 88 | _layers.push_back(new FCLayer(this, paramsDict)); 89 | } else if (layerType == "conv") { 90 | _layers.push_back(new ConvLayer(this, paramsDict)); 91 | } else if (layerType == "local") { 92 | _layers.push_back(new LocalUnsharedLayer(this, paramsDict)); 93 | } else if (layerType == "pool") { 94 | _layers.push_back(&PoolLayer::makePoolLayer(this, paramsDict)); 95 | } else if (layerType == "rnorm") { 96 | _layers.push_back(new ResponseNormLayer(this, paramsDict)); 97 | } else if (layerType == "cmrnorm") { 98 | _layers.push_back(new CrossMapResponseNormLayer(this, paramsDict)); 99 | } else if (layerType == "cnorm") { 100 | _layers.push_back(new ContrastNormLayer(this, paramsDict)); 101 | } else if (layerType == "softmax") { 102 | _layers.push_back(new SoftmaxLayer(this, paramsDict)); 103 | } else if (layerType == "eltsum") { 104 | _layers.push_back(new EltwiseSumLayer(this, paramsDict)); 105 | } else if (layerType == "eltmax") { 106 | _layers.push_back(new EltwiseMaxLayer(this, paramsDict)); 107 | } else if (layerType == "neuron") { 108 | _layers.push_back(new NeuronLayer(this, paramsDict)); 109 | } else if (layerType == "nailbed") { 110 | _layers.push_back(new NailbedLayer(this, paramsDict)); 111 | } else if (layerType == "blur") { 112 | _layers.push_back(new GaussianBlurLayer(this, paramsDict)); 113 | } else if (layerType == "resize") { 114 | _layers.push_back(new ResizeLayer(this, paramsDict)); 115 | } else if (layerType == "rgb2yuv") { 116 | _layers.push_back(new 
RGBToYUVLayer(this, paramsDict)); 117 | } else if (layerType == "rgb2lab") { 118 | _layers.push_back(new RGBToLABLayer(this, paramsDict)); 119 | } else if (layerType == "data") { 120 | DataLayer *d = new DataLayer(this, paramsDict); 121 | _layers.push_back(d); 122 | _dataLayers.push_back(d); 123 | } else if (strncmp(layerType.c_str(), "cost.", 5) == 0) { 124 | CostLayer *c = &CostLayer::makeCostLayer(this, layerType, paramsDict); 125 | _layers.push_back(c); 126 | _costs.push_back(c); 127 | } else { 128 | throw string("Unknown layer type ") + layerType; 129 | } 130 | 131 | return _layers.back(); 132 | } 133 | 134 | /* 135 | * This executes in a new CPU thread so it's OK to initialize CUDA stuff here. 136 | */ 137 | void ConvNet::initCuda() { 138 | int randomSeed = time(0); 139 | char* randomSeedEnv; 140 | 141 | cudaSetDevice(_deviceID < 0 ? gpuGetMaxGflopsDeviceId() : _deviceID); 142 | cudaDeviceSetCacheConfig(cudaFuncCachePreferShared); 143 | cublasInit(); 144 | 145 | randomSeedEnv = getenv("CONVNET_RANDOM_SEED"); 146 | if (randomSeedEnv != NULL) { 147 | randomSeed = atoi(randomSeedEnv); 148 | } 149 | 150 | NVMatrix::initRandom(randomSeed); 151 | 152 | copyToGPU(); 153 | } 154 | 155 | void* ConvNet::run() { 156 | initCuda(); 157 | 158 | while (true) { 159 | Worker* worker = _workerQueue.dequeue(); 160 | worker->run(); 161 | delete worker; 162 | } 163 | return NULL; 164 | } 165 | 166 | Queue& ConvNet::getWorkerQueue() { 167 | return _workerQueue; 168 | } 169 | 170 | Queue& ConvNet::getResultQueue() { 171 | return _resultQueue; 172 | } 173 | 174 | DataProvider& ConvNet::getDataProvider() { 175 | return *_dp; 176 | } 177 | 178 | Layer& ConvNet::operator[](int idx) { 179 | return *_layers[idx]; 180 | } 181 | 182 | Layer& ConvNet::getLayer(int idx) { 183 | return *_layers[idx]; 184 | } 185 | 186 | void ConvNet::copyToCPU() { 187 | for (int i = 0; i < _layers.size(); i++) { 188 | _layers[i]->copyToCPU(); 189 | } 190 | } 191 | 192 | void ConvNet::copyToGPU() { 193 | for 
(int i = 0; i < _layers.size(); i++) { 194 | _layers[i]->copyToGPU(); 195 | } 196 | } 197 | 198 | void ConvNet::updateWeights() { 199 | for (int i = 0; i < _layers.size(); i++) { 200 | _layers[i]->updateWeights(); 201 | } 202 | } 203 | 204 | void ConvNet::reset() { 205 | for (int i = 0; i < _layers.size(); i++) { 206 | _layers[i]->reset(); 207 | } 208 | } 209 | 210 | int ConvNet::getNumLayers() { 211 | return _layers.size(); 212 | } 213 | 214 | void ConvNet::bprop(PASS_TYPE passType) { 215 | for (int i = 0; i < _costs.size(); i++) { 216 | _costs[i]->bprop(passType); 217 | } 218 | reset(); 219 | } 220 | 221 | void ConvNet::fprop(PASS_TYPE passType) { 222 | assert(_data != NULL); 223 | reset(); 224 | for (int i = 0; i < _dataLayers.size(); i++) { 225 | _dataLayers[i]->fprop(_data->getData(), passType); 226 | } 227 | } 228 | 229 | void ConvNet::fprop(GPUData& data, PASS_TYPE passType) { 230 | if (&data != _data) { 231 | delete _data; 232 | } 233 | _data = &data; 234 | fprop(passType); 235 | } 236 | 237 | void ConvNet::fprop(int miniIdx, PASS_TYPE passType) { 238 | delete _data; 239 | _data = &_dp->getMinibatch(miniIdx); 240 | fprop(passType); 241 | } 242 | 243 | Cost& ConvNet::getCost() { 244 | return *new Cost(_data->getNumCases(), _costs); 245 | } 246 | 247 | // Same as getCost() but adds results to given cost and returns it 248 | Cost& ConvNet::getCost(Cost& cost) { 249 | Cost& newCost = getCost(); 250 | cost += newCost; 251 | delete &newCost; 252 | return cost; 253 | } 254 | 255 | double ConvNet::getCostValue() { 256 | Cost& cost = getCost(); 257 | double val = cost.getValue(); 258 | delete &cost; 259 | return val; 260 | } 261 | 262 | /* 263 | * Gradient checking stuff 264 | */ 265 | void ConvNet::checkGradients() { 266 | _numFailures = 0; 267 | _numTests = 0; 268 | fprop(0, PASS_GC); 269 | _baseErr = getCostValue(); 270 | bprop(PASS_GC); 271 | 272 | for (vector::iterator it = _layers.begin(); it != _layers.end(); ++it) { 273 | (*it)->checkGradients(); 274 | } 275 
| 276 | cout << "------------------------" << endl; 277 | if (_numFailures > 0) { 278 | cout << _numFailures << "/" << _numTests << " TESTS FAILED" << endl; 279 | } else { 280 | cout << "ALL " << _numTests << " TESTS PASSED" << endl; 281 | } 282 | } 283 | 284 | /* 285 | * name: weight matrix name 286 | * eps: finite difference step 287 | */ 288 | bool ConvNet::checkGradient(const string& name, float eps, Weights& weights) { 289 | Matrix numGrad(weights.getNumRows(), weights.getNumCols()); 290 | Matrix diff(numGrad); 291 | numGrad.apply(Matrix::ZERO); 292 | Matrix weightsCPU; 293 | 294 | weights.getW().copyToHost(weightsCPU, true); 295 | 296 | for(int i = 0; i < weights.getNumRows(); i++) { 297 | for (int j = 0; j < weights.getNumCols(); j++) { 298 | float v = weightsCPU(i,j); 299 | weightsCPU(i,j) += eps; 300 | weights.getW().copyFromHost(weightsCPU); 301 | weightsCPU(i,j) = v; 302 | fprop(PASS_GC); 303 | double err = getCostValue(); 304 | numGrad(i,j) = (err - _baseErr) / (_data->getNumCases() * eps); 305 | if (isnan(numGrad(i,j)) || isinf(numGrad(i,j))) { 306 | cout << "Numerical computation produced nan or inf when checking '" << name << "': " << numGrad(i,j) << endl; 307 | cout << "Consider reducing the sizes of the weights or finite difference steps." << endl; 308 | cout << "Exiting." << endl; 309 | exit(1); 310 | } 311 | weights.getW().copyFromHost(weightsCPU); 312 | } 313 | } 314 | 315 | Matrix gradCPU; 316 | weights.getGrad().copyToHost(gradCPU, true); 317 | gradCPU.scale(-1.0 / _data->getNumCases()); 318 | float analNorm = gradCPU.norm(); 319 | float numNorm = numGrad.norm(); 320 | numGrad.subtract(gradCPU, diff); 321 | float relErr = diff.norm() / analNorm; 322 | bool fail = relErr >= GC_REL_ERR_THRESH; 323 | if (fail || !GC_SUPPRESS_PASSES) { 324 | cout << "========================" << endl; 325 | printf("(%s) %s GRADIENT CHECK\n", fail ? 
"****FAIL****" : "PASS", name.c_str()); 326 | cout << "========================" << endl; 327 | cout << "Analytic:" << endl; 328 | gradCPU.print(6,4); 329 | cout << "Numeric:" << endl; 330 | numGrad.print(6,4); 331 | printf("Analytic norm: %e\n", analNorm); 332 | printf("Numeric norm: %e\n", numNorm); 333 | printf("Relative error: %e\n", relErr); 334 | } 335 | _numTests++; 336 | _numFailures += fail; 337 | return fail; 338 | } 339 | -------------------------------------------------------------------------------- /src/cost.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | /* 33 | * ===================== 34 | * Cost 35 | * ===================== 36 | */ 37 | 38 | Cost::Cost(int numCases) : _numCases(numCases) { 39 | } 40 | 41 | Cost::Cost(int numCases, vector& costs) : _numCases(numCases) { 42 | for (vector::iterator it = costs.begin(); it != costs.end(); ++it) { 43 | _costMap[(*it)->getName()] = &(*it)->getCost(); 44 | _costCoeffMap[(*it)->getName()] = (*it)->getCoeff(); 45 | } 46 | } 47 | 48 | int Cost::getNumCases() { 49 | return _numCases; 50 | } 51 | 52 | doublev& Cost::operator [](const string s) { 53 | return *_costMap[s]; 54 | } 55 | 56 | CostMap& Cost::getCostMap() { 57 | return _costMap; 58 | } 59 | 60 | CostCoeffMap& Cost::getCostCoeffMap() { 61 | return _costCoeffMap; 62 | } 63 | 64 | double Cost::getValue() { 65 | double val = 0; 66 | for (CostMap::iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 67 | val += _costCoeffMap[it->first] * it->second->at(0); 68 | } 69 | return val; 70 | } 71 | 72 | Cost& Cost::operator += (Cost& er) { 73 | CostMap& otherMap = er.getCostMap(); 74 | CostCoeffMap& otherCoeffMap = er.getCostCoeffMap(); 75 | for (CostMap::const_iterator it = otherMap.begin(); it != otherMap.end(); ++it) { 76 | if (_costMap.count(it->first) == 0) { 77 | _costMap[it->first] = new doublev(); 78 | _costCoeffMap[it->first] = otherCoeffMap[it->first]; 79 | } 80 | 81 | vector& myVec 
= *_costMap[it->first]; 82 | vector& otherVec = *otherMap[it->first]; 83 | for (int i = 0; i < otherVec.size(); i++) { 84 | if (myVec.size() <= i) { 85 | myVec.push_back(0); 86 | } 87 | myVec[i] += otherVec[i]; 88 | } 89 | } 90 | _numCases += er.getNumCases(); 91 | return *this; 92 | } 93 | 94 | Cost& Cost::operator /= (const double v) { 95 | for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 96 | for (doublev::iterator it2 = it->second->begin(); it2 != it->second->end(); ++it2) { 97 | *it2 /= v; 98 | } 99 | } 100 | return *this; 101 | } 102 | 103 | Cost::~Cost() { 104 | for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 105 | delete it->second; 106 | } 107 | } -------------------------------------------------------------------------------- /src/cudaconv2/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CUDA_ADD_LIBRARY (cudaconv2 2 | conv_util.cu 3 | filter_acts.cu 4 | img_acts.cu 5 | weight_acts.cu 6 | ) 7 | 8 | TARGET_LINK_LIBRARIES (cudaconv2 9 | nvmatrix 10 | ${BLAS_LIBRARIES} 11 | ${CUDA_CUBLAS_LIBRARIES} 12 | ) 13 | -------------------------------------------------------------------------------- /src/data.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | DataProvider::DataProvider(int minibatchSize) : 33 | _minibatchSize(minibatchSize), _hData(NULL) { 34 | 35 | } 36 | 37 | GPUData& DataProvider::operator[](int idx) { 38 | return getMinibatch(idx); 39 | } 40 | 41 | void DataProvider::clearData() { 42 | delete _hData; 43 | _hData = NULL; 44 | _dataSize = 0; 45 | } 46 | 47 | void DataProvider::setData(CPUData& hData) { 48 | // This is now deleted by the DataWorker's destructor 49 | // delete _hData; // Delete old CPU matrices 50 | 51 | _hData = &hData; 52 | _dataSize = 0; 53 | for (int i = 0; i < hData.getSize(); i++) { 54 | _dataSize += hData[i].getNumDataBytes(); 55 | } 56 | _dataSize /= 1024 * 1024; 57 | if (_dataSize < MAX_DATA_ON_GPU) { 58 | for (int i = 0; i < hData.getSize(); i++) { 59 | if (i >= _data.size()) { 60 | _data.push_back(new NVMatrix()); 61 | } 62 | _data[i]->copyFromHost(hData[i], true); 63 | } 64 | } 65 | } 66 | 67 | GPUData& DataProvider::getMinibatch(int idx) { 68 | assert(idx >= 0 && idx < getNumMinibatches()); 69 | return getDataSlice(idx * _minibatchSize, (idx + 1) * _minibatchSize); 70 | } 71 | 
72 | GPUData& DataProvider::getDataSlice(int startCase, int endCase) { 73 | assert(_hData != NULL); 74 | assert(_hData->getNumCases() > 0); 75 | 76 | NVMatrixV& miniData = *new NVMatrixV(); 77 | 78 | for (int i = 0; i < _hData->getData().size(); i++) { 79 | miniData.push_back(new NVMatrix()); 80 | if (_dataSize < MAX_DATA_ON_GPU) { 81 | if (_data[i]->isTrans()) { 82 | _data[i]->sliceRows(startCase, min(_hData->getNumCases(), endCase), *miniData[i]); 83 | } else { 84 | _data[i]->sliceCols(startCase, min(_hData->getNumCases(), endCase), *miniData[i]); 85 | } 86 | } else { 87 | Matrix tmp; 88 | if ((*_hData)[i].isTrans()) { 89 | (*_hData)[i].sliceRows(startCase, min(_hData->getNumCases(), endCase), tmp); 90 | } else { 91 | (*_hData)[i].sliceCols(startCase, min(_hData->getNumCases(), endCase), tmp); 92 | } 93 | miniData.back()->copyFromHost(tmp, true); 94 | } 95 | } 96 | 97 | return *new GPUData(miniData); 98 | } 99 | 100 | int DataProvider::getNumMinibatches() { 101 | assert(_hData != NULL); 102 | assert(_hData->getNumCases() > 0); 103 | return DIVUP(_hData->getNumCases(), _minibatchSize); 104 | } 105 | 106 | int DataProvider::getMinibatchSize() { 107 | return _minibatchSize; 108 | } 109 | 110 | int DataProvider::getNumCases() { 111 | assert(_hData != NULL); 112 | assert(_hData->getNumCases() > 0); 113 | return _hData->getNumCases(); 114 | } 115 | 116 | int DataProvider::getNumCasesInMinibatch(int idx) { 117 | assert(_hData != NULL); 118 | assert(_hData->getNumCases() > 0); 119 | assert(idx >= 0 && idx < getNumMinibatches()); 120 | return min(_minibatchSize, max(0, _hData->getNumCases() - idx * _minibatchSize)); 121 | } -------------------------------------------------------------------------------- /src/layer_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | 29 | #include 30 | 31 | /* 32 | * E = -log(y_t) 33 | * probs: (numOut, numCases) 34 | * labels: (1, numCases) 35 | * maxProbs: (1, numCases) 36 | * labelLogProbs: (1, numCases) (*out) 37 | * correctProbs: (1, numCases) (*out) 38 | * 39 | * target: (1, numCases) 40 | */ 41 | __global__ void kLogregCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs, 42 | const int numCases, const int numOut) { 43 | const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; 44 | 45 | if (tx < numCases) { 46 | const int label = int(labels[tx]); 47 | const float maxp = maxProbs[tx]; 48 | const float labelp = probs[label * numCases + tx]; 49 | 50 | labelLogProbs[tx] = __logf(labelp); 51 | 52 | /* 53 | * Compute the probability of guessing the correct case if you take the most-probable label. 54 | * 55 | * This is done like this: 56 | * 57 | * - If the most probable label is not equal to the true label, then the probability is zero. 58 | * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum). 59 | * 60 | * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned 61 | * maximum probability. But it's a safety measure to prevent over-estimating your accuracy. 62 | * Though it could never happen in reality. Well it could. But it wouldn't. Cool? 
63 | */ 64 | if (labelp != maxp) { 65 | correctProbs[tx] = 0; 66 | } else { 67 | int numMax = 0; 68 | for (int i = 0; i < numOut; i++) { 69 | numMax += probs[i * numCases + tx] == maxp; 70 | } 71 | correctProbs[tx] = 1.0f / float(numMax); 72 | } 73 | } 74 | } 75 | 76 | /* 77 | * E = -log(y_t) 78 | * y_l: (numOut, numCases) 79 | * labels: (1, numCases) 80 | * 81 | * dE_dy_l: (numOut, numCases) 82 | */ 83 | template 84 | __global__ void kLogregCostGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases, 85 | const int numOut, const float gradCoeff) { 86 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 87 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 88 | const int tidx = ty * numCases + tx; 89 | 90 | if (ty < numOut && tx < numCases) { 91 | const int label = int(labels[tx]); 92 | float v = gradCoeff * (label == ty); 93 | v = __fdividef(v, y_l[tidx]); 94 | if (add) { 95 | dE_dy_l[tidx] += v; 96 | } else { 97 | dE_dy_l[tidx] = v; 98 | } 99 | } 100 | } 101 | 102 | /* 103 | * dE_dy_l: (numOut, numCases) 104 | * y_l: (numOut, numCases) 105 | * 106 | * dE_dx_l: (numOut, numCases) 107 | */ 108 | template 109 | __global__ void kSoftmaxGrad(float* dE_dy_l, float* y_l, float* dE_dx_l, const int numCases, const int numOut) { 110 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 111 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 112 | const int tidx = ty * numCases + tx; 113 | 114 | if (ty < numOut && tx < numCases) { 115 | float v = 0; 116 | for (int j = 0; j < numOut; j++) { 117 | v += dE_dy_l[j * numCases + tx] * ((j == ty) - y_l[j * numCases + tx]); 118 | } 119 | v *= y_l[tidx]; 120 | 121 | if (add) { 122 | dE_dx_l[tidx] += v; 123 | } else { 124 | dE_dx_l[tidx] = v; 125 | } 126 | } 127 | } 128 | 129 | /* 130 | * E = -log(y_t) 131 | * y_l: (numOut, numCases) 132 | * labels: (1, numCases) 133 | * 134 | * dE_dx_l: (numOut, numCases) 135 | */ 136 | template 137 | __global__ void 
kLogregSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases, 138 | const int numOut, const float gradCoeff) { 139 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 140 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 141 | const int tidx = ty * numCases + tx; 142 | 143 | if (ty < numOut && tx < numCases) { 144 | const int label = int(labels[tx]); 145 | float v = gradCoeff * ((label == ty) - y_l[tidx]); 146 | if (add) { 147 | dE_dx_l[tidx] += v; 148 | } else { 149 | dE_dx_l[tidx] = v; 150 | } 151 | } 152 | } 153 | 154 | template 155 | __global__ void kEltwiseMaxGrad(float* actGrad, float* input, float* output, float* target, 156 | const int numElements) { 157 | for (int i = B_X * blockIdx.x + threadIdx.x; i < numElements; i += B_X * gridDim.x) { 158 | if (add) { 159 | target[i] += actGrad[i] * (output[i] == input[i]); 160 | } else { 161 | target[i] = actGrad[i] * (output[i] == input[i]); 162 | } 163 | } 164 | } 165 | 166 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add) { 167 | assert(actGrad.isContiguous()); 168 | assert(output.isContiguous()); 169 | assert(input.isContiguous()); 170 | assert(actGrad.isSameDims(input)); 171 | assert(actGrad.isSameDims(output)); 172 | 173 | dim3 blocks(DIVUP(actGrad.getNumElements(), 128)); 174 | dim3 threads(128); 175 | if (add) { 176 | assert(actGrad.isSameDims(target)); 177 | cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, true>, cudaFuncCachePreferL1); 178 | kEltwiseMaxGrad<128, true><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); 179 | } else { 180 | target.resize(actGrad); 181 | cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, false>, cudaFuncCachePreferL1); 182 | kEltwiseMaxGrad<128, false><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); 183 | } 184 | 185 | 
getLastCudaError("computeEltwiseMaxGrad: Kernel execution failed"); 186 | } 187 | 188 | /* 189 | * E = -log(y_t) 190 | * probs: (numOut, numCases) 191 | * labels: (1, numCases) 192 | * maxProbs: (1, numCases) 193 | * labelLogProbs: (1, numCases) (*out) 194 | * correctProbs: (1, numCases) (*out) 195 | * 196 | * target: (1, numCases) 197 | */ 198 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) { 199 | int numCases = probs.getNumCols(); 200 | int numOut = probs.getNumRows(); 201 | 202 | assert(labels.getNumElements() == numCases); 203 | assert(!labels.isTrans()); 204 | assert(!probs.isTrans()); 205 | assert(labels.isContiguous()); 206 | assert(probs.isContiguous()); 207 | 208 | NVMatrix& maxProbs = probs.max(0); 209 | 210 | labelLogProbs_out.resize(1, numCases); 211 | correctProbs_out.resize(1, numCases); 212 | dim3 threads(LOGREG_ERR_THREADS_X, 1); 213 | dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); 214 | cudaFuncSetCacheConfig(kLogregCost, cudaFuncCachePreferL1); 215 | kLogregCost<<>>(probs.getDevData(), labels.getDevData(), maxProbs.getDevData(), 216 | labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), 217 | numCases, numOut); 218 | getLastCudaError("computeLogregCost: Kernel execution failed"); 219 | // cudaThreadSynchronize(); 220 | delete &maxProbs; 221 | } 222 | 223 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { 224 | int numCases = probs.getLeadingDim(); 225 | int numOut = probs.getFollowingDim(); 226 | assert(labels.getNumElements() == numCases); 227 | assert(probs.isContiguous()); 228 | assert(target.isContiguous()); 229 | assert(labels.isContiguous()); 230 | assert(!labels.isTrans()); 231 | assert(!probs.isTrans()); 232 | 233 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 234 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 235 | if (!add) { 236 | 
target.resize(probs); 237 | kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 238 | numCases, numOut, coeff); 239 | } else { 240 | kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 241 | numCases, numOut, coeff); 242 | } 243 | 244 | getLastCudaError("computeLogregGrad: Kernel execution failed"); 245 | } 246 | 247 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add) { 248 | int numCases = acts.getLeadingDim(); 249 | int numOut = acts.getFollowingDim(); 250 | 251 | assert(acts.isSameDims(actsGrad)); 252 | assert(acts.isContiguous()); 253 | assert(actsGrad.isContiguous()); 254 | assert(target.isContiguous()); 255 | assert(acts.isTrans()); 256 | assert(actsGrad.isTrans()); 257 | 258 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 259 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 260 | if (!add) { 261 | target.resize(acts); 262 | kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); 263 | } else { 264 | kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); 265 | } 266 | getLastCudaError("computeSoftmaxGrad: Kernel execution failed"); 267 | } 268 | 269 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { 270 | int numCases = probs.getLeadingDim(); 271 | int numOut = probs.getFollowingDim(); 272 | assert(labels.getNumElements() == numCases); 273 | assert(probs.isContiguous()); 274 | assert(target.isContiguous()); 275 | assert(labels.isContiguous()); 276 | assert(probs.isTrans()); 277 | 278 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 279 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 280 | if (!add) { 281 | target.resize(probs); 282 | kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), 
target.getDevData(), 283 | numCases, numOut, coeff); 284 | } else { 285 | kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 286 | numCases, numOut, coeff); 287 | } 288 | 289 | getLastCudaError("computeLogregSoftmaxGrad: Kernel execution failed"); 290 | } 291 | -------------------------------------------------------------------------------- /src/neuron.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | Neuron& Neuron::makeNeuron(PyObject* neuronDict) { 33 | string type = pyDictGetString(neuronDict, "type"); 34 | PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params"); 35 | 36 | if (type == "relu") { 37 | return *new ReluNeuron(); 38 | } 39 | 40 | if (type == "softrelu") { 41 | return *new SoftReluNeuron(); 42 | } 43 | 44 | if (type == "brelu") { 45 | float a = pyDictGetFloat(neuronParamsDict, "a"); 46 | return *new BoundedReluNeuron(a); 47 | } 48 | 49 | if (type == "abs") { 50 | return *new AbsNeuron(); 51 | } 52 | 53 | if (type == "logistic") { 54 | return *new LogisticNeuron(); 55 | } 56 | 57 | if (type == "tanh") { 58 | float a = pyDictGetFloat(neuronParamsDict, "a"); 59 | float b = pyDictGetFloat(neuronParamsDict, "b"); 60 | 61 | return *new TanhNeuron(a, b); 62 | } 63 | 64 | if (type == "square") { 65 | return *new SquareNeuron(); 66 | } 67 | 68 | if (type == "sqrt") { 69 | return *new SqrtNeuron(); 70 | } 71 | 72 | if (type == "linear") { 73 | float a = pyDictGetFloat(neuronParamsDict, "a"); 74 | float b = pyDictGetFloat(neuronParamsDict, "b"); 75 | return *new LinearNeuron(a, b); 76 | } 77 | 78 | if (type == "ident") { 79 | return *new Neuron(); 80 | } 81 | 82 | throw string("Unknown neuron type: ") + type; 83 | } 84 | -------------------------------------------------------------------------------- /src/nvmatrix/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CUDA_ADD_LIBRARY (nvmatrix 2 | nvmatrix.cu 3 | nvmatrix_kernels.cu 4 | ) 5 | 6 | TARGET_LINK_LIBRARIES (nvmatrix 7 | common 8 | ${BLAS_LIBRARIES} 9 | ${CUDA_CUBLAS_LIBRARIES} 10 | ) -------------------------------------------------------------------------------- /src/nvmatrix/nvmatrix_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * 
All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | __global__ void kTile(const float* src, float* tgt, const uint srcWidth, const uint srcHeight, const uint tgtWidth, const uint tgtHeight) { 32 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 33 | const int numThreads = blockDim.x * gridDim.x; 34 | // const unsigned int numEls = tgtWidth * tgtHeight; 35 | for (uint i = idx; i < tgtWidth * tgtHeight; i += numThreads) { 36 | const uint y = i / tgtWidth; 37 | const uint x = i % tgtWidth; 38 | const uint srcY = y % srcHeight; 39 | const uint srcX = x % srcWidth; 40 | tgt[i] = src[srcY * srcWidth + srcX]; 41 | } 42 | } 43 | 44 | __global__ void kDotProduct_r(float* a, float* b, float* target, const uint numCols, const uint numElements) { 45 | __shared__ float shmem[DP_BLOCKSIZE]; 46 | 47 | uint eidx = DP_BLOCKSIZE * blockIdx.x + threadIdx.x; 48 | shmem[threadIdx.x] = 0; 49 | if (eidx < numCols) { 50 | for (; eidx < numElements; eidx += numCols) { 51 | shmem[threadIdx.x] += a[eidx] * b[eidx]; 52 | } 53 | } 54 | __syncthreads(); 55 | if (threadIdx.x < 256) { 56 | shmem[threadIdx.x] += shmem[threadIdx.x + 256]; 57 | } 58 | __syncthreads(); 59 | if (threadIdx.x < 128) { 60 | shmem[threadIdx.x] += shmem[threadIdx.x + 128]; 61 | } 62 | __syncthreads(); 63 | if (threadIdx.x < 64) { 64 | shmem[threadIdx.x] += shmem[threadIdx.x + 64]; 65 | } 66 | __syncthreads(); 67 | if (threadIdx.x < 32) { 68 | volatile float* mysh = &shmem[threadIdx.x]; 69 | *mysh += mysh[32]; 70 | *mysh += mysh[16]; 71 | *mysh += mysh[8]; 72 | *mysh += mysh[4]; 73 | *mysh += mysh[2]; 74 | *mysh += mysh[1]; 75 | if (threadIdx.x == 0) { 76 | target[blockIdx.x] = *mysh; 77 | } 78 | } 79 | } 80 | 81 | __global__ void kSetupCurand(curandState *state, unsigned long long seed) { 82 | const uint tidx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x; 83 | /* Each thread gets same seed, a different sequence number, 84 | no offset */ 85 | curand_init(seed, tidx, 0, &state[tidx]); 86 | } 87 | 
88 | -------------------------------------------------------------------------------- /src/pyconvnet.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | 44 | using namespace std; 45 | static ConvNet* model = NULL; 46 | 47 | static PyMethodDef _ConvNetMethods[] = { { "initModel", initModel, METH_VARARGS }, 48 | { "startBatch", startBatch, METH_VARARGS }, 49 | { "finishBatch", finishBatch, METH_VARARGS }, 50 | { "checkGradients", checkGradients, METH_VARARGS }, 51 | { "startMultiviewTest", startMultiviewTest, METH_VARARGS }, 52 | { "startFeatureWriter", startFeatureWriter, METH_VARARGS }, 53 | { "syncWithHost", syncWithHost, METH_VARARGS }, 54 | { NULL, NULL } 55 | }; 56 | 57 | PyMODINIT_FUNC 58 | init_convnet() { 59 | (void) Py_InitModule("_convnet", _ConvNetMethods); 60 | import_array(); 61 | } 62 | 63 | PyObject* initModel(PyObject *self, PyObject *args) { 64 | assert(model == NULL); 65 | 66 | PyListObject* pyLayerParams; 67 | int pyMinibatchSize; 68 | int pyDeviceID; 69 | 70 | if (!PyArg_ParseTuple(args, "O!ii", 71 | &PyList_Type, &pyLayerParams, 72 | &pyMinibatchSize, 73 | &pyDeviceID)) { 74 | return NULL; 75 | } 76 | model = new ConvNet(pyLayerParams, 77 | pyMinibatchSize, 78 | pyDeviceID); 79 | 80 | model->start(); 81 | return Py_BuildValue("i", 0); 82 | } 83 | 84 | /* 85 | * Starts training/testing on the given batch (asynchronous -- returns immediately). 
86 | */ 87 | PyObject* startBatch(PyObject *self, PyObject *args) { 88 | assert(model != NULL); 89 | PyListObject* data; 90 | int test = 0; 91 | if (!PyArg_ParseTuple(args, "O!|i", 92 | &PyList_Type, &data, 93 | &test)) { 94 | return NULL; 95 | } 96 | MatrixV& mvec = *getMatrixV((PyObject*)data); 97 | 98 | TrainingWorker* wr = new TrainingWorker(*model, *new CPUData(mvec), test); 99 | model->getWorkerQueue().enqueue(wr); 100 | return Py_BuildValue("i", 0); 101 | } 102 | 103 | /* 104 | * Starts testing on the given batch (asynchronous -- returns immediately). 105 | */ 106 | PyObject* startMultiviewTest(PyObject *self, PyObject *args) { 107 | assert(model != NULL); 108 | PyListObject* data; 109 | int numViews, logregIdx; 110 | if (!PyArg_ParseTuple(args, "O!ii", 111 | &PyList_Type, &data, 112 | &numViews, 113 | &logregIdx)) { 114 | return NULL; 115 | } 116 | MatrixV& mvec = *getMatrixV((PyObject*)data); 117 | 118 | MultiviewTestWorker* wr = new MultiviewTestWorker(*model, *new CPUData(mvec), numViews, logregIdx); 119 | model->getWorkerQueue().enqueue(wr); 120 | return Py_BuildValue("i", 0); 121 | } 122 | 123 | PyObject* startFeatureWriter(PyObject *self, PyObject *args) { 124 | assert(model != NULL); 125 | PyListObject* data; 126 | int layerIdx; 127 | if (!PyArg_ParseTuple(args, "O!i", 128 | &PyList_Type, &data, 129 | &layerIdx)) { 130 | return NULL; 131 | } 132 | MatrixV& mvec = *getMatrixV((PyObject*)data); 133 | Matrix& ftrs = *mvec.back(); 134 | mvec.pop_back(); 135 | 136 | FeatureWorker* wr = new FeatureWorker(*model, *new CPUData(mvec), ftrs, layerIdx); 137 | model->getWorkerQueue().enqueue(wr); 138 | return Py_BuildValue("i", 0); 139 | } 140 | 141 | /* 142 | * Waits for the trainer to finish training on the batch given to startBatch. 
143 | */ 144 | PyObject* finishBatch(PyObject *self, PyObject *args) { 145 | assert(model != NULL); 146 | WorkResult* res = model->getResultQueue().dequeue(); 147 | assert(res != NULL); 148 | assert(res->getResultType() == WorkResult::BATCH_DONE); 149 | 150 | Cost& cost = res->getResults(); 151 | PyObject* dict = PyDict_New(); 152 | CostMap& costMap = cost.getCostMap(); 153 | for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) { 154 | PyObject* v = PyList_New(0); 155 | for (vector::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) { 156 | PyObject* f = PyFloat_FromDouble(*iv); 157 | PyList_Append(v, f); 158 | } 159 | PyDict_SetItemString(dict, it->first.c_str(), v); 160 | } 161 | 162 | PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases()); 163 | delete res; // Deletes cost too 164 | return retVal; 165 | } 166 | 167 | PyObject* checkGradients(PyObject *self, PyObject *args) { 168 | assert(model != NULL); 169 | PyListObject* data; 170 | if (!PyArg_ParseTuple(args, "O!", 171 | &PyList_Type, &data)) { 172 | return NULL; 173 | } 174 | MatrixV& mvec = *getMatrixV((PyObject*)data); 175 | 176 | GradCheckWorker* wr = new GradCheckWorker(*model, *new CPUData(mvec)); 177 | model->getWorkerQueue().enqueue(wr); 178 | WorkResult* res = model->getResultQueue().dequeue(); 179 | assert(res != NULL); 180 | assert(res->getResultType() == WorkResult::BATCH_DONE); 181 | delete res; 182 | return Py_BuildValue("i", 0); 183 | } 184 | 185 | /* 186 | * Copies weight matrices from GPU to system memory. 
187 | */ 188 | PyObject* syncWithHost(PyObject *self, PyObject *args) { 189 | assert(model != NULL); 190 | SyncWorker* wr = new SyncWorker(*model); 191 | model->getWorkerQueue().enqueue(wr); 192 | WorkResult* res = model->getResultQueue().dequeue(); 193 | assert(res != NULL); 194 | assert(res->getResultType() == WorkResult::SYNC_DONE); 195 | 196 | delete res; 197 | return Py_BuildValue("i", 0); 198 | } 199 | -------------------------------------------------------------------------------- /src/util.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | 29 | using namespace std; 30 | 31 | floatv* getFloatV(PyObject* pyList) { 32 | if (pyList == NULL) { 33 | return NULL; 34 | } 35 | floatv* vec = new floatv(); 36 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 37 | vec->push_back(PyFloat_AS_DOUBLE(PyList_GET_ITEM(pyList, i))); 38 | } 39 | return vec; 40 | } 41 | 42 | intv* getIntV(PyObject* pyList) { 43 | if (pyList == NULL) { 44 | return NULL; 45 | } 46 | intv* vec = new intv(); 47 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 48 | vec->push_back(PyInt_AS_LONG(PyList_GET_ITEM(pyList, i))); 49 | } 50 | return vec; 51 | } 52 | 53 | int* getIntA(PyObject* pyList) { 54 | if (pyList == NULL) { 55 | return NULL; 56 | } 57 | int* arr = new int[PyList_GET_SIZE(pyList)]; 58 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 59 | arr[i] = PyInt_AS_LONG(PyList_GET_ITEM(pyList, i)); 60 | } 61 | return arr; 62 | } 63 | MatrixV* getMatrixV(PyObject* pyList) { 64 | if (pyList == NULL) { 65 | return NULL; 66 | } 67 | MatrixV* vec = new MatrixV(); 68 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 69 | vec->push_back(new Matrix((PyArrayObject*)PyList_GET_ITEM(pyList, i))); 70 | } 71 | return vec; 72 | } 73 | 74 | int pyDictGetInt(PyObject* dict, const char* key) { 75 | return PyInt_AS_LONG(PyDict_GetItemString(dict, key)); 76 | } 77 | 78 | intv* pyDictGetIntV(PyObject* dict, const char* key) { 79 | return 
getIntV(PyDict_GetItemString(dict, key)); 80 | } 81 | 82 | int* pyDictGetIntA(PyObject* dict, const char* key) { 83 | return getIntA(PyDict_GetItemString(dict, key)); 84 | } 85 | 86 | string pyDictGetString(PyObject* dict, const char* key) { 87 | return string(PyString_AS_STRING(PyDict_GetItemString(dict, key))); 88 | } 89 | 90 | float pyDictGetFloat(PyObject* dict, const char* key) { 91 | return PyFloat_AS_DOUBLE(PyDict_GetItemString(dict, key)); 92 | } 93 | 94 | floatv* pyDictGetFloatV(PyObject* dict, const char* key) { 95 | return getFloatV(PyDict_GetItemString(dict, key)); 96 | } 97 | 98 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key) { 99 | return new Matrix((PyArrayObject*)PyDict_GetItemString(dict, key)); 100 | } 101 | 102 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key) { 103 | return getMatrixV(PyDict_GetItemString(dict, key)); 104 | } -------------------------------------------------------------------------------- /src/weights.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | 29 | bool Weights::_autoCopyToGPU = false; -------------------------------------------------------------------------------- /src/worker.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | using namespace std; 32 | 33 | /* 34 | * ==================== 35 | * WorkResult 36 | * ==================== 37 | */ 38 | WorkResult::WorkResult(WorkResult::RESULTS resultType, Cost& results) : _resultType(resultType), _results(&results) { 39 | } 40 | 41 | WorkResult::WorkResult(WorkResult::RESULTS resultType) : _resultType(resultType), _results(NULL) { 42 | } 43 | 44 | WorkResult::~WorkResult() { 45 | delete _results; // delete NULL is ok 46 | } 47 | 48 | Cost& WorkResult::getResults() const { 49 | return *_results; 50 | } 51 | 52 | WorkResult::RESULTS WorkResult::getResultType() const { 53 | return _resultType; 54 | } 55 | 56 | /* 57 | * ==================== 58 | * Worker 59 | * ==================== 60 | */ 61 | Worker::Worker(ConvNet& convNet) : _convNet(&convNet) { 62 | } 63 | 64 | /* 65 | * ==================== 66 | * DataWorker 67 | * ==================== 68 | */ 69 | DataWorker::DataWorker(ConvNet& convNet, CPUData& data) : Worker(convNet), _data(&data) { 70 | _dp = &convNet.getDataProvider(); 71 | } 72 | 73 | DataWorker::~DataWorker() { 74 | _dp->clearData(); 75 | } 76 | 77 | /* 78 | * ==================== 79 | * TrainingWorker 80 | * ==================== 81 | */ 82 | TrainingWorker::TrainingWorker(ConvNet& convNet, CPUData& data, bool test) 83 | : DataWorker(convNet, data), _test(test) { 84 | } 85 | 86 | // Need to setData here (as opposed to the 
constructor) because the constructor executes in 87 | // the original CPU thread, which is not the one with GPU access. 88 | void TrainingWorker::run() { 89 | _dp->setData(*_data); 90 | Cost& batchCost = *new Cost(0); 91 | for (int i = 0; i < _dp->getNumMinibatches(); i++) { 92 | _convNet->fprop(i, _test ? PASS_TEST : PASS_TRAIN); 93 | _convNet->getCost(batchCost); 94 | 95 | if (!_test) { 96 | _convNet->bprop(PASS_TRAIN); 97 | _convNet->updateWeights(); 98 | } 99 | } 100 | cudaThreadSynchronize(); 101 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 102 | } 103 | 104 | /* 105 | * ==================== 106 | * SyncWorker 107 | * ==================== 108 | */ 109 | SyncWorker::SyncWorker(ConvNet& convNet) : Worker(convNet) { 110 | } 111 | 112 | void SyncWorker::run() { 113 | _convNet->copyToCPU(); 114 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::SYNC_DONE)); 115 | } 116 | 117 | /* 118 | * ==================== 119 | * GradCheckWorker 120 | * ==================== 121 | */ 122 | GradCheckWorker::GradCheckWorker(ConvNet& convNet, CPUData& data) 123 | : DataWorker(convNet, data) { 124 | } 125 | 126 | void GradCheckWorker::run() { 127 | _dp->setData(*_data); 128 | _convNet->checkGradients(); 129 | exit(0); 130 | } 131 | 132 | /* 133 | * ==================== 134 | * MultiviewTestWorker 135 | * ==================== 136 | */ 137 | MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx) 138 | : DataWorker(convNet, data), _numViews(numViews), _logregIdx(logregIdx) { 139 | assert(_data->getNumCases() % _numViews == 0); 140 | } 141 | 142 | void MultiviewTestWorker::run() { 143 | _dp->setData(*_data); 144 | Layer& logregLayer = _convNet->getLayer(_logregIdx); 145 | 146 | int numCasesReal = _dp->getNumCases() / _numViews; 147 | int numMiniReal = DIVUP(numCasesReal, _dp->getMinibatchSize()); 148 | 149 | Cost& batchCost = *new Cost(0); 150 | for (int i = 0; i < numMiniReal; i++) { 
151 | NVMatrix softmaxActs; 152 | for (int v = 0; v < _numViews; v++) { 153 | GPUData& mini = _dp->getDataSlice(v * numCasesReal + i * _dp->getMinibatchSize(), 154 | min((v + 1) * numCasesReal, v * numCasesReal + (i + 1) * _dp->getMinibatchSize())); 155 | _convNet->fprop(mini, PASS_TEST); 156 | if (v == 0) { 157 | logregLayer.getPrev()[1]->getActs().copy(softmaxActs); 158 | } else { 159 | softmaxActs.add(logregLayer.getPrev()[1]->getActs()); 160 | } 161 | } 162 | softmaxActs.scale(1.0 / _numViews); 163 | NVMatrixV logregInput; 164 | logregInput.push_back(&logregLayer.getPrev()[0]->getActs()); 165 | logregInput.push_back(&softmaxActs); 166 | 167 | logregLayer.fprop(logregInput, PASS_TEST); 168 | 169 | _convNet->getCost(batchCost); 170 | } 171 | cudaThreadSynchronize(); 172 | 173 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 174 | } 175 | 176 | /* 177 | * ==================== 178 | * FeatureWorker 179 | * ==================== 180 | */ 181 | FeatureWorker::FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx) 182 | : DataWorker(convNet, data), _ftrs(&ftrs), _layerIdx(layerIdx) { 183 | assert(ftrs.getNumRows() == data.getNumCases()); 184 | assert(!ftrs.isTrans()); 185 | } 186 | 187 | FeatureWorker::~FeatureWorker() { 188 | delete _ftrs; 189 | } 190 | 191 | void FeatureWorker::run() { 192 | _dp->setData(*_data); 193 | Layer& ftrLayer = _convNet->getLayer(_layerIdx); 194 | Cost& batchCost = *new Cost(0); 195 | for (int i = 0; i < _dp->getNumMinibatches(); i++) { 196 | _convNet->fprop(i, PASS_TEST); 197 | _convNet->getCost(batchCost); 198 | Matrix& miniFtrs = _ftrs->sliceRows(i * _dp->getMinibatchSize(), 199 | min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize())); 200 | NVMatrix& acts = ftrLayer.getActs(); 201 | NVMatrix acts_T; 202 | if (acts.isTrans()) { 203 | NVMatrix& soft_T = acts.getTranspose(); 204 | soft_T.transpose(acts_T); 205 | delete &soft_T; 206 | } else { 207 | acts.transpose(acts_T); 208 
| } 209 | acts_T.copyToHost(miniFtrs); 210 | delete &miniFtrs; 211 | } 212 | cudaThreadSynchronize(); 213 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 214 | } -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | import re 26 | import cPickle 27 | import os 28 | import numpy as n 29 | from math import sqrt 30 | 31 | import gzip 32 | import zipfile 33 | 34 | class UnpickleError(Exception): 35 | pass 36 | 37 | VENDOR_ID_REGEX = re.compile('^vendor_id\s+: (\S+)') 38 | GPU_LOCK_NO_SCRIPT = -2 39 | GPU_LOCK_NO_LOCK = -1 40 | 41 | try: 42 | import magic 43 | ms = magic.open(magic.MAGIC_NONE) 44 | ms.load() 45 | except ImportError: # no magic module 46 | ms = None 47 | 48 | def get_gpu_lock(id=-1): 49 | import imp 50 | lock_script_path = '/u/tang/bin/gpu_lock2.py' 51 | if os.path.exists(lock_script_path): 52 | locker = imp.load_source("", lock_script_path) 53 | if id == -1: 54 | return locker.obtain_lock_id() 55 | print id 56 | got_id = locker._obtain_lock(id) 57 | return id if got_id else GPU_LOCK_NO_LOCK 58 | return GPU_LOCK_NO_SCRIPT if id < 0 else id 59 | 60 | def pickle(filename, data, compress=False): 61 | if compress: 62 | fo = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) 63 | fo.writestr('data', cPickle.dumps(data, -1)) 64 | else: 65 | fo = open(filename, "wb") 66 | cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL) 67 | fo.close() 68 | 69 | def unpickle(filename): 70 | if not os.path.exists(filename): 71 | raise UnpickleError("Path '%s' does not exist." 
% filename) 72 | if ms is not None and ms.file(filename).startswith('gzip'): 73 | fo = gzip.open(filename, 'rb') 74 | dict = cPickle.load(fo) 75 | elif ms is not None and ms.file(filename).startswith('Zip'): 76 | fo = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED) 77 | dict = cPickle.loads(fo.read('data')) 78 | else: 79 | fo = open(filename, 'rb') 80 | dict = cPickle.load(fo) 81 | 82 | fo.close() 83 | return dict 84 | 85 | def tryint(s): 86 | try: 87 | return int(s) 88 | except: 89 | return s 90 | 91 | def alphanum_key(s): 92 | return [tryint(c) for c in re.split('([0-9]+)', s)] 93 | 94 | def is_intel_machine(): 95 | f = open('/proc/cpuinfo') 96 | for line in f: 97 | m = VENDOR_ID_REGEX.match(line) 98 | if m: 99 | f.close() 100 | return m.group(1) == 'GenuineIntel' 101 | f.close() 102 | return False 103 | 104 | def get_cpu(): 105 | if is_intel_machine(): 106 | return 'intel' 107 | return 'amd' 108 | 109 | def is_windows_machine(): 110 | return os.name == 'nt' 111 | --------------------------------------------------------------------------------