├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.bat ├── cmake ├── FindBoostNumpy.cmake ├── FindCUDADriver.cmake ├── FindGLFW.cmake ├── FindOpenCL.cmake ├── FindOpenEXR.cmake └── cmake_uninstall.cmake.in ├── cpack └── CMakeCPackOptions.cmake.in ├── doc ├── CMakeLists.txt ├── Doxyfile.in ├── mainpage.h └── update-web-api.sh ├── examples ├── boxblur_cuda.ip ├── boxblur_glsl.ip ├── boxblur_opencl.ip ├── example.cpp ├── gaussblur_cuda.ip ├── gaussblur_glsl.ip ├── gaussblur_opencl.ip ├── gaussblur_separable_cuda.ip ├── gaussblur_separable_glsl.ip ├── gaussblur_separable_opencl.ip ├── images │ └── README ├── kernels │ ├── box_blur.cl │ ├── box_blur.cu │ ├── box_blur.glsl │ ├── gaussian_blur.cl │ ├── gaussian_blur.cu │ ├── gaussian_blur.glsl │ ├── gaussian_blur_hor.cl │ ├── gaussian_blur_hor.cu │ ├── gaussian_blur_hor.glsl │ ├── gaussian_blur_vert.cl │ ├── gaussian_blur_vert.cu │ ├── gaussian_blur_vert.glsl │ ├── lerp.cl │ ├── lerp.cu │ ├── lerp.glsl │ ├── lerp_float.cl │ ├── lerp_float.cu │ ├── lerp_ubyte.cl │ ├── lerp_ubyte.cu │ ├── sobel.cl │ ├── sobel.cu │ └── sobel.glsl ├── lerp_cuda.ip ├── lerp_cuda_float.ip ├── lerp_cuda_ubyte.ip ├── lerp_glsl.ip ├── lerp_glsl_float.ip ├── lerp_glsl_ubyte.ip ├── lerp_opencl.ip ├── lerp_opencl_float.ip ├── lerp_opencl_ubyte.ip ├── sobel_cuda.ip ├── sobel_glsl.ip └── sobel_opencl.ip ├── install.bat ├── python ├── CMakeLists.txt ├── bufferswidget.py ├── displaywidget.py ├── gpuip.py ├── icons │ ├── boilerplate.png │ ├── build.png │ ├── export.png │ ├── generateIcons.py │ ├── import.png │ ├── init.png │ ├── new.png │ ├── newExisting.png │ ├── open.png │ ├── process.png │ ├── pug.png │ ├── refresh.png │ ├── run.png │ └── save.png ├── kernelwidget.py ├── mainwindow.py ├── newdialog.py ├── settings.py ├── stylesheet.py └── utils.py ├── src ├── CMakeLists.txt ├── cuda.cpp ├── cuda.h ├── cuda_error.h ├── glcontext.h ├── glcontext.m ├── glsl.cpp ├── glsl.h ├── glsl_error.h ├── gpuip.cpp ├── gpuip.h ├── helper_math.cuh 
├── io_wrapper.cpp ├── io_wrapper.h ├── opencl.cpp ├── opencl.h ├── opencl_error.h └── python.cpp └── test ├── CMakeLists.txt ├── performance.cpp ├── test.cpp └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | doc/html 3 | doc/man 4 | doc/Doxyfile 5 | thirdparty 6 | *.exr 7 | *.png 8 | *.pyc 9 | python/icons.py 10 | python/gpuip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Per Karlsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gpuip 2 | ===== 3 | 4 | Gpuip is a C++ cross-platform framework for Image Processing on the GPU architecture. It tries to simplify the image processing pipeline on the GPU and make it more generic across the three most common environments: OpenCL, CUDA and OpenGL GLSL. It provides a simple interface to copy data from and to the GPU and makes it easy to compile and run GPU kernel code. 5 | 6 | ### API 7 | The online API documentation [can be found here](http://karlssonper.github.io/gpuip/api/). 8 | 9 | ### pygpuip 10 | The gpuip library comes with optional python bindings to the C++ code. The python bindings have I/O operations included with .exr and .png support (and .jpeg, .tiff and .tga if dev libraries are found at build time). Numpy arrays are used to transfer data to/from the GPU. 11 | 12 | ### bin/gpuip 13 | If python bindings are available, gpuip comes with an executable program that has both a GUI version for debugging and development of GPU kernels and a command line version to plug into existing pipelines. The program uses the gpuip-specific XML-based file format *.ip to store settings. 14 | ``` 15 | usage: Framework for Image Processing on the GPU [-h] [-f FILE] 16 | [-p kernel param value] 17 | [-i buffer path] 18 | [-o buffer path] [-v] 19 | [--timestamp] [--nogui] 20 | 21 | optional arguments: 22 | -h, --help show this help message and exit 23 | -f FILE, --file FILE Image Processing file *.ip 24 | -p kernel param value, --param kernel param value 25 | Change value of a parameter. 
26 | -i buffer path, --inbuffer buffer path 27 | Set input image to a buffer 28 | -o buffer path, --outbuffer buffer path 29 | Set output image to a buffer 30 | -v, --verbose Outputs information 31 | --timestamp Add timestamp in log output 32 | --nogui Command line version 33 | 34 | ``` 35 | 36 | ### Dependencies 37 | * gpuip: 38 | * [`OpenCL`](https://www.khronos.org/opencl/) *optional* 39 | * [`CUDA`](https://developer.nvidia.com/cuda-zone) *optional* 40 | * [`OpenGL`](http://www.opengl.org/) *optional* 41 | * [`GLFW`](http://www.glfw.org/) *OpenGL context creation* 42 | * [`GLEW`](http://glew.sourceforge.net/) *OpenGL extensions* 43 | 44 | * pygpuip: 45 | * [`Python`](http://www.python.org/) *version 2.6 or newer* 46 | * [`Boost Python`](http://www.boost.org/) *python C++ bindings* 47 | * [`Boost NumPy`](https://github.com/ndarray/Boost.NumPy) *numpy C++ bindings* 48 | * [`OpenEXR`](http://www.openexr.com/) *exr i/o* 49 | * [`CImg`](http://cimg.sourceforge.net/) *png, jpeg, tiff, tga i/o* 50 | * [`libpng`](http://www.libpng.org/pub/png/libpng.html) *png format* 51 | * [`zlib`](http://www.zlib.net) *compression used by OpenEXR and libpng* 52 | 53 | * bin/gpuip 54 | * [`numpy`](http://www.numpy.org/) *python array object* 55 | * [`Qt`](http://qt-project.org/) *GUI (optional)* 56 | * [`PIL`](https://pypi.python.org/pypi/Pillow) *GUI icons (optional, needed at build)* 57 | * [`PySide`](http://qt-project.org/wiki/PySide) *Qt python bindings* 58 | 59 | 60 | ### Build/Install ### 61 | 62 | #### Linux/OSX 63 | 64 | There are two bash scripts provided, `build.sh` and `install.sh`. If you want to generate your own Makefiles, use CMake: 65 | ``` 66 | mkdir build 67 | cd build 68 | cmake .. 69 | make 70 | sudo make install 71 | ``` 72 | 73 | #### Windows 74 | There are two batch scripts provided, `build.bat` and `install.bat`. If you want to generate your own Visual Studio Solution, use CMake: 75 | ``` 76 | mkdir build 77 | cd build 78 | cmake .. 
79 | cmake --build . --config Release 80 | ``` 81 | 82 | If you have admin rights, you can add `--target INSTALL` to the last command to install the files. 83 | 84 | 85 | ### CMake options 86 | 87 | ``` 88 | BUILD_THIRD_PARTY_LIBS // Build and link missing third party libraries 89 | BUILD_SHARED_LIB // Make gpuip a shared library 90 | BUILD_WITH_OPENCL // Support OpenCL (if found) 91 | BUILD_WITH_CUDA // Support CUDA (if found) 92 | BUILD_WITH_GLSL // Support GLSL (if found) 93 | BUILD_PYTHON_BINDINGS // Build Python bindings 94 | BUILD_TESTS // Build unit tests 95 | BUILD_DOCS // Generate Doxygen documentation 96 | DOWNLOAD_EXAMPLES_IMAGES // Download examples input images 97 | ``` 98 | 99 | ### Third party libraries 100 | If the CMake option `BUILD_THIRD_PARTY_LIBS` is set to ON, the build will download the source code of the missing libraries and compile it. This does not apply to the core libs OpenCL, CUDA and OpenGL since they are not open source. Although supported, it is not recommended to download and build boost from the git repo as it takes a long time to clone the submodules. 101 | 102 | The following CMake variables can be set to help CMake find the third party libraries: 103 | 104 | ``` 105 | -DBOOST_ROOT=... 106 | -DZLIB_ROOT=... 107 | -DGLFW_ROOT=... 108 | ``` 109 | 110 | ### Examples ### 111 | The following examples are included in the `examples` directory (can be run with `bin/gpuip`): 112 | 113 | ``` 114 | - linear interpolation 115 | - box blur 116 | - gaussian blur 117 | - separable gaussian blur 118 | ``` 119 | 120 | ### Tests ### 121 | 122 | To run all tests, run `ctest` from the `build` directory. 
123 | 124 | ``` 125 | test_cpp // Test C++ api 126 | test_py // Test Python bindings 127 | test_performance // Tests the performance, compare against CPU 128 | ``` 129 | -------------------------------------------------------------------------------- /build.bat: -------------------------------------------------------------------------------- 1 | pushd %~dp0 2 | if not exist build\ ( 3 | mkdir build 4 | ) 5 | cd build 6 | cmake .. 7 | cmake --build . --config Release 8 | cd .. 9 | pause 10 | popd -------------------------------------------------------------------------------- /cmake/FindBoostNumpy.cmake: -------------------------------------------------------------------------------- 1 | FIND_PACKAGE(PackageHandleStandardArgs) 2 | 3 | FIND_LIBRARY( 4 | Boost_NUMPY_LIBRARY 5 | NAMES boost_numpy 6 | PATHS 7 | /usr/local/lib/ 8 | /usr/lib64/ 9 | ${Boost_LIBRARY_DIRS}) 10 | 11 | FIND_PATH(Boost_NUMPY_INCLUDE_DIRS 12 | NAMES 13 | boost/numpy.hpp 14 | PATHS 15 | /usr/local/include 16 | ${Boost_INCLUDE_DIRS}) 17 | 18 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(BoostNumpy DEFAULT_MSG Boost_NUMPY_LIBRARY Boost_NUMPY_INCLUDE_DIRS) 19 | 20 | SET(Boost_NUMPY_FOUND 0) 21 | IF(Boost_NUMPY_LIBRARY AND Boost_NUMPY_INCLUDE_DIRS) 22 | SET(Boost_NUMPY_FOUND 1) 23 | message(STATUS "Boost numpy found!") 24 | ENDIF(Boost_NUMPY_LIBRARY AND Boost_NUMPY_INCLUDE_DIRS) -------------------------------------------------------------------------------- /cmake/FindCUDADriver.cmake: -------------------------------------------------------------------------------- 1 | FIND_PACKAGE(PackageHandleStandardArgs) 2 | 3 | FIND_LIBRARY( 4 | CUDA_DRIVER_LIBRARY 5 | NAMES cuda 6 | PATHS 7 | /usr/local/lib/ 8 | /usr/lib64/ 9 | $ENV{CUDA_PATH}/lib/Win32) 10 | #$ENV{CUDA_LIB_PATH}) 11 | #${CUDA_TOOLKIT_ROOT_DIR}/lib64) 12 | 13 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(CUDADriver DEFAULT_MSG CUDA_DRIVER_LIBRARY) -------------------------------------------------------------------------------- /cmake/FindGLFW.cmake: 
-------------------------------------------------------------------------------- 1 | # Locate the glfw library 2 | # This module defines the following variables: 3 | # GLFW_LIBRARIES, the absolute path to the library; 4 | # GLFW_INCLUDE_DIRS, where to find glfw include files. 5 | # GLFW_FOUND, true if both the GLFW_LIBRARIES and GLFW_INCLUDE_DIRS have been found. 6 | # 7 | # To help locate the library and include file, you could define a CMake variable (-DGLFW_ROOT=...) or an environment variable called 8 | # GLFW_ROOT which points to the root of the glfw library installation. This is pretty useful 9 | # on a Windows platform. 10 | # 11 | # 12 | # Usage example to compile an "executable" target to the glfw library: 13 | # 14 | # FIND_PACKAGE (GLFW) 15 | # INCLUDE_DIRECTORIES (${GLFW_INCLUDE_DIRS}) 16 | # ADD_EXECUTABLE (executable ${EXECUTABLE_SRCS}) 17 | # TARGET_LINK_LIBRARIES (executable ${GLFW_LIBRARIES}) 18 | # 19 | # TODO: 20 | # Allow the user to select to link to a shared library or to a static library. 21 | 22 | #Search for the include file... 23 | FIND_PATH(GLFW_INCLUDE_DIRS GLFW/glfw3.h DOC "Path to GLFW include directory." 24 | HINTS 25 | ${GLFW_ROOT} $ENV{GLFW_ROOT} 26 | PATH_SUFFIXES include #For finding the include file under the root of the glfw expanded archive, typically on Windows. 27 | PATHS 28 | /usr/include/ 29 | /usr/local/include/ 30 | # By default headers are under GL subfolder 31 | /usr/include/GL 32 | /usr/local/include/GL 33 | "C:/Program Files (x86)/GLFW/include" 34 | ${GLFW_ROOT_DIR}/include/ # added by ptr 35 | 36 | ) 37 | 38 | FIND_LIBRARY(GLFW_LIBRARIES DOC "Absolute path to GLFW library." 39 | NAMES glfw glfw3 40 | HINTS 41 | ${GLFW_ROOT} $ENV{GLFW_ROOT} 42 | PATH_SUFFIXES lib lib/win32 #For finding the library file under the root of the glfw expanded archive, typically on Windows. 
43 | PATHS 44 | "C:/Program Files (x86)/GLFW/lib" 45 | /usr/local/lib 46 | /usr/lib 47 | ${GLFW_ROOT_DIR}/lib-msvc100/release # added by ptr 48 | ) 49 | 50 | SET(GLFW_FOUND 0) # only set to 1 below when both find results above are valid 51 | IF(GLFW_LIBRARIES AND GLFW_INCLUDE_DIRS) 52 | SET(GLFW_FOUND 1) 53 | message(STATUS "GLFW found!") 54 | ENDIF(GLFW_LIBRARIES AND GLFW_INCLUDE_DIRS) 55 | -------------------------------------------------------------------------------- /cmake/FindOpenCL.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindOpenCL 3 | # ---------- 4 | # 5 | # Try to find OpenCL 6 | # 7 | # Once done this will define:: 8 | # 9 | # OpenCL_FOUND - True if OpenCL was found 10 | # OpenCL_INCLUDE_DIRS - include directories for OpenCL 11 | # OpenCL_LIBRARIES - link against this library to use OpenCL 12 | # OpenCL_VERSION_STRING - Highest supported OpenCL version (eg. 1.2) 13 | # OpenCL_VERSION_MAJOR - The major version of the OpenCL implementation 14 | # OpenCL_VERSION_MINOR - The minor version of the OpenCL implementation 15 | # 16 | # The module will also define two cache variables:: 17 | # 18 | # OpenCL_INCLUDE_DIR - the OpenCL include directory 19 | # OpenCL_LIBRARY - the path to the OpenCL library 20 | # 21 | 22 | #============================================================================= 23 | # Copyright 2014 Matthaeus G. Chajdas 24 | # 25 | # Distributed under the OSI-approved BSD License (the "License"); 26 | # see accompanying file Copyright.txt for details. 27 | # 28 | # This software is distributed WITHOUT ANY WARRANTY; without even the 29 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 30 | # See the License for more information. 31 | #============================================================================= 32 | # (To distribute this file outside of CMake, substitute the full 33 | # License text for the above reference.) 
34 | 35 | function(_FIND_OPENCL_VERSION) 36 | include(CheckSymbolExists) 37 | include(CMakePushCheckState) 38 | set(CMAKE_REQUIRED_QUIET ${OpenCL_FIND_QUIETLY}) 39 | 40 | CMAKE_PUSH_CHECK_STATE() 41 | foreach(VERSION "2_0" "1_2" "1_1" "1_0") 42 | set(CMAKE_REQUIRED_INCLUDES "${OpenCL_INCLUDE_DIR}") 43 | 44 | if(APPLE) 45 | CHECK_SYMBOL_EXISTS( 46 | CL_VERSION_${VERSION} 47 | "${OpenCL_INCLUDE_DIR}/OpenCL/cl.h" 48 | OPENCL_VERSION_${VERSION}) 49 | else() 50 | CHECK_SYMBOL_EXISTS( 51 | CL_VERSION_${VERSION} 52 | "${OpenCL_INCLUDE_DIR}/CL/cl.h" 53 | OPENCL_VERSION_${VERSION}) 54 | endif() 55 | 56 | if(OPENCL_VERSION_${VERSION}) 57 | string(REPLACE "_" "." VERSION "${VERSION}") 58 | set(OpenCL_VERSION_STRING ${VERSION} PARENT_SCOPE) 59 | string(REGEX MATCHALL "[0-9]+" version_components "${VERSION}") 60 | list(GET version_components 0 major_version) 61 | list(GET version_components 1 minor_version) 62 | set(OpenCL_VERSION_MAJOR ${major_version} PARENT_SCOPE) 63 | set(OpenCL_VERSION_MINOR ${minor_version} PARENT_SCOPE) 64 | break() 65 | endif() 66 | endforeach() 67 | CMAKE_POP_CHECK_STATE() 68 | endfunction() 69 | 70 | find_path(OpenCL_INCLUDE_DIR 71 | NAMES 72 | CL/cl.h OpenCL/cl.h 73 | PATHS 74 | ENV "PROGRAMFILES(X86)" 75 | ENV AMDAPPSDKROOT 76 | ENV INTELOCLSDKROOT 77 | ENV NVSDKCOMPUTE_ROOT 78 | ENV CUDA_PATH 79 | ENV ATISTREAMSDKROOT 80 | PATH_SUFFIXES 81 | include 82 | OpenCL/common/inc 83 | "AMD APP/include") 84 | 85 | _FIND_OPENCL_VERSION() 86 | 87 | if(WIN32) 88 | if(CMAKE_SIZEOF_VOID_P EQUAL 4) 89 | find_library(OpenCL_LIBRARY 90 | NAMES OpenCL 91 | PATHS 92 | ENV "PROGRAMFILES(X86)" 93 | ENV AMDAPPSDKROOT 94 | ENV INTELOCLSDKROOT 95 | ENV CUDA_PATH 96 | ENV NVSDKCOMPUTE_ROOT 97 | ENV ATISTREAMSDKROOT 98 | PATH_SUFFIXES 99 | "AMD APP/lib/x86" 100 | lib/x86 101 | lib/Win32 102 | OpenCL/common/lib/Win32) 103 | elseif(CMAKE_SIZEOF_VOID_P EQUAL 8) 104 | find_library(OpenCL_LIBRARY 105 | NAMES OpenCL 106 | PATHS 107 | ENV "PROGRAMFILES(X86)" 108 | ENV 
AMDAPPSDKROOT 109 | ENV INTELOCLSDKROOT 110 | ENV CUDA_PATH 111 | ENV NVSDKCOMPUTE_ROOT 112 | ENV ATISTREAMSDKROOT 113 | PATH_SUFFIXES 114 | "AMD APP/lib/x86_64" 115 | lib/x86_64 116 | lib/x64 117 | OpenCL/common/lib/x64) 118 | endif() 119 | else() 120 | find_library(OpenCL_LIBRARY 121 | NAMES OpenCL) 122 | endif() 123 | 124 | set(OpenCL_LIBRARIES ${OpenCL_LIBRARY}) 125 | set(OpenCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIR}) 126 | FIND_PACKAGE(PackageHandleStandardArgs) 127 | find_package_handle_standard_args( 128 | OpenCL 129 | FOUND_VAR OpenCL_FOUND 130 | REQUIRED_VARS OpenCL_LIBRARY OpenCL_INCLUDE_DIR 131 | VERSION_VAR OpenCL_VERSION_STRING) 132 | 133 | mark_as_advanced( 134 | OpenCL_INCLUDE_DIR 135 | OpenCL_LIBRARY) 136 | -------------------------------------------------------------------------------- /cmake/FindOpenEXR.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Try to find OpenEXR's libraries, and include path. 3 | # Once done this will define: 4 | # 5 | # OPENEXR_FOUND = OpenEXR found. 6 | # OPENEXR_INCLUDE_PATHS = OpenEXR include directories. 7 | # OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR. 
8 | # 9 | 10 | INCLUDE(FindZLIB) 11 | 12 | 13 | IF(ZLIB_FOUND) 14 | 15 | SET(LIBRARY_PATHS 16 | /usr/lib 17 | /usr/local/lib 18 | /sw/lib 19 | /opt/local/lib 20 | $ENV{PROGRAM_FILES}/OpenEXR/lib/static) 21 | 22 | FIND_PATH(OPENEXR_INCLUDE_PATH OpenEXR/ImfRgbaFile.h 23 | /usr/include 24 | /usr/local/include 25 | /sw/include 26 | /opt/local/include) 27 | 28 | FIND_LIBRARY(OPENEXR_HALF_LIBRARY 29 | NAMES Half 30 | PATHS ${LIBRARY_PATHS}) 31 | 32 | FIND_LIBRARY(OPENEXR_IEX_LIBRARY 33 | NAMES Iex 34 | PATHS ${LIBRARY_PATHS}) 35 | 36 | FIND_LIBRARY(OPENEXR_IMATH_LIBRARY 37 | NAMES Imath 38 | PATHS ${LIBRARY_PATHS}) 39 | 40 | FIND_LIBRARY(OPENEXR_ILMIMF_LIBRARY 41 | NAMES IlmImf 42 | PATHS ${LIBRARY_PATHS}) 43 | 44 | FIND_LIBRARY(OPENEXR_ILMTHREAD_LIBRARY 45 | NAMES IlmThread 46 | PATHS ${LIBRARY_PATHS}) 47 | 48 | ENDIF(ZLIB_FOUND) 49 | 50 | #MESSAGE(STATUS ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} ${ZLIB_LIBRARY}) 51 | 52 | IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY) 53 | SET(OPENEXR_FOUND TRUE) 54 | SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE STRING "The include paths needed to use OpenEXR") 55 | SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR") 56 | ENDIF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY) 57 | 58 | IF(OPENEXR_FOUND) 59 | IF(NOT OPENEXR_FIND_QUIETLY) 60 | MESSAGE(STATUS "Found OpenEXR: ${OPENEXR_ILMIMF_LIBRARY}") 61 | ENDIF(NOT OPENEXR_FIND_QUIETLY) 62 | ELSE(OPENEXR_FOUND) 63 | IF(OPENEXR_FIND_REQUIRED) 64 | MESSAGE(FATAL_ERROR "Could not find OpenEXR library") 65 | ENDIF(OPENEXR_FIND_REQUIRED) 66 | ENDIF(OPENEXR_FOUND) 67 | 68 | 
MARK_AS_ADVANCED( 69 | OPENEXR_INCLUDE_PATHS 70 | OPENEXR_LIBRARIES 71 | OPENEXR_ILMIMF_LIBRARY 72 | OPENEXR_IMATH_LIBRARY 73 | OPENEXR_IEX_LIBRARY 74 | OPENEXR_HALF_LIBRARY) 75 | -------------------------------------------------------------------------------- /cmake/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 17 | endif(NOT "${rm_retval}" STREQUAL 0) 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 21 | endforeach(file) -------------------------------------------------------------------------------- /cpack/CMakeCPackOptions.cmake.in: -------------------------------------------------------------------------------- 1 | #CPack 2 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Framework for Image Processing on the GPU") 3 | set(CPACK_PACKAGE_VERSION ${VERSION}) 4 | set(CPACK_COMPONENTS_ALL devel python bin) 5 | set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${GPUIP_VERSION}) 6 | set(CPACK_COMPONENTS_GROUPING ALL_COMPONENTS_IN_ONE) 7 | 8 | 9 | if(${CPACK_GENERATOR} 
MATCHES "RPM") 10 | set(CPACK_RPM_COMPONENT_INSTALL ON) 11 | set(CPACK_RPM_PACKAGE_GROUP "Applications/Engineering") 12 | endif() -------------------------------------------------------------------------------- /doc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Doxygen REQUIRED) 2 | configure_file( 3 | ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in 4 | ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile) 5 | add_custom_target(doc ALL ${DOXYGEN_EXECUTABLE} 6 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 7 | COMMENT "Generating documentation" VERBATIM) -------------------------------------------------------------------------------- /doc/mainpage.h: -------------------------------------------------------------------------------- 1 | /*! 2 | \example box_blur.cl 3 | \example lerp_ubyte.cu 4 | \example gaussian_blur_hor.cl 5 | \example box_blur.glsl 6 | \example sobel.glsl 7 | \example lerp_float.cu 8 | \example lerp.cu 9 | \example gaussian_blur_vert.cu 10 | \example box_blur.cl 11 | \example gaussian_blur_vert.glsl 12 | \example box_blur.cu 13 | \example lerp.glsl 14 | \example gaussian_blur.cu 15 | \example sobel.cu 16 | \example gaussian_blur_vert.cl 17 | \example lerp_float.cl 18 | \example lerp.cl 19 | \example gaussian_blur_hor.cu 20 | \example gaussian_blur_hor.glsl 21 | \example gaussian_blur.cl 22 | \example sobel.cl 23 | \example lerp_ubyte.cl 24 | \example gaussian_blur.glsl 25 | */ 26 | 27 | /*! 28 | \mainpage 29 | gpuip is a C++ cross-platform framework for Image Processing on the GPU architecture. It tries to simplify the image processing pipeline on the GPU and make it more generic across the three most common environments: OpenCL, CUDA and OpenGL GLSL. It provides a simple interface to copy data from and to the GPU and makes it easy to compile and run GPU kernel code. 
30 | 31 | Example: 32 | \include ../examples/example.cpp 33 | */ 34 | //----------------------------------------------------------------------------// 35 | -------------------------------------------------------------------------------- /doc/update-web-api.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p ~/tmp/ 3 | git clone git@github.com:karlssonper/gpuip -b gh-pages ~/tmp/gpuip-web || exit 1 # abort if the gh-pages checkout is unavailable 4 | cur=$(pwd) 5 | echo "$cur" 6 | cd ~/tmp/gpuip-web || exit 1 # never run the git commands below in the wrong repository 7 | git pull origin 8 | git rm -r api/* 9 | mkdir -p api # directory may survive 'git rm' if it holds untracked files 10 | cp -r "$cur"/html/* api/ 11 | git add api/* 12 | git commit -m "api update" 13 | git push 14 | cd "$cur" 15 | rm -rf ~/tmp/gpuip-web -------------------------------------------------------------------------------- /examples/boxblur_cuda.ip: -------------------------------------------------------------------------------- 1 | 2 | CUDA 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | output_images/boxblur_cuda.exr 14 | 15 | 16 | box_blur 17 | kernels/box_blur.cu 18 | 19 | in 20 | buffer1 21 | 22 | 23 | out 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 1 30 | 1 31 | 1 32 | 15 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/boxblur_glsl.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | output_images/boxblur_glsl.exr 14 | 15 | 16 | box_blur 17 | kernels/box_blur.glsl 18 | 19 | input 20 | buffer1 21 | 22 | 23 | output 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 9 30 | 1 31 | 1 32 | 15 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- 
/examples/boxblur_opencl.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | 
buffer2 11 | half 12 | 4 13 | output_images/boxblur_opencl.exr 14 | 15 | 16 | box_blur 17 | kernels/box_blur.cl 18 | 19 | in 20 | buffer1 21 | 22 | 23 | out 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 1 30 | 1 31 | 1 32 | 15 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void print_timings(const char * func_name, double ms, std::string * err) 4 | { 5 | if (ms != GPUIP_ERROR) { 6 | printf("%s took %.2lf ms.\n", func_name, ms); 7 | } else { 8 | printf("Error in %s: %s\n", func_name, err->c_str()); 9 | } 10 | } 11 | 12 | void use_gpuip() 13 | { 14 | std::string err; 15 | float * data; 16 | unsigned int width, height; 17 | ReadImage(&data, &width, &height); // definied somewhere else 18 | 19 | if (!gpuip::ImageProcessor::CanCreateGpuEnvironment(gpuip::GLSL)) { 20 | // ... deal with error - throw exception, return function etc 21 | } 22 | gpuip::ImageProcessor::Ptr ip = gpuip::ImageProcessor::Create(gpuip::GLSL); 23 | ip->SetDimensions(width, height); 24 | gpuip::Buffer::Ptr b0 = ip->CreateBuffer("b0", gpuip::FLOAT, 4); 25 | gpuip::Buffer::Ptr b1 = ip->CreateBuffer("b1", gpuip::FLOAT, 4); 26 | gpuip::Kernel::Ptr kernel = gpuipip->CreateKernel("modify_red"); 27 | kernel->code = GetKernelCode(); // definied somewhere else 28 | kernel->inBuffers.push_back(gpuip::Kernel::BufferLink(b0, "img")); 29 | kernel->outBuffers.push_back(gpuip::Kernel::BufferLink(b1, "out_img")); 30 | kernel->paramsFloat.push_back(gpuip::Parameter("alpha", 0.4)); 31 | print_timings("Build", ip->Build(&err), &err); 32 | print_timings("Allocate", ip->Allocate(&err), &err); 33 | print_timings("Copy", ip->Copy(b0, gpuip::Buffer::COPY_TO_GPU, data, &err), &err); 34 | print_timings("Run", ip->Run(&err), &err); 35 | print_timings("Copy", ip->Copy(b1, gpuip::Buffer::COPY_FROM_GPU, data, &err), &err); 36 | } 37 | 
-------------------------------------------------------------------------------- /examples/gaussblur_cuda.ip: -------------------------------------------------------------------------------- 1 | 2 | CUDA 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | output_images/gaussblur_cuda.exr 14 | 15 | 16 | gaussian_blur 17 | kernels/gaussian_blur.cu 18 | 19 | in 20 | buffer1 21 | 22 | 23 | out 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 1 30 | 1 31 | 1 32 | 16 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/gaussblur_glsl.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | output_images/gaussblur_glsl.exr 14 | 15 | 16 | gaussian_blur 17 | kernels/gaussian_blur.glsl 18 | 19 | input 20 | buffer1 21 | 22 | 23 | output 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 1 30 | 1 31 | 1 32 | 16 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/gaussblur_opencl.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | output_images/gaussblur_opencl.exr 14 | 15 | 16 | gaussian_blur 17 | kernels/gaussian_blur.cl 18 | 19 | in 20 | buffer1 21 | 22 | 23 | out 24 | buffer2 25 | 26 | 27 | n 28 | int 29 | 1 30 | 1 31 | 1 32 | 16 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/gaussblur_separable_cuda.ip: -------------------------------------------------------------------------------- 1 | 2 | CUDA 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | 14 | 15 | buffer3 16 | half 17 | 4 18 | output_images/gaussblur_separable_cuda.exr 19 | 20 | 21 | 
gaussian_blur_hor 22 | kernels/gaussian_blur_hor.cu 23 | 24 | in 25 | buffer1 26 | 27 | 28 | out 29 | buffer2 30 | 31 | 32 | n 33 | int 34 | 4 35 | 1 36 | 1 37 | 16 38 | 39 | 40 | 41 | gaussian_blur_vert 42 | kernels/gaussian_blur_vert.cu 43 | 44 | in 45 | buffer2 46 | 47 | 48 | out 49 | buffer3 50 | 51 | 52 | n 53 | int 54 | 4 55 | 1 56 | 1 57 | 16 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /examples/gaussblur_separable_glsl.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | 14 | 15 | buffer3 16 | half 17 | 4 18 | output_images/gaussblur_separable_glsl.exr 19 | 20 | 21 | gaussian_blur_hor 22 | kernels/gaussian_blur_hor.glsl 23 | 24 | input 25 | buffer1 26 | 27 | 28 | out 29 | buffer2 30 | 31 | 32 | n 33 | int 34 | 4 35 | 1 36 | 1 37 | 16 38 | 39 | 40 | 41 | gaussian_blur_vert 42 | kernels/gaussian_blur_vert.glsl 43 | 44 | input 45 | buffer2 46 | 47 | 48 | out 49 | buffer3 50 | 51 | 52 | n 53 | int 54 | 4 55 | 1 56 | 1 57 | 16 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /examples/gaussblur_separable_opencl.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | 14 | 15 | buffer3 16 | half 17 | 4 18 | output_images/blur.exr 19 | 20 | 21 | gaussian_blur_hor 22 | kernels/gaussian_blur_hor.cl 23 | 24 | in 25 | buffer1 26 | 27 | 28 | out 29 | buffer2 30 | 31 | 32 | n 33 | int 34 | 4 35 | 1 36 | 1 37 | 16 38 | 39 | 40 | 41 | gaussian_blur_vert 42 | kernels/gaussian_blur_vert.cl 43 | 44 | in 45 | buffer2 46 | 47 | 48 | out 49 | buffer3 50 | 51 | 52 | n 53 | int 54 | 4 55 | 1 56 | 1 57 | 16 58 | 59 | 60 | 61 | 
-------------------------------------------------------------------------------- /examples/images/README: -------------------------------------------------------------------------------- 1 | This directory contains the input images used in the examples. 2 | 3 | If this directory is empty, make sure the CMake option DOWNLOAD_EXAMPLES_IMAGES 4 | is set to ON. 5 | 6 | You can also download the images yourself from: 7 | https://github.com/karlssonper/gpuip-examples-images -------------------------------------------------------------------------------- /examples/kernels/box_blur.cl: -------------------------------------------------------------------------------- 1 | float4 read(__global const half * in_half, int x, int y, int w) 2 | { 3 | const int idx = x + w * y; 4 | return (float4)(vload_half(4 * idx + 0, in_half), 5 | vload_half(4 * idx + 1, in_half), 6 | vload_half(4 * idx + 2, in_half), 7 | vload_half(4 * idx + 3, in_half)); 8 | } 9 | 10 | __kernel void 11 | box_blur(__global const half * in_half, 12 | __global half * out_half, 13 | const int n, 14 | const int width, 15 | const int height) 16 | { 17 | const int x = get_global_id(0); 18 | const int y = get_global_id(1); 19 | 20 | // array index 21 | const int idx = x + width * y; 22 | 23 | // inside image bounds check 24 | if (x >= width || y >= height) { 25 | return; 26 | } 27 | 28 | // kernel code 29 | float4 out = (float4)(0, 0, 0, 0); 30 | int count = 0; 31 | for(int j = y - n; j <= y + n; ++j) { 32 | for(int i = x - n; i <= x+n; ++i) { 33 | if (i>=0 && j>= 0 && i < width && j < height) { 34 | out += read(in_half, i, j, width); 35 | count += 1; 36 | } 37 | } 38 | } 39 | out /= count; 40 | 41 | // float to half conversion 42 | vstore_half(out.x, 4 * idx + 0, out_half); 43 | vstore_half(out.y, 4 * idx + 1, out_half); 44 | vstore_half(out.z, 4 * idx + 2, out_half); 45 | vstore_half(out.w, 4 * idx + 3, out_half); 46 | } -------------------------------------------------------------------------------- 
/examples/kernels/box_blur.cu: -------------------------------------------------------------------------------- 1 | __device__ float4 read(const unsigned short * in_half, int x, int y, int w) 2 | { 3 | return make_float4(__half2float(in_half[4 * (x + y * w) + 0]), 4 | __half2float(in_half[4 * (x + y * w) + 1]), 5 | __half2float(in_half[4 * (x + y * w) + 2]), 6 | __half2float(in_half[4 * (x + y * w) + 3])); 7 | } 8 | 9 | __global__ void 10 | box_blur(const unsigned short * in_half, 11 | unsigned short * out_half, 12 | const int n, 13 | const int width, 14 | const int height) 15 | { 16 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 17 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 18 | 19 | // array index 20 | const int idx = x + width * y; 21 | 22 | // inside image bounds check 23 | if (x >= width || y >= height) { 24 | return; 25 | } 26 | 27 | // kernel code 28 | float4 out = make_float4(0, 0, 0, 0); 29 | int count = 0; 30 | for(int j = y - n; j <= y + n; ++j) { 31 | for(int i = x - n; i <= x + n; ++i) { 32 | if (i>=0 && j>= 0 && i < width && j < height) { 33 | out += read(in_half, i, j, width); 34 | count += 1; 35 | } 36 | } 37 | } 38 | out /= count; 39 | 40 | // float to half conversion 41 | out_half[4 * idx + 0] = __float2half_rn(out.x); 42 | out_half[4 * idx + 1] = __float2half_rn(out.y); 43 | out_half[4 * idx + 2] = __float2half_rn(out.z); 44 | out_half[4 * idx + 3] = __float2half_rn(out.w); 45 | } 46 | -------------------------------------------------------------------------------- /examples/kernels/box_blur.glsl: -------------------------------------------------------------------------------- 1 | #version 120 2 | uniform sampler2D input; 3 | uniform int n; 4 | varying vec2 x; // texture coordinates 5 | uniform float dx; // delta 6 | 7 | void main() 8 | { 9 | vec3 v = vec3(0,0,0); 10 | float count = 0.0; // number of taps summed; must start at zero because it divides the sum below 11 | for(int j = -n; j<=n; ++j) { 12 | for(int i = -n; i<=n; ++i) { 13 | vec2 tx = x + vec2(i*dx,j*dx); 14 | v+= texture2D(input, tx).xyz; 15 | 
count += 1; 16 | } 17 | } 18 | 19 | // gl_FragData[0] is buffer output 20 | gl_FragData[0] = vec4(v/count,1); 21 | } 22 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur.cl: -------------------------------------------------------------------------------- 1 | float4 read(__global const half * in_half, int x, int y, int w) 2 | { 3 | const int idx = x + w * y; 4 | return (float4)(vload_half(4 * idx + 0, in_half), 5 | vload_half(4 * idx + 1, in_half), 6 | vload_half(4 * idx + 2, in_half), 7 | vload_half(4 * idx + 3, in_half)); 8 | } 9 | 10 | float weight(int i, int j, int x, int y, float invdx2) 11 | { 12 | return exp(-invdx2*((i-x)*(i-x) + (j-y)*(j-y))); 13 | } 14 | 15 | __kernel void 16 | gaussian_blur(__global const half * in_half, 17 | __global half * out_half, 18 | const int n, 19 | const int width, 20 | const int height) 21 | { 22 | const int x = get_global_id(0); 23 | const int y = get_global_id(1); 24 | 25 | // array index 26 | const int idx = x + width * y; 27 | 28 | // inside image bounds check 29 | if (x >= width || y >= height) { 30 | return; 31 | } 32 | 33 | // kernel code 34 | float4 out = (float4)(0, 0, 0, 0); 35 | const float invdx2 = 1.0/(width*width); 36 | float totWeight = 0; 37 | float w; 38 | for(int j = y - n; j <= y + n; ++j) { 39 | for(int i = x - n; i <= x+n; ++i) { 40 | if (i>=0 && j>= 0 && i < width && j < height) { 41 | w = weight(i, j, x, y, invdx2); 42 | out += w * read(in_half, i, j, width); 43 | totWeight += w; 44 | } 45 | } 46 | } 47 | out /= totWeight; 48 | 49 | // float to half conversion 50 | vstore_half(out.x, 4 * idx + 0, out_half); 51 | vstore_half(out.y, 4 * idx + 1, out_half); 52 | vstore_half(out.z, 4 * idx + 2, out_half); 53 | vstore_half(out.w, 4 * idx + 3, out_half); 54 | } 55 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur.cu: 
-------------------------------------------------------------------------------- 1 | __device__ float4 read(const unsigned short * in_half, int x, int y, int w) 2 | { 3 | return make_float4(__half2float(in_half[4 * (x + y * w) + 0]), 4 | __half2float(in_half[4 * (x + y * w) + 1]), 5 | __half2float(in_half[4 * (x + y * w) + 2]), 6 | __half2float(in_half[4 * (x + y * w) + 3])); 7 | } 8 | 9 | __device__ float weight(int i, int j, int x, int y, float invdx2) 10 | { 11 | return exp(-invdx2*((i-x)*(i-x) + (j-y)*(j-y))); 12 | } 13 | 14 | __global__ void 15 | gaussian_blur(const unsigned short * in_half, 16 | unsigned short * out_half, 17 | const int n, 18 | const int width, 19 | const int height) 20 | { 21 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 22 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 23 | 24 | // array index 25 | const int idx = x + width * y; 26 | 27 | // inside image bounds check 28 | if (x >= width || y >= height) { 29 | return; 30 | } 31 | 32 | // kernel code 33 | float4 out = make_float4(0, 0, 0, 0); 34 | const float invdx2 = 1.0/(width*width); 35 | float totWeight = 0; 36 | float w; 37 | for(int j = y - n; j <= y + n; ++j) { 38 | for(int i = x - n; i <= x + n; ++i) { 39 | if (i>=0 && j>= 0 && i < width && j < height) { 40 | w = weight(i, j, x, y, invdx2); 41 | out += w * read(in_half, i, j, width); 42 | totWeight += w; 43 | } 44 | } 45 | } 46 | out /= totWeight; 47 | 48 | // float to half conversion 49 | out_half[4 * idx + 0] = __float2half_rn(out.x); 50 | out_half[4 * idx + 1] = __float2half_rn(out.y); 51 | out_half[4 * idx + 2] = __float2half_rn(out.z); 52 | out_half[4 * idx + 3] = __float2half_rn(out.w); 53 | } 54 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur.glsl: -------------------------------------------------------------------------------- 1 | #version 120 2 | uniform sampler2D input; 3 | uniform int n; 4 | varying vec2 x; // texture coordinates 5 | uniform 
float dx; // delta 6 | 7 | void main() 8 | { 9 | vec3 v = vec3(0,0,0); 10 | float totWeight = 0; 11 | for(int j = -n; j<=n; ++j) { 12 | for(int i = -n; i<=n; ++i) { 13 | vec2 tx = x + vec2(i*dx,j*dx); 14 | float weight = exp(-((tx.x-x.x)*(tx.x-x.x)+(tx.y-x.y)*(tx.y-x.y))); 15 | v+= weight * texture2D(input, tx).xyz; 16 | totWeight += weight; 17 | } 18 | } 19 | 20 | // gl_FragData[0] is buffer output 21 | gl_FragData[0] = vec4(v/totWeight,1); 22 | } 23 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_hor.cl: -------------------------------------------------------------------------------- 1 | float4 read(__global const half * in_half, int x, int y, int w) 2 | { 3 | const int idx = x + w * y; 4 | return (float4)(vload_half(4 * idx + 0, in_half), 5 | vload_half(4 * idx + 1, in_half), 6 | vload_half(4 * idx + 2, in_half), 7 | vload_half(4 * idx + 3, in_half)); 8 | } 9 | 10 | float weight(int i, int x,float invdx2) 11 | { 12 | return exp(-invdx2*((i-x)*(i-x))); 13 | } 14 | 15 | __kernel void 16 | gaussian_blur_hor(__global const half * in_half, 17 | __global half * out_half, 18 | const int n, 19 | const int width, 20 | const int height) 21 | { 22 | const int x = get_global_id(0); 23 | const int y = get_global_id(1); 24 | 25 | // array index 26 | const int idx = x + width * y; 27 | 28 | // inside image bounds check 29 | if (x >= width || y >= height) { 30 | return; 31 | } 32 | 33 | // kernel code 34 | float4 out = (float4)(0, 0, 0, 0); 35 | const float invdx2 = 1.0/(width*width); 36 | float totWeight = 0; 37 | float w; 38 | for(int i = x - n; i <= x+n; ++i) { 39 | if (i>=0 && i < width) { 40 | w = weight(i, x, invdx2); 41 | out += w * read(in_half, i, y, width); 42 | totWeight += w; 43 | } 44 | } 45 | out /= totWeight; 46 | 47 | // float to half conversion 48 | vstore_half(out.x, 4 * idx + 0, out_half); 49 | vstore_half(out.y, 4 * idx + 1, out_half); 50 | vstore_half(out.z, 4 * idx + 2, out_half); 51 | 
vstore_half(out.w, 4 * idx + 3, out_half); 52 | } 53 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_hor.cu: -------------------------------------------------------------------------------- 1 | __device__ float4 readx(const unsigned short * in_half, int x, int y, int w) 2 | { 3 | return make_float4(__half2float(in_half[4 * (x + y * w) + 0]), 4 | __half2float(in_half[4 * (x + y * w) + 1]), 5 | __half2float(in_half[4 * (x + y * w) + 2]), 6 | __half2float(in_half[4 * (x + y * w) + 3])); 7 | } 8 | 9 | __device__ float weightx(int i, int x, float invdx2) 10 | { 11 | return exp(-invdx2*((i-x)*(i-x))); 12 | } 13 | 14 | __global__ void 15 | gaussian_blur_hor(const unsigned short * in_half, 16 | unsigned short * out_half, 17 | const int n, 18 | const int width, 19 | const int height) 20 | { 21 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 22 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 23 | 24 | // array index 25 | const int idx = x + width * y; 26 | 27 | // inside image bounds check 28 | if (x >= width || y >= height) { 29 | return; 30 | } 31 | 32 | // kernel code 33 | float4 out = make_float4(0, 0, 0, 0); 34 | const float invdx2 = 1.0/(width*width); 35 | float totWeight = 0; 36 | float w; 37 | for(int i = x - n; i <= x + n; ++i) { 38 | if (i >= 0 && i < width) { // bounds-check the sampled column i, not the thread's own x 39 | w = weightx(i, x, invdx2); 40 | out += w * readx(in_half, i, y, width); 41 | totWeight += w; 42 | } 43 | } 44 | out /= totWeight; 45 | 46 | // float to half conversion 47 | out_half[4 * idx + 0] = __float2half_rn(out.x); 48 | out_half[4 * idx + 1] = __float2half_rn(out.y); 49 | out_half[4 * idx + 2] = __float2half_rn(out.z); 50 | out_half[4 * idx + 3] = __float2half_rn(out.w); 51 | } 52 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_hor.glsl: -------------------------------------------------------------------------------- 1 | #version 120 2 | uniform 
sampler2D input; 3 | uniform int n; 4 | varying vec2 x; // texture coordinates 5 | uniform float dx; // delta 6 | 7 | void main() 8 | { 9 | vec3 v = vec3(0,0,0); 10 | float totWeight = 0; 11 | for(int i = -n; i<=n; ++i) { 12 | vec2 tx = x + vec2(i*dx,0); 13 | float weight = exp(-((tx.x-x.x)*(tx.x-x.x))); 14 | v+= weight * texture2D(input, tx).xyz; 15 | totWeight += weight; 16 | } 17 | 18 | // gl_FragData[0] is buffer output 19 | gl_FragData[0] = vec4(v/totWeight,1); 20 | } 21 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_vert.cl: -------------------------------------------------------------------------------- 1 | float4 read(__global const half * in_half, int x, int y, int w) 2 | { 3 | const int idx = x + w * y; 4 | return (float4)(vload_half(4 * idx + 0, in_half), 5 | vload_half(4 * idx + 1, in_half), 6 | vload_half(4 * idx + 2, in_half), 7 | vload_half(4 * idx + 3, in_half)); 8 | } 9 | 10 | float weight(int j, int y,float invdx2) 11 | { 12 | return exp(-invdx2*((j-y)*(j-y))); 13 | } 14 | 15 | __kernel void 16 | gaussian_blur_vert(__global const half * in_half, 17 | __global half * out_half, 18 | const int n, 19 | const int width, 20 | const int height) 21 | { 22 | const int x = get_global_id(0); 23 | const int y = get_global_id(1); 24 | 25 | // array index 26 | const int idx = x + width * y; 27 | 28 | // inside image bounds check 29 | if (x >= width || y >= height) { 30 | return; 31 | } 32 | 33 | // kernel code 34 | float4 out = (float4)(0, 0, 0, 0); 35 | const float invdx2 = 1.0/(width*width); 36 | float totWeight = 0; 37 | float w; 38 | for(int j = y - n; j <= y+n; ++j) { 39 | if (j>=0 && j < height) { 40 | w = weight(j, y, invdx2); 41 | out += w * read(in_half, x, j, width); 42 | totWeight += w; 43 | } 44 | } 45 | out /= totWeight; 46 | 47 | // float to half conversion 48 | vstore_half(out.x, 4 * idx + 0, out_half); 49 | vstore_half(out.y, 4 * idx + 1, out_half); 50 | vstore_half(out.z, 4 * 
idx + 2, out_half); 51 | vstore_half(out.w, 4 * idx + 3, out_half); 52 | } 53 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_vert.cu: -------------------------------------------------------------------------------- 1 | __device__ float4 ready(const unsigned short * in_half, int x, int y, int w) 2 | { 3 | return make_float4(__half2float(in_half[4 * (x + y * w) + 0]), 4 | __half2float(in_half[4 * (x + y * w) + 1]), 5 | __half2float(in_half[4 * (x + y * w) + 2]), 6 | __half2float(in_half[4 * (x + y * w) + 3])); 7 | } 8 | 9 | __device__ float weighty(int j, int y, float invdx2) 10 | { 11 | return exp(-invdx2*((j-y)*(j-y))); 12 | } 13 | 14 | __global__ void 15 | gaussian_blur_vert(const unsigned short * in_half, 16 | unsigned short * out_half, 17 | const int n, 18 | const int width, 19 | const int height) 20 | { 21 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 22 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 23 | 24 | // array index 25 | const int idx = x + width * y; 26 | 27 | // inside image bounds check 28 | if (x >= width || y >= height) { 29 | return; 30 | } 31 | 32 | // kernel code 33 | float4 out = make_float4(0, 0, 0, 0); 34 | const float invdx2 = 1.0/(width*width); 35 | float totWeight = 0; 36 | float w; 37 | for(int j = y - n; j <= y + n; ++j) { 38 | if (j>= 0 && j < height) { 39 | w = weighty(j, y, invdx2); 40 | out += w * ready(in_half, x, j, width); 41 | totWeight += w; 42 | } 43 | } 44 | out /= totWeight; 45 | 46 | // float to half conversion 47 | out_half[4 * idx + 0] = __float2half_rn(out.x); 48 | out_half[4 * idx + 1] = __float2half_rn(out.y); 49 | out_half[4 * idx + 2] = __float2half_rn(out.z); 50 | out_half[4 * idx + 3] = __float2half_rn(out.w); 51 | } 52 | -------------------------------------------------------------------------------- /examples/kernels/gaussian_blur_vert.glsl: -------------------------------------------------------------------------------- 1 | 
#version 120 2 | uniform sampler2D input; 3 | uniform int n; 4 | varying vec2 x; // texture coordinates 5 | uniform float dx; // delta 6 | 7 | void main() 8 | { 9 | vec3 v = vec3(0,0,0); 10 | float totWeight = 0; 11 | for(int j = -n; j<=n; ++j) { 12 | vec2 tx = x + vec2(0,j*dx); 13 | float weight = exp(-((tx.y-x.y)*(tx.y-x.y))); 14 | v+= weight * texture2D(input, tx).xyz; 15 | totWeight += weight; 16 | } 17 | 18 | // gl_FragData[0] is buffer output 19 | gl_FragData[0] = vec4(v/totWeight,1); 20 | } 21 | -------------------------------------------------------------------------------- /examples/kernels/lerp.cl: -------------------------------------------------------------------------------- 1 | __kernel void 2 | lerp(__global const half * a_half, 3 | __global const half * b_half, 4 | __global half * out_half, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = get_global_id(0); 10 | const int y = get_global_id(1); 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | // half to float conversion 21 | const float4 a = (float4)(vload_half(4 * idx + 0, a_half), 22 | vload_half(4 * idx + 1, a_half), 23 | vload_half(4 * idx + 2, a_half), 24 | vload_half(4 * idx + 3, a_half)); 25 | const float4 b = (float4)(vload_half(4 * idx + 0, b_half), 26 | vload_half(4 * idx + 1, b_half), 27 | vload_half(4 * idx + 2, b_half), 28 | vload_half(4 * idx + 3, b_half)); 29 | 30 | // kernel code 31 | float4 out = (1-alpha) * a + alpha * b; 32 | 33 | // float to half conversion 34 | vstore_half(out.x, 4 * idx + 0, out_half); 35 | vstore_half(out.y, 4 * idx + 1, out_half); 36 | vstore_half(out.z, 4 * idx + 2, out_half); 37 | vstore_half(out.w, 4 * idx + 3, out_half); 38 | } -------------------------------------------------------------------------------- /examples/kernels/lerp.cu: 
-------------------------------------------------------------------------------- 1 | __global__ void 2 | lerp(const unsigned short * a_half, 3 | const unsigned short * b_half, 4 | unsigned short * out_half, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 10 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | // half to float conversion 21 | const float4 a = make_float4(__half2float(a_half[4 * idx + 0]), 22 | __half2float(a_half[4 * idx + 1]), 23 | __half2float(a_half[4 * idx + 2]), 24 | __half2float(a_half[4 * idx + 3])); 25 | const float4 b = make_float4(__half2float(b_half[4 * idx + 0]), 26 | __half2float(b_half[4 * idx + 1]), 27 | __half2float(b_half[4 * idx + 2]), 28 | __half2float(b_half[4 * idx + 3])); 29 | 30 | // kernel code 31 | float4 out = (1-alpha) * a + alpha * b; 32 | 33 | // float to half conversion 34 | out_half[4 * idx + 0] = __float2half_rn(out.x); 35 | out_half[4 * idx + 1] = __float2half_rn(out.y); 36 | out_half[4 * idx + 2] = __float2half_rn(out.z); 37 | out_half[4 * idx + 3] = __float2half_rn(out.w); 38 | } -------------------------------------------------------------------------------- /examples/kernels/lerp.glsl: -------------------------------------------------------------------------------- 1 | #version 120 2 | uniform sampler2D a; 3 | uniform sampler2D b; 4 | uniform float alpha; 5 | varying vec2 x; // texture coordinates 6 | uniform float dx; // delta 7 | 8 | void main() 9 | { 10 | // gl_FragData[0] is buffer out 11 | vec4 c = (1-alpha)*texture2D(a,x) + alpha*texture2D(b,x); 12 | c.w = 1; 13 | gl_FragData[0] = c; 14 | } -------------------------------------------------------------------------------- /examples/kernels/lerp_float.cl: 
-------------------------------------------------------------------------------- 1 | __kernel void 2 | lerp(__global const float4 * a, 3 | __global const float4 * b, 4 | __global float4 * out, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = get_global_id(0); 10 | const int y = get_global_id(1); 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | // kernel code 21 | out[idx] = (1-alpha) * a[idx] + alpha * b[idx]; 22 | } -------------------------------------------------------------------------------- /examples/kernels/lerp_float.cu: -------------------------------------------------------------------------------- 1 | __kernel void 2 | lerp(__global const float4 * a, 3 | __global const float4 * b, 4 | __global float4 * out, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = get_global_id(0); 10 | const int y = get_global_id(1); 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | // kernel code 21 | out[idx] = (1-alpha) * a[idx] + alpha * b[idx]; 22 | } -------------------------------------------------------------------------------- /examples/kernels/lerp_ubyte.cl: -------------------------------------------------------------------------------- 1 | __kernel void 2 | lerp(__global const uchar4 * a, 3 | __global const uchar4 * b, 4 | __global uchar4 * out, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = get_global_id(0); 10 | const int y = get_global_id(1); 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | // kernel code 21 | const float4 af = convert_float4(a[idx]); 22 | const float4 bf = 
convert_float4(b[idx]); 23 | uchar4 tmp = convert_uchar4((1-alpha)*af+alpha*bf); 24 | 25 | // set alpha 26 | tmp.w = 255; 27 | 28 | out[idx] = tmp; 29 | } -------------------------------------------------------------------------------- /examples/kernels/lerp_ubyte.cu: -------------------------------------------------------------------------------- 1 | __kernel void 2 | lerp(__global const uchar4 * a, 3 | __global const uchar4 * b, 4 | __global uchar4 * out, 5 | const float alpha, 6 | const int width, 7 | const int height) 8 | { 9 | const int x = get_global_id(0); 10 | const int y = get_global_id(1); 11 | 12 | // array index 13 | const int idx = x + width * y; 14 | 15 | // inside image bounds check 16 | if (x >= width || y >= height) { 17 | return; 18 | } 19 | 20 | 21 | // kernel code 22 | const uchar4 aa = a[idx]; 23 | const uchar4 bb = b[idx]; 24 | 25 | out[idx] = (uchar4)((1-alpha)*aa.x + alpha*bb.x, 26 | (1-alpha)*aa.y + alpha*bb.y, 27 | (1-alpha)*aa.z + alpha*bb.z, 28 | 255); 29 | } -------------------------------------------------------------------------------- /examples/kernels/sobel.cl: -------------------------------------------------------------------------------- 1 | float4 read(__global const uchar4 * in, int x, int y, int width) 2 | { 3 | return convert_float4(in[x + width * y]); 4 | } 5 | 6 | float4 compute_sobelx(__global const uchar4 * in, int x, int y, int w) 7 | { 8 | return -2*read(in,x-1,y,w) - read(in,x-1,y-1,w) - read(in,x-1,y+1,w) 9 | +2*read(in,x+1,y,w) + read(in,x+1,y-1,w) + read(in,x+1,y+1,w); // Sobel x: weight 2 on the centre row, 1 on the rows above and below 10 | } 11 | 12 | float4 compute_sobely(__global const uchar4 * in, int x, int y, int w) 13 | { 14 | return -2*read(in,x,y-1,w) - read(in,x-1,y-1,w) - read(in,x+1,y-1,w) 15 | +2*read(in,x,y+1,w) + read(in,x-1,y+1,w) + read(in,x+1,y+1,w); // Sobel y: weight 2 on the centre column, 1 on the columns left and right 16 | } 17 | 18 | __kernel void 19 | sobel(__global const uchar4 * in, 20 | __global uchar4 * sobelx, 21 | __global uchar4 * sobely, 22 | __global uchar4 * gradient, 23 | __global uchar4 * edges, 24 | float 
primary_treshold, 25 | float secondary_treshold, 26 | const int width, 27 | const int height) 28 | { 29 | const int x = get_global_id(0); 30 | const int y = get_global_id(1); 31 | 32 | // array index 33 | const int idx = x + width * y; 34 | 35 | // inside image bounds check 36 | if (x >= width || y >= height) { 37 | return; 38 | } 39 | 40 | // kernel code 41 | if (x == 0 || y == 0 || x == width - 1 || y == height - 1) { 42 | sobelx[idx] = (uchar4)(0, 0, 0, 255); 43 | sobely[idx] = (uchar4)(0, 0, 0, 255); 44 | gradient[idx] = (uchar4)(0, 0, 0, 255); 45 | edges[idx] = (uchar4)(0, 0, 0, 255); 46 | return; 47 | } 48 | 49 | const float4 sx = compute_sobelx(in, x, y, width); 50 | const float4 sy = compute_sobely(in, x, y, width); 51 | const float grad = (fabs(sx.x)+fabs(sx.y)+fabs(sx.z)+ 52 | fabs(sy.x)+fabs(sy.y)+fabs(sy.z))/3.0; 53 | 54 | sobelx[idx] = convert_uchar4(sx); 55 | sobely[idx] = convert_uchar4(sy); 56 | 57 | const unsigned char gradu = convert_uchar(grad); 58 | gradient[idx] = (uchar4)(gradu, gradu, gradu, 255); 59 | 60 | const bool prim_edge = grad > primary_treshold; 61 | const bool sec_edge = grad > secondary_treshold; 62 | const uchar edge = 255 * prim_edge | 125 * sec_edge; 63 | 64 | edges[idx] = (uchar4)(edge, edge, edge, 255); 65 | } 66 | -------------------------------------------------------------------------------- /examples/kernels/sobel.cu: -------------------------------------------------------------------------------- 1 | __device__ float4 read(const uchar4 * in, int x, int y, int width) 2 | { 3 | const uchar4 v = in[x + y * width]; 4 | return make_float4(v.x, v.y, v.z, v.w); 5 | } 6 | 7 | __device__ float4 compute_sobelx(const uchar4 * in, int x, int y, int w) 8 | { 9 | return -2*read(in,x-1,y,w) - read(in,x-1,y-1,w) - read(in,x-1,y+1,w) 10 | +2*read(in,x+1,y,w) + read(in,x+1,y-1,w) + read(in,x+1,y+1,w); // Sobel x: weight 2 on the centre row, 1 on the rows above and below 11 | } 12 | 13 | __device__ float4 compute_sobely(const uchar4 * in, int x, int y, int w) 14 | { 15 | return -2*read(in,x,y-1,w) - 
read(in,x-1,y-1,w) - read(in,x+1,y-1,w) 16 | +2*read(in,x,y+1,w) + read(in,x-1,y+1,w) + read(in,x+1,y+1,w); // Sobel y: weight 2 on the centre column, 1 on the columns left and right 17 | } 18 | 19 | __global__ void 20 | sobel(const uchar4 * in, 21 | uchar4 * sobelx, 22 | uchar4 * sobely, 23 | uchar4 * gradient, 24 | uchar4 * edges, 25 | const float primary_treshold, 26 | const float secondary_treshold, 27 | const int width, 28 | const int height) 29 | { 30 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 31 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 32 | 33 | // array index 34 | const int idx = x + width * y; 35 | 36 | // inside image bounds check 37 | if (x >= width || y >= height) { 38 | return; 39 | } 40 | 41 | // kernel code 42 | if (x == 0 || y == 0 || x == width - 1 || y == height - 1) { 43 | sobelx[idx] = make_uchar4(0,0,0,255); 44 | sobely[idx] = make_uchar4(0,0,0,255); 45 | gradient[idx] = make_uchar4(0,0,0,255); 46 | edges[idx] = make_uchar4(0,0,0,255); 47 | return; 48 | } 49 | 50 | float4 sx = compute_sobelx(in,x,y,width); 51 | float4 sy = compute_sobely(in,x,y,width); 52 | 53 | sobelx[idx] = make_uchar4(abs(sx.x), abs(sx.y), abs(sx.z), 255); 54 | sobely[idx] = make_uchar4(abs(sy.x), abs(sy.y), abs(sy.z), 255); 55 | 56 | float grad = (abs(sx.x)+abs(sx.y)+abs(sx.z)+ 57 | abs(sy.x)+abs(sy.y)+abs(sy.z))/3.0; 58 | gradient[idx] = make_uchar4(grad, grad, grad, 255); 59 | 60 | const bool prim_edge = grad > primary_treshold; 61 | const bool sec_edge = grad > secondary_treshold; 62 | const unsigned char edge = prim_edge * 255 | sec_edge * 125; 63 | edges[idx] = make_uchar4(edge,edge,edge,255); 64 | } 65 | -------------------------------------------------------------------------------- /examples/kernels/sobel.glsl: -------------------------------------------------------------------------------- 1 | #version 120 2 | uniform sampler2D input; 3 | uniform float primary_treshold; 4 | uniform float secondary_treshold; 5 | varying vec2 x; // texture coordinates 6 | uniform float dx; // delta 7 | 8 | vec4 compute_sobelx() 9 | { 
10 | return -2*texture2D(input, x + vec2(-dx,0)) 11 | - texture2D(input, x + vec2(-dx,dx)) 12 | - texture2D(input, x + vec2(-dx,-dx)) 13 | + 2*texture2D(input, x + vec2(dx,0)) 14 | + texture2D(input, x + vec2(dx,dx)) 15 | + texture2D(input, x + vec2(dx,-dx)); 16 | } 17 | 18 | vec4 compute_sobely() 19 | { 20 | return -2*texture2D(input, x + vec2(0,-dx)) 21 | - texture2D(input, x + vec2(-dx,-dx)) 22 | - texture2D(input, x + vec2(dx,-dx)) 23 | + 2*texture2D(input, x + vec2(0,dx)) 24 | + texture2D(input, x + vec2(-dx,dx)) 25 | + texture2D(input, x + vec2(dx,dx)); // Sobel y: weight 2 on the centre column, mirroring compute_sobelx 26 | } 27 | 28 | float edge(float grad) 29 | { 30 | if (grad > primary_treshold) { 31 | return 1.0; 32 | } else if (grad > secondary_treshold){ 33 | return 0.5; 34 | } else { 35 | return 0.0; 36 | } 37 | } 38 | 39 | void main() 40 | { 41 | vec4 sx = compute_sobelx(); 42 | vec4 sy = compute_sobely(); 43 | float grad = (abs(sx.x)+abs(sx.y)+abs(sx.z)+ 44 | abs(sy.x)+abs(sy.y)+abs(sy.z))/3.0; // sums all three channels of both sx and sy 45 | 46 | // gl_FragData[0] is buffer sobelx 47 | gl_FragData[0] = vec4(sx.xyz,1); 48 | 49 | // gl_FragData[1] is buffer sobely 50 | gl_FragData[1] = vec4(sy.xyz,1); 51 | 52 | // gl_FragData[2] is buffer gradient 53 | gl_FragData[2] = vec4(vec3(grad),1); 54 | 55 | // gl_FragData[3] is buffer edges 56 | gl_FragData[3] = vec4(vec3(edge(grad)),1); 57 | } 58 | -------------------------------------------------------------------------------- /examples/lerp_cuda.ip: -------------------------------------------------------------------------------- 1 | 2 | CUDA 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | half 18 | 4 19 | output_images/lerp_cuda.exr 20 | 21 | 22 | lerp 23 | kernels/lerp.cu 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.0 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- 
/examples/lerp_cuda_float.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | float 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | float 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | float 18 | 4 19 | output_images/lerp_float_cuda.exr 20 | 21 | 22 | lerp 23 | kernels/lerp_float.cu 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.0 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_cuda_ubyte.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | images/baboon.png 14 | 15 | 16 | buffer3 17 | ubyte 18 | 4 19 | output_images/lerp_ubyte_cuda.png 20 | 21 | 22 | lerp 23 | kernels/lerp_ubyte.cu 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.0 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_glsl.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | half 18 | 4 19 | output_images/lerp_glsl.exr 20 | 21 | 22 | lerp 23 | kernels/lerp.glsl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.39 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_glsl_float.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | 
float 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | float 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | float 18 | 4 19 | output_images/lerp_glsl_float.exr 20 | 21 | 22 | lerp 23 | kernels/lerp.glsl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_glsl_ubyte.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | images/baboon.png 14 | 15 | 16 | buffer3 17 | ubyte 18 | 4 19 | output_images/lerp_glsl.png 20 | 21 | 22 | lerp 23 | kernels/lerp.glsl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.35 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_opencl.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | half 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | half 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | half 18 | 4 19 | output_images/lerp_opencl.exr 20 | 21 | 22 | lerp 23 | kernels/lerp.cl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.74 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_opencl_float.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | float 6 | 4 7 | images/bridge.exr 8 | 9 | 10 | buffer2 11 | float 12 | 4 13 | images/river.exr 14 | 15 | 16 | buffer3 17 | float 18 | 4 19 | 
output_images/lerp_float_opencl.exr 20 | 21 | 22 | lerp 23 | kernels/lerp_float.cl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.84 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/lerp_opencl_ubyte.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | images/baboon.png 14 | 15 | 16 | buffer3 17 | ubyte 18 | 4 19 | output_images/lerp_ubyte_opencl.png 20 | 21 | 22 | lerp 23 | kernels/lerp_ubyte.cl 24 | 25 | a 26 | buffer1 27 | 28 | 29 | b 30 | buffer2 31 | 32 | 33 | out 34 | buffer3 35 | 36 | 37 | alpha 38 | float 39 | 0.46 40 | 0.0 41 | 0.0 42 | 1.0 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/sobel_cuda.ip: -------------------------------------------------------------------------------- 1 | 2 | CUDA 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | 14 | 15 | buffer3 16 | ubyte 17 | 4 18 | 19 | 20 | buffer4 21 | ubyte 22 | 4 23 | 24 | 25 | buffer5 26 | ubyte 27 | 4 28 | 29 | 30 | sobel 31 | kernels/sobel.cu 32 | 33 | in 34 | buffer1 35 | 36 | 37 | sobelx 38 | buffer2 39 | 40 | 41 | sobely 42 | buffer3 43 | 44 | 45 | gradient 46 | buffer4 47 | 48 | 49 | edges 50 | buffer5 51 | 52 | 53 | primary_treshold 54 | float 55 | 250 56 | 250.0 57 | 0.0 58 | 500.0 59 | 60 | 61 | secondary_treshold 62 | float 63 | 160 64 | 160.0 65 | 0.0 66 | 500.0 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /examples/sobel_glsl.ip: -------------------------------------------------------------------------------- 1 | 2 | GLSL 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | 14 | 
15 | buffer3 16 | ubyte 17 | 4 18 | 19 | 20 | buffer4 21 | ubyte 22 | 4 23 | 24 | 25 | buffer5 26 | ubyte 27 | 4 28 | 29 | 30 | sobel 31 | kernels/sobel.glsl 32 | 33 | input 34 | buffer1 35 | 36 | 37 | sobelx 38 | buffer2 39 | 40 | 41 | sobely 42 | buffer3 43 | 44 | 45 | gradient 46 | buffer4 47 | 48 | 49 | edges 50 | buffer5 51 | 52 | 53 | primary_treshold 54 | float 55 | 250 56 | 250.0 57 | 0.0 58 | 500.0 59 | 60 | 61 | secondary_treshold 62 | float 63 | 150 64 | 150.0 65 | 0.0 66 | 500.0 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /examples/sobel_opencl.ip: -------------------------------------------------------------------------------- 1 | 2 | OpenCL 3 | 4 | buffer1 5 | ubyte 6 | 4 7 | images/lena.png 8 | 9 | 10 | buffer2 11 | ubyte 12 | 4 13 | output_images/sobelx_opencl.png 14 | 15 | 16 | buffer3 17 | ubyte 18 | 4 19 | output_images/sobely_opencl.png 20 | 21 | 22 | buffer4 23 | ubyte 24 | 4 25 | output_images/sobelgrad_opencl.png 26 | 27 | 28 | buffer5 29 | ubyte 30 | 4 31 | output_images/sobeledges_opencl.png 32 | 33 | 34 | sobel 35 | kernels/sobel.cl 36 | 37 | in 38 | buffer1 39 | 40 | 41 | sobelx 42 | buffer2 43 | 44 | 45 | sobely 46 | buffer3 47 | 48 | 49 | gradient 50 | buffer4 51 | 52 | 53 | edges 54 | buffer5 55 | 56 | 57 | primary_treshold 58 | float 59 | 250.0 60 | 250.0 61 | 0.0 62 | 500.0 63 | 64 | 65 | secondary_treshold 66 | float 67 | 160.0 68 | 160.0 69 | 0.0 70 | 500.0 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /install.bat: -------------------------------------------------------------------------------- 1 | pushd %~dp0 2 | if not exist build\ ( 3 | echo "Run 'build.bat' first to build gpuip" 4 | 5 | ) else ( 6 | cd build 7 | cmake --build . 
--config Release --target INSTALL 8 | ) 9 | pause 10 | popd -------------------------------------------------------------------------------- /python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2014 Per Karlsson 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | project(gpuip_python) 24 | 25 | set(PYTHON_ICONS_FILE ${GPUIP_ROOT_DIR}/python/icons.py) 26 | add_custom_target(generateIcons ALL 27 | ${PYTHON_EXECUTABLE} generateIcons.py ${PYTHON_ICONS_FILE} 28 | WORKING_DIRECTORY ${GPUIP_ROOT_DIR}/python/icons) 29 | 30 | file(GLOB PYTHON_FILES "*.py") 31 | install(FILES ${PYTHON_FILES} ${PYTHON_ICONS_FILE} DESTINATION gpuip 32 | COMPONENT bin) 33 | install(FILES gpuip.py DESTINATION gpuip 34 | PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE 35 | OWNER_READ GROUP_READ WORLD_READ COMPONENT bin) 36 | 37 | if(NOT WIN32) 38 | # Create symlink in CMAKE_INSTALL_PREFIX/bin 39 | set(GPUIP_PY ${CMAKE_INSTALL_PREFIX}/gpuip/gpuip.py) 40 | add_custom_target(symlink ALL 41 | ln -sf ${GPUIP_PY} gpuip 42 | WORKING_DIRECTORY ${GPUIP_ROOT_DIR}/python) 43 | install(FILES gpuip DESTINATION bin COMPONENT bin) 44 | endif() 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /python/bufferswidget.py: -------------------------------------------------------------------------------- 1 | from PySide import QtGui, QtCore 2 | 3 | class BuffersWidget(QtGui.QWidget): 4 | class Buffer(object): 5 | def __init__(self, name, format, channels, input, output, parent): 6 | self.layout = QtGui.QVBoxLayout() 7 | 8 | self.inputLineEdit = QtGui.QLineEdit(input, parent) 9 | self.outputLineEdit = QtGui.QLineEdit(output, parent) 10 | self.inputLineEdit.setMinimumWidth(100) 11 | self.outputLineEdit.setMinimumWidth(99) 12 | inputButton = QtGui.QPushButton("...", parent) 13 | inputButton.clicked.connect(self.selectInput) 14 | outputButton = QtGui.QPushButton("...", parent) 15 | outputButton.clicked.connect(self.selectOutput) 16 | 17 | labelNames = ["Name", "Format", "Channels", "Input", "Output" ] 18 | rhsWidgets = [[QtGui.QLabel(name,parent)] , 19 | [QtGui.QLabel(format,parent)], 20 | [QtGui.QLabel(str(channels),parent)], 21 | [self.inputLineEdit, inputButton], 22 | [self.outputLineEdit, 
outputButton]] 23 | 24 | for name, widgets in zip(labelNames, rhsWidgets): 25 | lhs = QtGui.QLabel(""+name+": ", parent) 26 | layout = QtGui.QHBoxLayout() 27 | layout.addWidget(lhs) 28 | for widget in widgets: 29 | layout.addWidget(widget) 30 | if len(widgets) == 1: 31 | layout.addStretch() 32 | self.layout.addLayout(layout) 33 | 34 | def selectInput(self): 35 | inputImageFile = QtGui.QFileDialog.getOpenFileName( 36 | None, "Select input image", 37 | QtCore.QDir.currentPath(), "Exr (*exr);;Png (*png)") 38 | if inputImageFile[0]: 39 | self.inputLineEdit.setText(inputImageFile[0]) 40 | 41 | def selectOutput(self): 42 | outputImageFile = QtGui.QFileDialog.getSaveFileName( 43 | None, "Choose output image", 44 | QtCore.QDir.currentPath(), "Exr(*exr);;Png (*png)") 45 | if outputImageFile[0]: 46 | self.outputLineEdit.setText(outputImageFile[0]) 47 | 48 | def __init__(self, parent = None): 49 | super(BuffersWidget, self).__init__( parent) 50 | 51 | self.layout = QtGui.QVBoxLayout() 52 | self.setLayout(self.layout) 53 | self.buffers = {} 54 | 55 | def addBuffer(self, name, format, channels, input, output): 56 | self.buffers[name] = BuffersWidget.Buffer(name, format, channels, 57 | input, output, self) 58 | self.layout.addLayout(self.buffers[name].layout) 59 | 60 | # Add separating line after each buffer 61 | separator = QtGui.QFrame(self) 62 | separator.setFrameShape(QtGui.QFrame.HLine) 63 | separator.setFrameShadow(QtGui.QFrame.Sunken) 64 | self.layout.addWidget(separator) 65 | 66 | def getBufferInput(self, name): 67 | return str(self.buffers[name].inputLineEdit.text()) 68 | 69 | def getBufferOutput(self, name): 70 | return str(self.buffers[name].outputLineEdit.text()) 71 | 72 | -------------------------------------------------------------------------------- /python/gpuip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import utils 3 | import sys 4 | import signal 5 | import os 6 | try: 7 | import argparse 
8 | parsermodule = argparse.ArgumentParser 9 | except: 10 | import optparse 11 | parsermodule = optparse.OptionParser 12 | parsermodule.add_argument = parsermodule.add_option 13 | 14 | def getCommandLineArguments(): 15 | # Command line arguments 16 | desc = "Framework for Image Processing on the GPU" 17 | parser = parsermodule("gpuip", description=desc) 18 | parser.add_argument("-f", "--file", 19 | help="Image Processing file *.ip") 20 | parser.add_argument("-p", "--param", 21 | action="append", 22 | nargs = 3, 23 | metavar = ("kernel", "param", "value"), 24 | help="Change value of a parameter.") 25 | parser.add_argument("-i", "--inbuffer", 26 | action="append", 27 | nargs = 2, 28 | metavar = ("buffer", "path"), 29 | help = "Set input image to a buffer") 30 | parser.add_argument("-o", "--outbuffer", 31 | action="append", 32 | nargs = 2, 33 | metavar = ("buffer", "path"), 34 | help = "Set output image to a buffer") 35 | parser.add_argument("-v","--verbose", 36 | action="store_true", 37 | help="Outputs information") 38 | parser.add_argument("--timestamp", 39 | action="store_true", 40 | help="Add timestamp in log output") 41 | parser.add_argument("--nogui", 42 | action="store_true", 43 | help="Command line version") 44 | 45 | if parsermodule.__name__ == "ArgumentParser": 46 | return parser.parse_args() 47 | else: 48 | return parser.parse_args()[0] 49 | 50 | def terminate(msg): 51 | print msg 52 | sys.exit(1) 53 | 54 | def getSettings(args): 55 | import settings 56 | 57 | if not args.file or not os.path.isfile(args.file): 58 | return None 59 | 60 | ipsettings = settings.Settings() 61 | ipsettings.read(args.file) 62 | 63 | # Change parameter values 64 | if args.param: 65 | for p in args.param: 66 | kernelName, paramName, value = p 67 | kernel = ipsettings.getKernel(kernelName) 68 | if not kernel: 69 | terminate("gpuip error: No kernel %s found." 
% kernelName) 70 | param = kernel.getParam(paramName) 71 | if param: 72 | param.setValue(utils.safeEval(value)) 73 | else: 74 | terminate("gpuip error: No param %s found in kernel %s." \ 75 | % (paramName, kernelName)) 76 | 77 | # Change input buffers 78 | if args.inbuffer: 79 | for inb in args.inbuffer: 80 | bufferName, path = inb[0], inb[1] 81 | buffer = ipsettings.getBuffer(bufferName) 82 | if buffer: 83 | buffer.input = path 84 | if not os.path.isfile(buffer.input): 85 | raise IOError("No such file: '%s'" % buffer.input) 86 | else: 87 | terminate("gpuip error: No buffer %s found." % buffer) 88 | 89 | # Change output buffers 90 | if args.outbuffer: 91 | for outb in args.outbuffer: 92 | bufferName, path = outb[0], outb[1] 93 | buffer = ipsettings.getBuffer(bufferName) 94 | if buffer: 95 | buffer.output = path 96 | os.makedirs(os.path.dirname(os.path.realpath(path))) 97 | else: 98 | terminate("gpuip error: No buffer %s found." % bufferName) 99 | 100 | return ipsettings 101 | 102 | def runGUI(ippath, ipsettings): 103 | # Run GUI version 104 | from PySide import QtGui 105 | import mainwindow 106 | 107 | # Makes it possible to close program with ctrl+c in a terminal 108 | signal.signal(signal.SIGINT, signal.SIG_DFL) 109 | app = QtGui.QApplication(sys.argv) 110 | app.setStyle("plastique") 111 | mainwindow = mainwindow.MainWindow(path = ippath, settings = ipsettings) 112 | mainwindow.show() 113 | sys.exit(app.exec_()) 114 | 115 | def runCommandLine(ipsettings, verbose): 116 | # Can't run non-gui version if there's no *.ip file 117 | if not ipsettings: 118 | err = "Must specify an existing *.ip file in the command-line version\n" 119 | err += "example: \n" 120 | err += " gpuip --nogui smooth.ip""" 121 | terminate(err) 122 | 123 | def check_error(err): 124 | if err: 125 | terminate(err) 126 | 127 | def log(text, stopwatch = None, time = True): 128 | time = time and args.timestamp 129 | if verbose: 130 | stopwatchStr = str(stopwatch) if stopwatch else "" 131 | timeStr = 
utils.getTimeStr() if time else "" 132 | print timeStr + text + " " + stopwatchStr 133 | 134 | overall_clock = utils.StopWatch() 135 | 136 | ### 0. Create gpuip items from settings 137 | ip, buffers, kernels = ipsettings.create() 138 | log("Created elements from settings.", overall_clock) 139 | 140 | ### 1. Build 141 | c = utils.StopWatch() 142 | check_error(ip.Build()) 143 | log("Building kernels [%s]." % [k.name for k in kernels], c) 144 | 145 | ### 2. Import data from images 146 | c = utils.StopWatch() 147 | for b in ipsettings.buffers: 148 | if b.input: 149 | log("Importing data from %s to %s" %(b.input, b.name)) 150 | check_error(buffers[b.name].Read(b.input, utils.getNumCores())) 151 | log("Importing data done.", c) 152 | 153 | ### 3. Allocate and transfer data to GPU 154 | c = utils.StopWatch() 155 | width, height = utils.allocateBufferData(buffers) 156 | ip.SetDimensions(width, height) 157 | check_error(ip.Allocate()) 158 | log("Allocating done.", c) 159 | c = utils.StopWatch() 160 | for b in ipsettings.buffers: 161 | if b.input: 162 | check_error(ip.WriteBufferToGPU(buffers[b.name])) 163 | log("Transfering data to GPU done.", c) 164 | 165 | ### 4. Process 166 | c = utils.StopWatch() 167 | check_error(ip.Run()) 168 | log("Processing done.", c) 169 | 170 | ### 5. Export buffers to images 171 | c = utils.StopWatch() 172 | for b in ipsettings.buffers: 173 | if b.output: 174 | log("Exporting data from %s to %s" %(b.name, b.output)) 175 | check_error(ip.ReadBufferFromGPU(buffers[b.name])) 176 | check_error(buffers[b.name].Write(b.output,utils.getNumCores())) 177 | log("Exporting data done.", c) 178 | 179 | log("\nAll steps done. 
Total runtime:", overall_clock, time = False) 180 | 181 | if __name__ == "__main__": 182 | args = getCommandLineArguments() 183 | ipsettings = getSettings(args) 184 | if args.nogui: 185 | runCommandLine(ipsettings, args.verbose) 186 | else: 187 | runGUI(args.file if ipsettings else None, ipsettings) 188 | 189 | -------------------------------------------------------------------------------- /python/icons/boilerplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/boilerplate.png -------------------------------------------------------------------------------- /python/icons/build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/build.png -------------------------------------------------------------------------------- /python/icons/export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/export.png -------------------------------------------------------------------------------- /python/icons/generateIcons.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import sys 4 | import numpy 5 | # no need to generate again 6 | if os.path.exists(sys.argv[1]): 7 | sys.exit(0) 8 | try: 9 | from PIL import Image 10 | except: 11 | txt = ("raise Exception('gpuip: Icons were not generated. 
" 12 | "Install the Python module PIL and rebuild.')") 13 | open(sys.argv[1], "w").write(txt) 14 | sys.exit(0) 15 | 16 | out = """from PySide import QtGui 17 | data, width, height = {}, {}, {} 18 | def get(name): 19 | image = QtGui.QImage(data[name], width[name], height[name], 20 | QtGui.QImage.Format_ARGB32) 21 | return QtGui.QIcon(QtGui.QPixmap.fromImage(image)) 22 | """ 23 | 24 | for i in glob.glob("*.png"): 25 | name = i[:i.find(".")] 26 | data = numpy.asarray(Image.open(i).convert("RGBA")) 27 | data_flip = numpy.array(data, copy=True) 28 | data_flip[:,:,0] = data[:,:,2] 29 | data_flip[:,:,2] = data[:,:,0] 30 | out += "data['" + name + "'] = " + repr(data_flip.tostring()) + " \n" 31 | out += "width['" + name + "'] = " + str(data_flip.shape[0]) + "\n" 32 | out += "height['" + name + "'] = " + str(data_flip.shape[1]) + "\n" 33 | 34 | open(sys.argv[1], "w").write(out) 35 | -------------------------------------------------------------------------------- /python/icons/import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/import.png -------------------------------------------------------------------------------- /python/icons/init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/init.png -------------------------------------------------------------------------------- /python/icons/new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/new.png -------------------------------------------------------------------------------- /python/icons/newExisting.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/newExisting.png -------------------------------------------------------------------------------- /python/icons/open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/open.png -------------------------------------------------------------------------------- /python/icons/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/process.png -------------------------------------------------------------------------------- /python/icons/pug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/pug.png -------------------------------------------------------------------------------- /python/icons/refresh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/refresh.png -------------------------------------------------------------------------------- /python/icons/run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/run.png -------------------------------------------------------------------------------- /python/icons/save.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlssonper/gpuip/ce9a62ea9ced3f167a2477bf58944281dac6d45b/python/icons/save.png 
-------------------------------------------------------------------------------- /python/kernelwidget.py: -------------------------------------------------------------------------------- 1 | from PySide import QtGui, QtCore 2 | import sys 3 | import utils 4 | 5 | class KernelWidget(QtGui.QSplitter): 6 | def __init__(self, parent = None, callbackFunc = None): 7 | super(KernelWidget, self).__init__(QtCore.Qt.Horizontal,parent) 8 | 9 | #Optional callback function. Called everytime a paramter is changed 10 | self.callbackFunc = callbackFunc 11 | 12 | # Left side is a Text Editor where the kernel code is displayed 13 | self.codeEditor = CodeEditor(self) 14 | 15 | # Right side is going to vertically placed group boxes 16 | # rightWidget is a work around since you can't add layouts to QSplitters 17 | rightWidget = QtGui.QWidget(self) 18 | rightLayout = QtGui.QVBoxLayout() 19 | rightWidget.setLayout(rightLayout) 20 | rightWidget.setSizePolicy(QtGui.QSizePolicy.Minimum, 21 | QtGui.QSizePolicy.Minimum) 22 | 23 | # Three group boxes for in buffers, out buffers and parameters 24 | groupBoxesNames = ["Input Buffers", "Output Buffers", "Parameters"] 25 | groupBoxes = [QtGui.QGroupBox(s,self) for s in groupBoxesNames] 26 | self.gridLayouts= {} 27 | for i,groupBox in enumerate(groupBoxes): 28 | self.gridLayouts[groupBoxesNames[i]] = QtGui.QGridLayout() 29 | groupBox.setLayout(self.gridLayouts[groupBoxesNames[i]]) 30 | groupBox.setSizePolicy(QtGui.QSizePolicy.Minimum, 31 | QtGui.QSizePolicy.Minimum) 32 | rightLayout.addWidget(groupBox) 33 | rightLayout.addStretch() 34 | 35 | # Add widgets to splitter 36 | self.addWidget(self.codeEditor) 37 | self.addWidget(rightWidget) 38 | 39 | self.inBuffers = {} 40 | self.outBuffers = {} 41 | self.params = {} 42 | 43 | def addInBuffer(self, name, buffer, bufferNames): 44 | self.inBuffers[name] = Buffer(self, name, buffer, bufferNames, 45 | self.gridLayouts["Input Buffers"], 46 | len(self.inBuffers)) 47 | 48 | def addOutBuffer(self, name, buffer, 
bufferNames): 49 | self.outBuffers[name] = Buffer(self, name, buffer, bufferNames, 50 | self.gridLayouts["Output Buffers"], 51 | len(self.outBuffers)) 52 | 53 | def addParameter(self, name, val, defVal, minVal, maxVal, typename): 54 | self.params[name] = Parameter(self.gridLayouts["Parameters"], 55 | len(self.params), name, val, defVal, 56 | minVal, maxVal, typename, 57 | self.callbackFunc) 58 | class Buffer(object): 59 | def __init__(self, parent, name, buffer, bufferNames, grid, row): 60 | self.name = name 61 | label = QtGui.QLabel(name+": ", parent) 62 | label.setSizePolicy(QtGui.QSizePolicy.Maximum, 63 | QtGui.QSizePolicy.Maximum) 64 | self.cbox = QtGui.QComboBox(parent) 65 | self.cbox.addItems(bufferNames) 66 | if buffer != "": 67 | idx = bufferNames.index(buffer) 68 | if idx != -1: 69 | self.cbox.setCurrentIndex(idx) 70 | 71 | grid.addWidget(label, row, 0) 72 | grid.addWidget(self.cbox, row, 1) 73 | 74 | class Parameter(object): 75 | def __init__(self, gridLayout, row, name, val, defVal, minVal, maxVal, 76 | typename, callbackFunc = None): 77 | self.name = name 78 | self.defaultVal = defVal 79 | self.minVal = minVal 80 | self.maxVal = maxVal 81 | self.typename = typename 82 | self.callbackFunc = callbackFunc 83 | 84 | # Each parameters has a label with the name, a lineedit with text value 85 | # and a slider with the value (relative to min max) 86 | self.label = QtGui.QLabel(name) 87 | self.lineEdit = QtGui.QLineEdit() 88 | self.lineEdit.setFixedWidth(60) 89 | 90 | # Custom slider with a minimum width 91 | class _Slider(QtGui.QSlider): 92 | def __init__(self): 93 | super(_Slider, self).__init__(QtCore.Qt.Horizontal) 94 | self.setSizePolicy(QtGui.QSizePolicy.MinimumExpanding, 95 | QtGui.QSizePolicy.Maximum) 96 | def sizeHint(self): 97 | return QtCore.QSize(80,20) 98 | self.slider = _Slider() 99 | self.slider.setRange(0,100) 100 | 101 | gridLayout.addWidget(self.label, row, 0) 102 | gridLayout.addWidget(self.lineEdit, row, 1) 103 | 
gridLayout.addWidget(self.slider, row, 2) 104 | 105 | # When a slider is changed it should update the line edit and vice verse 106 | self.lineEdit.textChanged.connect(self.onLineEditChange) 107 | self.slider.valueChanged.connect(self.onSliderChange) 108 | 109 | # Helper variables to know when to trigger updates 110 | self.updateSlider = True 111 | self.updateLineEdit = True 112 | 113 | txt = str(int(val)) if typename == "int" else str(val) 114 | self.lineEdit.setText(txt) 115 | 116 | def onLineEditChange(self): 117 | # Changing the line edit triggers slider update that triggers 118 | # line edit update again. This is to prevent the second update 119 | if not self.updateLineEdit: 120 | return 121 | 122 | # Evaluate the line edit text to get the number 123 | val = utils.safeEval(self.lineEdit.text(),self.defaultVal,self.typename) 124 | if self.typename == "int": 125 | # If the parameter is of int, format line edit to be int too. 126 | self.lineEdit.setText(str(val)) 127 | 128 | # Don't run the onSliderChange function 129 | self.updateSlider = False 130 | 131 | # Update the slider position 132 | if val < self.minVal: 133 | self.slider.setSliderPosition(0) 134 | elif val > self.maxVal: 135 | self.slider.setSliderPosition(100) 136 | else: 137 | t = (val - self.minVal) / float(self.maxVal - self.minVal) 138 | self.slider.setSliderPosition(100 * t) 139 | 140 | # Slider has been updated, safe to set this variable to true again 141 | self.updateSlider = True 142 | 143 | # If a callback function was added, call it 144 | if self.callbackFunc: 145 | self.callbackFunc() 146 | 147 | def onSliderChange(self): 148 | # Changing the slider triggers line edit update that triggers 149 | # slider update again. 
This is to prevent the second update 150 | if not self.updateSlider: 151 | return 152 | 153 | # Evaluate val based on slider position 154 | val = 0.01*self.slider.value() * (self.maxVal-self.minVal) + self.minVal 155 | 156 | # Don't run the onLineEditChange function 157 | self.updateLineEdit = False 158 | 159 | # Update LineEdit text 160 | txt = str(int(val))if self.typename == "int" else str(val) 161 | self.lineEdit.setText(txt) 162 | 163 | # LineEdit has been updated, safe to set this variable to true again 164 | self.updateLineEdit = True 165 | 166 | # If a callback function was added, call it 167 | if self.callbackFunc: 168 | self.callbackFunc() 169 | 170 | class CodeEditor(QtGui.QTextEdit): 171 | def __init__(self, parent): 172 | super(CodeEditor, self).__init__(parent) 173 | font = QtGui.QFont() 174 | font.setFamily("Monospace") 175 | font.setFixedPitch(True); 176 | metrics = QtGui.QFontMetrics(font) 177 | self.setTabStopWidth(4 * metrics.width(' ')) 178 | self.w = 55 * metrics.width(' ') 179 | self.setFont(font) 180 | color = QtGui.QColor(0,0,0) 181 | color.setNamedColor("#F8F8F2") 182 | self.setTextColor(color) 183 | self.setSizePolicy(QtGui.QSizePolicy.Minimum, 184 | QtGui.QSizePolicy.Minimum) 185 | self.highlighter = Highlighter(self.document()) 186 | 187 | def sizeHint(self): 188 | return QtCore.QSize(self.w,200) 189 | 190 | class Highlighter(QtGui.QSyntaxHighlighter): 191 | def __init__(self, parent=None): 192 | super(Highlighter, self).__init__(parent) 193 | 194 | keywordFormat = QtGui.QTextCharFormat() 195 | color = QtGui.QColor(0,0,0) 196 | color.setNamedColor("#66D9EF") 197 | keywordFormat.setForeground(color) 198 | 199 | keywords = ["char", "double", "sampler2D", 200 | "vec2", "vec3", "vec4", 201 | "half", "bool", 202 | "float", "float2", "float3", "float4", 203 | "uchar", "uchar2", "uchar3", "uchar4", 204 | "int", "int2", "int3", "int4", "long", 205 | "short", "signed", "unsigned", "union", "void"] 206 | keywordPatterns = ["\\b" + kw + "\\b" for 
kw in keywords] 207 | self.highlightingRules = [(QtCore.QRegExp(pattern), keywordFormat) 208 | for pattern in keywordPatterns] 209 | 210 | keywordFormat = QtGui.QTextCharFormat() 211 | color = QtGui.QColor(0,0,0) 212 | color.setNamedColor("#4e9a06") 213 | keywordFormat.setForeground(color) 214 | keywords = ["__kernel", "__device__", "__global", "__global__", 215 | "uniform", "varying", "#version", "texture2D"] 216 | keywordPatterns = ["\\b" + kw + "\\b" for kw in keywords] 217 | self.highlightingRules += [(QtCore.QRegExp(pattern), keywordFormat) 218 | for pattern in keywordPatterns] 219 | 220 | keywordFormat = QtGui.QTextCharFormat() 221 | color = QtGui.QColor(0,0,0) 222 | color.setNamedColor("#F92672") 223 | keywordFormat.setForeground(color) 224 | keywords = ["const", "inline", "template", "typedef", "typename", 225 | "if", "for", "while", "switch", "case", 226 | "return", "break", "else"] 227 | keywordPatterns = ["\\b" + kw + "\\b" for kw in keywords] 228 | self.highlightingRules += [(QtCore.QRegExp(pattern), keywordFormat) 229 | for pattern in keywordPatterns] 230 | 231 | color = QtGui.QColor(0,0,0) 232 | color.setNamedColor("#75715E") 233 | singleLineCommentFormat = QtGui.QTextCharFormat() 234 | singleLineCommentFormat.setForeground(color) 235 | self.highlightingRules.append((QtCore.QRegExp("//[^\n]*"), 236 | singleLineCommentFormat)) 237 | 238 | self.multiLineCommentFormat = QtGui.QTextCharFormat() 239 | self.multiLineCommentFormat.setForeground(color)#QtCore.Qt.red) 240 | 241 | quotationFormat = QtGui.QTextCharFormat() 242 | quotationFormat.setForeground(QtCore.Qt.darkGreen) 243 | self.highlightingRules.append((QtCore.QRegExp("\".*\""), 244 | quotationFormat)) 245 | 246 | self.commentStartExpression = QtCore.QRegExp("/\\*") 247 | self.commentEndExpression = QtCore.QRegExp("\\*/") 248 | 249 | def highlightBlock(self, text): 250 | for pattern, format in self.highlightingRules: 251 | expression = QtCore.QRegExp(pattern) 252 | index = expression.indexIn(text) 
class Param(object):
    """A named, typed kernel parameter with a default, a range and a value.

    ``type`` is the string "float" for floating point parameters; any other
    type string is treated as an integer parameter, whose ``default``,
    ``min``, ``max`` and ``value`` are converted with ``int()``.
    """

    def __init__(self, name, type, default, min, max):
        self.name = name
        self.type = type
        self.default = self._convert(default)
        self.min = self._convert(min)
        self.max = self._convert(max)
        self.setValue(self.default)

    def _convert(self, value):
        # Float parameters are stored untouched, everything else as int
        return value if self.type == "float" else int(value)

    def setValue(self, value):
        """Store ``value`` as the current value, converted to the param type.

        The conversion must look at ``self.type``: a bare ``type`` in this
        scope is the builtin, which never equals "float", so float values
        would always be truncated to integers.
        """
        self.value = self._convert(value)
def updateCode(self):
    """Reload the source text of every kernel from its ``code_file``.

    Kernels whose ``code_file`` is not an existing file keep their current
    ``code`` unchanged.
    """
    for k in self.kernels:
        if os.path.isfile(k.code_file):
            # Use a context manager so the file handle is closed right away
            # instead of whenever the garbage collector gets to it
            with open(k.code_file) as codeFile:
                k.code = codeFile.read()
kernel.inBuffers.append(Settings.Kernel.KernelBuffer(name,buf)) 103 | 104 | # Out Buffers 105 | for outb in k.getElementsByTagName("outbuffer"): 106 | name = self.data(outb, "name") 107 | hasBuffer = outb.getElementsByTagName("targetbuffer") 108 | buf = self.data(outb, "targetbuffer") if hasBuffer else "" 109 | kernel.outBuffers.append(Settings.Kernel.KernelBuffer(name,buf)) 110 | 111 | # Params 112 | for p in k.getElementsByTagName("param"): 113 | type = self.data(p, "type") 114 | param = Settings.Param( 115 | self.data(p, "name"), type, 116 | utils.safeEval(self.data(p, "default"),type), 117 | utils.safeEval(self.data(p, "min"),type), 118 | utils.safeEval(self.data(p, "max"),type)) 119 | param.value = utils.safeEval(self.data(p, "value"),type) 120 | kernel.params.append(param) 121 | self.kernels.append(kernel) 122 | 123 | self.updateCode() 124 | 125 | def write(self, xml_file): 126 | path = os.path.realpath(os.path.dirname(xml_file)) 127 | doc = minidom.Document() 128 | root = doc.createElement("gpuip") 129 | 130 | # Environment 131 | node = doc.createElement("environment") 132 | root.appendChild(node) 133 | node.appendChild(doc.createTextNode(self.environment)) 134 | 135 | # Buffers 136 | bufferAttrs = ["name", "type", "channels", "input", "output"] 137 | for b in self.buffers: 138 | bufferNode = doc.createElement("buffer") 139 | root.appendChild(bufferNode) 140 | 141 | for attr in bufferAttrs: 142 | value = str(getattr(b, attr)) 143 | if value != "": 144 | node = doc.createElement(attr) 145 | bufferNode.appendChild(node) 146 | node.appendChild(doc.createTextNode(value)) 147 | 148 | # Kernels 149 | paramAttrs = ["name", "type", "value", "default", "min", "max"] 150 | for k in self.kernels: 151 | # Write kernel code to file 152 | code_file_path = os.path.join(path,k.code_file) 153 | open(code_file_path, "w").write(k.code.replace("\t"," ")) 154 | 155 | kernelNode = doc.createElement("kernel") 156 | root.appendChild(kernelNode) 157 | 158 | node = 
doc.createElement("name") 159 | kernelNode.appendChild(node) 160 | node.appendChild(doc.createTextNode(k.name)) 161 | 162 | node = doc.createElement("code_file") 163 | kernelNode.appendChild(node) 164 | node.appendChild(doc.createTextNode(k.code_file)) 165 | 166 | # In Buffers 167 | for inb in k.inBuffers: 168 | inbufferNode = doc.createElement("inbuffer") 169 | kernelNode.appendChild(inbufferNode) 170 | node = doc.createElement("name") 171 | inbufferNode.appendChild(node) 172 | node.appendChild(doc.createTextNode(inb.name)) 173 | if inb.buffer != "": 174 | node = doc.createElement("targetbuffer") 175 | inbufferNode.appendChild(node) 176 | node.appendChild(doc.createTextNode(inb.buffer)) 177 | 178 | # In Buffers 179 | for outb in k.outBuffers: 180 | outbufferNode = doc.createElement("outbuffer") 181 | kernelNode.appendChild(outbufferNode) 182 | node = doc.createElement("name") 183 | outbufferNode.appendChild(node) 184 | node.appendChild(doc.createTextNode(outb.name)) 185 | if outb.buffer != "": 186 | node = doc.createElement("targetbuffer") 187 | outbufferNode.appendChild(node) 188 | node.appendChild(doc.createTextNode(outb.buffer)) 189 | 190 | # Params 191 | for p in k.params: 192 | paramNode = doc.createElement("param") 193 | kernelNode.appendChild(paramNode) 194 | for attr in paramAttrs: 195 | node = doc.createElement(attr) 196 | paramNode.appendChild(node) 197 | node.appendChild(doc.createTextNode(str(getattr(p, attr)))) 198 | 199 | # Ugly result :( 200 | #root.writexml(open(xml_file,'w'), addindent=" ", newl='\n') 201 | 202 | # Work-around to get one line text nodes, taken from 203 | #http://stackoverflow.com/questions/749796/pretty-printing-xml-in-python 204 | import re 205 | xml = root.toprettyxml(indent=" ") 206 | text_re = re.compile('>\n\s+([^<>\s].*?)\n\s+\g<1> error 256 | if not len(buffers) and (len(k.inBuffers) or len(k.outBuffers)): 257 | raise Exception("no buffers found") 258 | 259 | # Backup buffer if no buffer is set in kernels 260 | firstBuf = 
buffers.values()[0] if len(buffers) else None 261 | 262 | # Input buffers 263 | for inb in k.inBuffers: 264 | buf = buffers[inb.buffer] if inb.buffer != "" else firstBuf 265 | kernel.SetInBuffer(inb.name, buf) 266 | 267 | # Output buffers 268 | for outb in k.outBuffers: 269 | buf = buffers[outb.buffer] if outb.buffer != "" else firstBuf 270 | kernel.SetOutBuffer(outb.name, buf) 271 | 272 | # Params 273 | for p in k.params: 274 | c= pygpuip.ParamFloat if p.type == "float" else pygpuip.ParamInt 275 | kernel.SetParam(c(p.name,p.value)) 276 | 277 | # Code 278 | kernel.code = k.code 279 | 280 | -------------------------------------------------------------------------------- /python/stylesheet.py: -------------------------------------------------------------------------------- 1 | data = """ 2 | QToolTip 3 | { 4 | border: 1px solid black; 5 | background-color: #ffa02f; 6 | padding: 1px; 7 | border-radius: 3px; 8 | opacity: 100; 9 | } 10 | 11 | QWidget 12 | { 13 | color: #b1b1b1; 14 | background-color: #323232; 15 | } 16 | 17 | QWidget:item:hover 18 | { 19 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #ca0619); 20 | color: #000000; 21 | } 22 | 23 | QWidget:item:selected 24 | { 25 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 26 | } 27 | 28 | QMenuBar::item 29 | { 30 | background: transparent; 31 | } 32 | 33 | QMenuBar::item:selected 34 | { 35 | background: transparent; 36 | border: 1px solid #ffaa00; 37 | } 38 | 39 | QMenuBar::item:pressed 40 | { 41 | background: #444; 42 | border: 1px solid #000; 43 | background-color: QLinearGradient( 44 | x1:0, y1:0, 45 | x2:0, y2:1, 46 | stop:1 #212121, 47 | stop:0.4 #343434/*, 48 | stop:0.2 #343434, 49 | stop:0.1 #ffaa00*/ 50 | ); 51 | margin-bottom:-1px; 52 | padding-bottom:1px; 53 | } 54 | 55 | QMenu 56 | { 57 | border: 1px solid #000; 58 | } 59 | 60 | QMenu::item 61 | { 62 | padding: 2px 20px 2px 20px; 63 | } 64 | 65 | 
QMenu::item:selected 66 | { 67 | color: #000000; 68 | } 69 | 70 | QWidget:disabled 71 | { 72 | color: #404040; 73 | background-color: #323232; 74 | } 75 | 76 | QAbstractItemView 77 | { 78 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #4d4d4d, stop: 0.1 #646464, stop: 1 #5d5d5d); 79 | } 80 | 81 | QWidget:focus 82 | { 83 | /*border: 2px solid QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a);*/ 84 | } 85 | 86 | QLineEdit 87 | { 88 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #4d4d4d, stop: 0 #646464, stop: 1 #5d5d5d); 89 | padding: 1px; 90 | border-style: solid; 91 | border: 1px solid #1e1e1e; 92 | border-radius: 5; 93 | } 94 | 95 | QPushButton 96 | { 97 | color: #b1b1b1; 98 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #565656, stop: 0.1 #525252, stop: 0.5 #4e4e4e, stop: 0.9 #4a4a4a, stop: 1 #464646); 99 | border-width: 1px; 100 | border-color: #1e1e1e; 101 | border-style: solid; 102 | border-radius: 6; 103 | padding: 3px; 104 | font-size: 12px; 105 | padding-left: 5px; 106 | padding-right: 5px; 107 | } 108 | 109 | QPushButton:pressed 110 | { 111 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #2d2d2d, stop: 0.1 #2b2b2b, stop: 0.5 #292929, stop: 0.9 #282828, stop: 1 #252525); 112 | } 113 | 114 | QComboBox 115 | { 116 | selection-background-color: #ffaa00; 117 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #565656, stop: 0.1 #525252, stop: 0.5 #4e4e4e, stop: 0.9 #4a4a4a, stop: 1 #464646); 118 | border-style: solid; 119 | border: 1px solid #1e1e1e; 120 | border-radius: 5; 121 | } 122 | 123 | QComboBox:hover,QPushButton:hover 124 | { 125 | border: 2px solid QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 126 | } 127 | 128 | 129 | QComboBox:on 130 | { 131 | padding-top: 3px; 132 | padding-left: 4px; 133 | background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 
#2d2d2d, stop: 0.1 #2b2b2b, stop: 0.5 #292929, stop: 0.9 #282828, stop: 1 #252525); 134 | selection-background-color: #ffaa00; 135 | } 136 | 137 | QComboBox QAbstractItemView 138 | { 139 | border: 2px solid darkgray; 140 | selection-background-color: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 141 | } 142 | 143 | QComboBox::drop-down 144 | { 145 | subcontrol-origin: padding; 146 | subcontrol-position: top right; 147 | width: 15px; 148 | 149 | border-left-width: 0px; 150 | border-left-color: darkgray; 151 | border-left-style: solid; /* just a single line */ 152 | border-top-right-radius: 3px; /* same radius as the QComboBox */ 153 | border-bottom-right-radius: 3px; 154 | } 155 | 156 | QGroupBox:focus 157 | { 158 | border: 2px solid QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 159 | } 160 | 161 | QTextEdit:focus 162 | { 163 | border: 2px solid QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 164 | } 165 | 166 | QScrollBar:horizontal { 167 | border: 1px solid #222222; 168 | background: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0.0 #121212, stop: 0.2 #282828, stop: 1 #484848); 169 | height: 7px; 170 | margin: 0px 16px 0 16px; 171 | } 172 | 173 | QScrollBar::handle:horizontal 174 | { 175 | background: QLinearGradient( x1: 0, y1: 0, x2: 1, y2: 0, stop: 0 #ffa02f, stop: 0.5 #d7801a, stop: 1 #ffa02f); 176 | min-height: 20px; 177 | border-radius: 2px; 178 | } 179 | 180 | QScrollBar::add-line:horizontal { 181 | border: 1px solid #1b1b19; 182 | border-radius: 2px; 183 | background: QLinearGradient( x1: 0, y1: 0, x2: 1, y2: 0, stop: 0 #ffa02f, stop: 1 #d7801a); 184 | width: 14px; 185 | subcontrol-position: right; 186 | subcontrol-origin: margin; 187 | } 188 | 189 | QScrollBar::sub-line:horizontal { 190 | border: 1px solid #1b1b19; 191 | border-radius: 2px; 192 | background: QLinearGradient( x1: 0, y1: 0, x2: 1, y2: 0, stop: 0 #ffa02f, stop: 1 #d7801a); 193 | width: 
14px; 194 | subcontrol-position: left; 195 | subcontrol-origin: margin; 196 | } 197 | 198 | QScrollBar::right-arrow:horizontal, QScrollBar::left-arrow:horizontal 199 | { 200 | border: 1px solid black; 201 | width: 1px; 202 | height: 1px; 203 | background: white; 204 | } 205 | 206 | QScrollBar::add-page:horizontal, QScrollBar::sub-page:horizontal 207 | { 208 | background: none; 209 | } 210 | 211 | QScrollBar:vertical 212 | { 213 | background: QLinearGradient( x1: 0, y1: 0, x2: 1, y2: 0, stop: 0.0 #121212, stop: 0.2 #282828, stop: 1 #484848); 214 | width: 7px; 215 | margin: 16px 0 16px 0; 216 | border: 1px solid #222222; 217 | } 218 | 219 | QScrollBar::handle:vertical 220 | { 221 | background: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 0.5 #d7801a, stop: 1 #ffa02f); 222 | min-height: 20px; 223 | border-radius: 2px; 224 | } 225 | 226 | QScrollBar::add-line:vertical 227 | { 228 | border: 1px solid #1b1b19; 229 | border-radius: 2px; 230 | background: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #ffa02f, stop: 1 #d7801a); 231 | height: 14px; 232 | subcontrol-position: bottom; 233 | subcontrol-origin: margin; 234 | } 235 | 236 | QScrollBar::sub-line:vertical 237 | { 238 | border: 1px solid #1b1b19; 239 | border-radius: 2px; 240 | background: QLinearGradient( x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #d7801a, stop: 1 #ffa02f); 241 | height: 14px; 242 | subcontrol-position: top; 243 | subcontrol-origin: margin; 244 | } 245 | 246 | QScrollBar::up-arrow:vertical, QScrollBar::down-arrow:vertical 247 | { 248 | border: 1px solid black; 249 | width: 1px; 250 | height: 1px; 251 | background: white; 252 | } 253 | 254 | 255 | QScrollBar::add-page:vertical, QScrollBar::sub-page:vertical 256 | { 257 | background: none; 258 | } 259 | 260 | QTextEdit 261 | { 262 | background-color: #242424; 263 | } 264 | 265 | QPlainTextEdit 266 | { 267 | background-color: #242424; 268 | } 269 | 270 | QHeaderView::section 271 | { 272 | background-color: QLinearGradient(x1:0, 
y1:0, x2:0, y2:1, stop:0 #616161, stop: 0.5 #505050, stop: 0.6 #434343, stop:1 #656565); 273 | color: white; 274 | padding-left: 4px; 275 | border: 1px solid #6c6c6c; 276 | } 277 | 278 | 279 | QDockWidget::title 280 | { 281 | text-align: center; 282 | spacing: 3px; /* spacing between items in the tool bar */ 283 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:0 #323232, stop: 0.5 #242424, stop:1 #323232); 284 | } 285 | 286 | QDockWidget::close-button, QDockWidget::float-button 287 | { 288 | text-align: center; 289 | spacing: 1px; /* spacing between items in the tool bar */ 290 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:0 #323232, stop: 0.5 #242424, stop:1 #323232); 291 | } 292 | 293 | QDockWidget::close-button:hover, QDockWidget::float-button:hover 294 | { 295 | background: #242424; 296 | } 297 | 298 | QDockWidget::close-button:pressed, QDockWidget::float-button:pressed 299 | { 300 | padding: 1px -1px -1px 1px; 301 | } 302 | 303 | QMainWindow::separator 304 | { 305 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:0 #161616, stop: 0.5 #151515, stop: 0.6 #212121, stop:1 #343434); 306 | color: white; 307 | padding-left: 4px; 308 | border: 1px solid #4c4c4c; 309 | spacing: 3px; /* spacing between items in the tool bar */ 310 | } 311 | 312 | QMainWindow::separator:hover 313 | { 314 | 315 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:0 #d7801a, stop:0.5 #b56c17 stop:1 #ffa02f); 316 | color: white; 317 | padding-left: 4px; 318 | border: 1px solid #6c6c6c; 319 | spacing: 3px; /* spacing between items in the tool bar */ 320 | } 321 | 322 | QMenu::separator 323 | { 324 | height: 2px; 325 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:0 #161616, stop: 0.5 #151515, stop: 0.6 #212121, stop:1 #343434); 326 | color: white; 327 | padding-left: 4px; 328 | margin-left: 10px; 329 | margin-right: 5px; 330 | } 331 | 332 | QProgressBar 333 | { 334 | border: 2px solid grey; 335 | border-radius: 
5px; 336 | text-align: center; 337 | } 338 | 339 | QProgressBar::chunk 340 | { 341 | background-color: #d7801a; 342 | width: 2.15px; 343 | margin: 0.5px; 344 | } 345 | 346 | QTabBar::tab { 347 | color: #b1b1b1; 348 | border: 1px solid #444; 349 | border-bottom-style: none; 350 | background-color: #323232; 351 | padding-left: 10px; 352 | padding-right: 10px; 353 | padding-top: 3px; 354 | padding-bottom: 2px; 355 | margin-right: -1px; 356 | } 357 | 358 | QTabWidget::pane { 359 | border: 1px solid #444; 360 | top: 1px; 361 | } 362 | 363 | QTabBar::tab:last 364 | { 365 | margin-right: 0; /* the last selected tab has nothing to overlap with on the right */ 366 | border-top-right-radius: 3px; 367 | } 368 | 369 | QTabBar::tab:first:!selected 370 | { 371 | margin-left: 0px; /* the last selected tab has nothing to overlap with on the right */ 372 | 373 | 374 | border-top-left-radius: 3px; 375 | } 376 | 377 | QTabBar::tab:!selected 378 | { 379 | color: #b1b1b1; 380 | border-bottom-style: solid; 381 | margin-top: 3px; 382 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:1 #212121, stop:.4 #343434); 383 | } 384 | 385 | QTabBar::tab:selected 386 | { 387 | border-top-left-radius: 3px; 388 | border-top-right-radius: 3px; 389 | margin-bottom: 0px; 390 | } 391 | 392 | QTabBar::tab:!selected:hover 393 | { 394 | /*border-top: 2px solid #ffaa00; 395 | padding-bottom: 3px;*/ 396 | border-top-left-radius: 3px; 397 | border-top-right-radius: 3px; 398 | background-color: QLinearGradient(x1:0, y1:0, x2:0, y2:1, stop:1 #212121, stop:0.4 #343434, stop:0.2 #343434, stop:0.1 #ffaa00); 399 | } 400 | 401 | QRadioButton::indicator:checked, QRadioButton::indicator:unchecked{ 402 | color: #b1b1b1; 403 | background-color: #323232; 404 | border: 1px solid #b1b1b1; 405 | border-radius: 6px; 406 | } 407 | 408 | QRadioButton::indicator:checked 409 | { 410 | background-color: qradialgradient( 411 | cx: 0.5, cy: 0.5, 412 | fx: 0.5, fy: 0.5, 413 | radius: 1.0, 414 | stop: 0.25 #ffaa00, 415 
def safeEval(text, fallback = 0.0, type = "float"):
    """Evaluate ``text`` as a Python expression, with a fallback on failure.

    text     -- expression to evaluate, e.g. "3" or "0.5*2"
    fallback -- value returned when ``text`` can not be evaluated
    type     -- "int" converts the result (or the fallback) with int(),
                any other value returns it unchanged

    Returns the evaluated value, or ``fallback`` if evaluation or the int
    conversion of the result fails.
    """
    try:
        # NOTE(security): eval() runs arbitrary Python code. This must only
        # be fed text from trusted settings files.
        val = eval(text)
        return int(val) if type == "int" else val
    except Exception:
        # An arbitrary expression can fail in many ways besides SyntaxError
        # (NameError for a bare word, TypeError for None, ZeroDivisionError,
        # ValueError from int(), ...). All of them mean "no usable value".
        return int(fallback) if type == "int" else fallback
granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | project(gpuip_src) 24 | 25 | if(NOT WIN32) 26 | add_definitions(-Wall) 27 | add_definitions(-fPIC) 28 | add_definitions(-O3) 29 | endif() 30 | 31 | # Common variables for compiling the library 32 | set(SOURCE gpuip) 33 | 34 | # Build with OpenCL 35 | if(OpenCL_FOUND AND BUILD_WITH_OPENCL) 36 | add_definitions(-D_GPUIP_OPENCL) 37 | set(SOURCE ${SOURCE} opencl) 38 | endif() 39 | 40 | # Build with CUDA 41 | if(CUDA_FOUND AND BUILD_WITH_CUDA) 42 | add_definitions(-D_GPUIP_CUDA) 43 | set(SOURCE ${SOURCE} cuda) 44 | endif() 45 | 46 | # Build with GLSL (needs OpenGL) 47 | if(OPENGL_FOUND AND BUILD_WITH_GLSL) 48 | add_definitions(-D_GPUIP_GLSL) 49 | set(SOURCE ${SOURCE} glsl) 50 | if(APPLE) 51 | set(SOURCE ${SOURCE} glcontext.m) 52 | endif() 53 | endif() 54 | 55 | # Build the gpuip library 56 | if(BUILD_SHARED_LIB) 57 | set(LIBRARY_TYPE SHARED) 58 | else() 59 | set(LIBRARY_TYPE STATIC) 60 | endif() 61 | add_definitions(-DGPUIP_VERSION=${GPUIP_VERSION}) 62 | add_library(gpuip ${LIBRARY_TYPE} ${SOURCE}) 63 | target_link_libraries(gpuip ${GPUIP_LIBRARIES}) 64 | install(TARGETS gpuip DESTINATION lib COMPONENT devel) 65 | install(FILES gpuip.h DESTINATION include COMPONENT devel) 66 | if (THIRD_PARTY_TARGETS) 67 | add_dependencies(gpuip ${THIRD_PARTY_TARGETS}) 68 | endif() 69 | 70 | # Build python bindings (using boost python) 71 | if(BUILD_PYTHON_BINDINGS) 72 | add_library(pygpuip SHARED python.cpp io_wrapper.cpp) 73 | target_link_libraries(pygpuip gpuip ${GPUIP_PYTHON_LIBRARIES}) 74 | 75 | # Rename python shared lib from libpyGpuip.{so,lib} to pyGpuip.{so,pyd} 76 | set_target_properties(pygpuip PROPERTIES PREFIX "") 77 | if(WIN32) 78 | set_target_properties(pygpuip PROPERTIES SUFFIX ".pyd") 79 | else() 80 | set_target_properties(pygpuip PROPERTIES SUFFIX ".so") 81 | endif() 82 | 83 | # Install python bindings in the python site-packages 84 | execute_process(COMMAND 85 | ${PYTHON_EXECUTABLE} -c 86 | "from distutils.sysconfig import get_python_lib; print
get_python_lib()" 87 | OUTPUT_VARIABLE PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE) 88 | install(TARGETS pygpuip DESTINATION ${PYTHON_SITE_PACKAGES} COMPONENT python) 89 | endif() 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | 25 | #ifndef GPUIP_CUDA_H_ 26 | #define GPUIP_CUDA_H_ 27 | //----------------------------------------------------------------------------// 28 | #include "gpuip.h" 29 | #include 30 | #include 31 | //----------------------------------------------------------------------------// 32 | namespace gpuip { 33 | //----------------------------------------------------------------------------// 34 | class CUDAImpl : public ImageProcessor 35 | { 36 | public: 37 | CUDAImpl(); 38 | 39 | virtual ~CUDAImpl(); 40 | 41 | virtual double Allocate(std::string * err); 42 | 43 | virtual double Build(std::string * err); 44 | 45 | virtual double Run(std::string * err); 46 | 47 | virtual double Copy(Buffer::Ptr buffer, 48 | Buffer::CopyOperation op, 49 | void * data, 50 | std::string * err); 51 | 52 | virtual std::string BoilerplateCode(Kernel::Ptr kernel) const; 53 | 54 | protected: 55 | std::vector _cudaKernels; 56 | bool _cudaBuild; 57 | CUmodule _cudaModule; 58 | cudaEvent_t _start,_stop; 59 | std::map _cudaBuffers; 60 | 61 | bool _LaunchKernel(Kernel & kernel, 62 | const CUfunction & cudaKernel, 63 | std::string * err); 64 | 65 | void _StartTimer(); 66 | 67 | double _StopTimer(); 68 | 69 | bool _FreeBuffers(std::string * err); 70 | 71 | bool _UnloadModule(std::string * err); 72 | }; 73 | //----------------------------------------------------------------------------// 74 | } // end namespace gpuip 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /src/cuda_error.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #ifndef GPUIP_CUDA_ERROR_H_ 26 | #define GPUIP_CUDA_ERROR_H_ 27 | //----------------------------------------------------------------------------// 28 | #include 29 | //----------------------------------------------------------------------------// 30 | namespace gpuip { 31 | //----------------------------------------------------------------------------// 32 | inline bool _cudaErrorGetFunction(CUresult c_err, std::string * err, 33 | const std::string & kernel_name) 34 | { 35 | if (c_err != CUDA_SUCCESS) { 36 | (*err) += "Cuda: Error, could not find kernel named "; 37 | (*err) += kernel_name; 38 | (*err) += "\n"; 39 | switch(c_err) { 40 | //TODO: add cases here 41 | default: 42 | break; 43 | } 44 | return true; 45 | } 46 | return false; 47 | } 48 | //----------------------------------------------------------------------------// 49 | inline bool _cudaErrorMalloc(cudaError_t c_err, std::string * err) 50 | { 51 | if (c_err != cudaSuccess) { 52 | (*err) += "Cuda: error when allocating buffers\n"; 53 | switch(c_err) { 54 | //TODO: add cases here 55 | default: 56 | break; 57 | } 58 | return true; 59 | } 60 | return false; 61 | } 62 | 
//----------------------------------------------------------------------------// 63 | inline bool _cudaErrorFree(cudaError_t c_err, std::string * err) 64 | { 65 | if (c_err != cudaSuccess) { 66 | (*err) += "Cuda: error when releasing buffers\n"; 67 | switch(c_err) { 68 | //TODO: add cases here 69 | default: 70 | break; 71 | } 72 | return true; 73 | } 74 | return false; 75 | } 76 | //----------------------------------------------------------------------------// 77 | inline bool _cudaErrorCopy(cudaError_t c_err, std::string * err, 78 | const std::string & buffer, Buffer::CopyOperation op) 79 | { 80 | if (c_err != cudaSuccess) { 81 | (*err) += "CUDA: error when copying data "; 82 | (*err) += op == Buffer::COPY_FROM_GPU ? "FROM" : "TO"; 83 | (*err) += " buffer "; 84 | (*err) += buffer; 85 | switch(c_err) { 86 | case cudaErrorInvalidValue: 87 | (*err) += ". Invalid value.\n"; 88 | break; 89 | case cudaErrorInvalidDevicePointer: 90 | (*err) += ". Invalid device pointer.\n"; 91 | break; 92 | case cudaErrorInvalidMemcpyDirection: 93 | (*err) += ". Invalid Memcpy direction.\n"; 94 | break; 95 | case cudaErrorIllegalAddress: 96 | (*err) += ". Illegal address.\n"; 97 | break; 98 | //TODO: add cases here 99 | default: { 100 | (*err) += ". 
Unknown error enum: "; 101 | std::stringstream ss; 102 | ss << c_err << std::endl; 103 | (*err) += ss.str(); 104 | } 105 | } 106 | return true; 107 | } 108 | return false; 109 | } 110 | //----------------------------------------------------------------------------// 111 | inline bool _cudaErrorUnloadModule(CUresult c_err, std::string * err) 112 | { 113 | if (c_err != CUDA_SUCCESS) { 114 | (*err) += "Cuda: Error, could not unload module\n"; 115 | switch(c_err) { 116 | //TODO: add cases here 117 | default: 118 | break; 119 | } 120 | return true; 121 | } 122 | return false; 123 | } 124 | //----------------------------------------------------------------------------// 125 | inline bool _cudaErrorLoadModule(CUresult c_err, std::string * err) 126 | { 127 | if (c_err != CUDA_SUCCESS) { 128 | (*err) += "Cuda: Error, could not load module\n"; 129 | switch(c_err) { 130 | //TODO: add cases here 131 | default: 132 | break; 133 | } 134 | return true; 135 | } 136 | return false; 137 | } 138 | //----------------------------------------------------------------------------// 139 | inline bool _cudaErrorCheckParamSet(CUresult c_err, std::string * err, 140 | const std::string & kernel_name) 141 | { 142 | if (c_err != CUDA_SUCCESS) { 143 | (*err) += "Cuda: Error, could not set arguments for kernel named "; 144 | (*err) += kernel_name; 145 | (*err) += "\n"; 146 | switch(c_err) { 147 | //TODO: add cases here 148 | default: 149 | break; 150 | } 151 | return true; 152 | } 153 | return false; 154 | } 155 | //----------------------------------------------------------------------------// 156 | inline bool _cudaErrorParamSetSize(CUresult c_err, std::string * err, 157 | const std::string & kernel_name) 158 | { 159 | if (c_err != CUDA_SUCCESS) { 160 | (*err) += "Cuda: Error, could not set arguments size for kernel named "; 161 | (*err) += kernel_name; 162 | (*err) += "\n"; 163 | switch(c_err) { 164 | //TODO: add cases here 165 | default: 166 | break; 167 | } 168 | return true; 169 | } 170 | 
return false; 171 | } 172 | //----------------------------------------------------------------------------// 173 | inline bool _cudaErrorLaunchKernel(CUresult c_err, std::string * err, 174 | const std::string & kernel_name) 175 | { 176 | if (c_err != CUDA_SUCCESS) { 177 | (*err) += "Cuda: Error, could not launch kernel "; 178 | (*err) += kernel_name; 179 | (*err) += "\n"; 180 | switch(c_err) { 181 | //TODO: add cases here 182 | default: 183 | break; 184 | } 185 | return true; 186 | } 187 | return false; 188 | } 189 | //----------------------------------------------------------------------------// 190 | } // end namespace gpuip 191 | //----------------------------------------------------------------------------// 192 | #endif 193 | -------------------------------------------------------------------------------- /src/glcontext.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #ifndef GPUIP_GL_CONTEXT_H_ 26 | #define GPUIP_GL_CONTEXT_H_ 27 | 28 | #ifdef __APPLE__ 29 | extern "C" { 30 | bool _HasNSGLContext(); 31 | } 32 | #else 33 | # ifdef _WIN32 34 | # include 35 | # include 36 | # else 37 | # include 38 | # endif 39 | #endif 40 | #include 41 | #include 42 | 43 | //----------------------------------------------------------------------------// 44 | namespace gpuip { 45 | //----------------------------------------------------------------------------// 46 | class GLContext 47 | { 48 | public: 49 | static bool Exists() 50 | { 51 | #ifdef __APPLE__ 52 | if (_HasNSGLContext()) { 53 | return true; 54 | } 55 | #else 56 | # ifdef _WIN32 57 | if (wglGetCurrentContext()) { 58 | return true; 59 | } 60 | # else 61 | if (glXGetCurrentContext()) { 62 | return true; 63 | } 64 | # endif 65 | #endif 66 | return false; 67 | } 68 | 69 | static bool Create(std::string * err) 70 | { 71 | if (!glfwInit()) { 72 | (*err) += "gpuip could not initiate GLFW"; 73 | return false; 74 | } 75 | GLFWwindow * window = glfwCreateWindow(1, 1, "", NULL, NULL); 76 | if (!window) { 77 | (*err) += "gpuip could not create window with glfw"; 78 | return false; 79 | } 80 | glfwMakeContextCurrent(window); 81 | return true; 82 | } 83 | 84 | static void Delete() 85 | { 86 | if(glfwGetCurrentContext()) { 87 | glfwTerminate(); 88 | } 89 | } 90 | }; 91 | //----------------------------------------------------------------------------// 92 | }// end namespace gpuip 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /src/glcontext.m: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 
| 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
*/

#if defined(__OBJC__)
// NOTE(review): the framework header name was missing from this copy of the
// file. Cocoa declares NSOpenGLContext (via AppKit) -- confirm against upstream.
#import <Cocoa/Cocoa.h>
#else
#error "No objective C found"
#endif
//----------------------------------------------------------------------------//
// Returns true if the calling thread has a current NSOpenGLContext.
bool _HasNSGLContext()
{
    return [NSOpenGLContext currentContext] != nil;
}
//----------------------------------------------------------------------------//

--------------------------------------------------------------------------------
/src/glsl.h:
--------------------------------------------------------------------------------
/*
The MIT License (MIT)

Copyright (c) 2014 Per Karlsson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
23 | */ 24 | 25 | #ifndef GPUIP_GLSL_H_ 26 | #define GPUIP_GLSL_H_ 27 | //----------------------------------------------------------------------------// 28 | #include "gpuip.h" 29 | #include 30 | //----------------------------------------------------------------------------// 31 | namespace gpuip { 32 | //----------------------------------------------------------------------------// 33 | class GLSLImpl : public ImageProcessor 34 | { 35 | public: 36 | GLSLImpl(); 37 | 38 | virtual ~GLSLImpl(); 39 | 40 | virtual double Allocate(std::string * err); 41 | 42 | virtual double Build(std::string * err); 43 | 44 | virtual double Run(std::string * err); 45 | 46 | virtual double Copy(Buffer::Ptr buffer, 47 | Buffer::CopyOperation op, 48 | void * data, 49 | std::string * err); 50 | 51 | virtual std::string BoilerplateCode(Kernel::Ptr kernel) const; 52 | 53 | protected: 54 | bool _glewInit; 55 | bool _glContextCreated; 56 | GLint64 _timer; 57 | GLuint _vbo; 58 | GLuint _rboId; 59 | GLuint _vertexShaderID; 60 | std::vector _fbos; 61 | std::vector _programs; 62 | std::map _textures; 63 | 64 | bool _DrawQuad(const Kernel & kernel, 65 | GLuint fbo, 66 | GLuint program, 67 | std::string * error); 68 | 69 | bool _InitGLEW(std::string * err); 70 | 71 | void _StartTimer(); 72 | 73 | double _StopTimer(); 74 | 75 | void _DeleteBuffers(); 76 | }; 77 | //----------------------------------------------------------------------------// 78 | } // end namespace gpuip 79 | //----------------------------------------------------------------------------// 80 | #endif 81 | -------------------------------------------------------------------------------- /src/glsl_error.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the 
Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #ifndef GPUIP_OPENCL_ERROR_H_ 26 | #define GPUIP_OPENCL_ERROR_H_ 27 | //----------------------------------------------------------------------------// 28 | #include 29 | //----------------------------------------------------------------------------// 30 | namespace gpuip { 31 | //----------------------------------------------------------------------------// 32 | inline std::string _glErrorToString(GLenum error) 33 | { 34 | std::stringstream ss; 35 | //deprecated in mac os 36 | #ifndef __APPLE__ 37 | const GLubyte * errLog = gluErrorString(error); 38 | ss << errLog; 39 | #endif 40 | return ss.str(); 41 | } 42 | //----------------------------------------------------------------------------// 43 | inline bool _glCheckBuildError(GLuint shader, 44 | GLuint vert_shader, 45 | GLuint frag_shader, 46 | std::string * err) 47 | { 48 | GLint gl_err; 49 | glGetProgramiv(shader, GL_LINK_STATUS, &gl_err); 50 | if (gl_err == 0) { 51 | std::stringstream ss; 52 | #define ERROR_BUFSIZE 1024 53 | GLchar errorLog[ERROR_BUFSIZE]; 54 | GLsizei length; 55 | 56 
| ss << "GLSL build error.\n"; 57 | 58 | glGetShaderInfoLog(vert_shader, ERROR_BUFSIZE, &length, errorLog); 59 | ss << "Vertex shader errors:\n" << std::string(errorLog, length); 60 | 61 | glGetShaderInfoLog(frag_shader, ERROR_BUFSIZE, &length, errorLog); 62 | ss << "\nFragment shader errors:\n" << std::string(errorLog, length); 63 | 64 | glGetShaderInfoLog(shader, ERROR_BUFSIZE, &length, errorLog); 65 | ss << "\nLinker errors:\n" << std::string(errorLog, length); 66 | 67 | (*err) += ss.str(); 68 | 69 | return true; 70 | } 71 | return false; 72 | } 73 | //----------------------------------------------------------------------------// 74 | inline bool _glErrorFramebuffer(std::string * err) 75 | { 76 | GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); 77 | if(status != GL_FRAMEBUFFER_COMPLETE) { 78 | (*err) += "GLSL error: Framebuffer error. "; 79 | switch(status) { 80 | case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: 81 | (*err) += "At least one attachment point with a renderbuffer " 82 | "or texture attached has its attached object no longer " 83 | "in existence or has an attached image with a width or " 84 | "height of zero.\n"; 85 | break; 86 | // This enum doesn't exist in older OpenGL versions. 
87 | //case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS: 88 | // (*err) += "Not all attached images have the same width and " 89 | // "height.\n"; 90 | // break; 91 | case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: 92 | (*err) += "No images are attached to the framebuffer.\n"; 93 | case GL_FRAMEBUFFER_UNSUPPORTED: 94 | (*err) += "The combination of internal formats of the attached " 95 | "images violates an implementation-dependent set of " 96 | "restrictions.\n"; 97 | break; 98 | default: 99 | break; 100 | }; 101 | 102 | 103 | return true; 104 | } 105 | return false; 106 | } 107 | //----------------------------------------------------------------------------// 108 | inline bool _glErrorCreateTexture(std::string * err) 109 | { 110 | GLenum gl_err = glGetError(); 111 | if (gl_err != GL_NO_ERROR) { 112 | (*err) += "GLSL error when creating textures.\n"; 113 | (*err) += _glErrorToString(gl_err); 114 | return true; 115 | }; 116 | return false; 117 | } 118 | //----------------------------------------------------------------------------// 119 | inline bool _glErrorDrawSetup(std::string * err, 120 | const std::string & kernel_name) 121 | { 122 | GLenum gl_err = glGetError(); 123 | if (gl_err != GL_NO_ERROR) { 124 | (*err) += "GLSL error in setup for kernel: "; 125 | (*err) += kernel_name; 126 | (*err) += "\n"; 127 | (*err) += _glErrorToString(gl_err); 128 | return true; 129 | }; 130 | return false; 131 | } 132 | //----------------------------------------------------------------------------// 133 | inline bool _glErrorDraw(std::string * err, const std::string & kernel_name) 134 | { 135 | GLenum gl_err = glGetError(); 136 | if (gl_err != GL_NO_ERROR) { 137 | (*err) += "GLSL error when drawing kernel:"; 138 | (*err) += kernel_name; 139 | (*err) += "\n"; 140 | (*err) += _glErrorToString(gl_err); 141 | return true; 142 | }; 143 | return false; 144 | } 145 | //----------------------------------------------------------------------------// 146 | inline bool _glErrorCopy(std::string * 
err, 147 | const std::string & buffer, 148 | Buffer::CopyOperation op) 149 | { 150 | GLenum gl_err = glGetError(); 151 | if (gl_err != GL_NO_ERROR) { 152 | (*err) += "GLSL: error when copying data "; 153 | (*err) += op == Buffer::COPY_FROM_GPU ? "FROM" : "TO"; 154 | (*err) += " buffer "; 155 | (*err) += buffer; 156 | (*err) += "\n"; 157 | (*err) += _glErrorToString(gl_err); 158 | return true; 159 | } 160 | return false; 161 | } 162 | 163 | } // end namespace gpuip 164 | //----------------------------------------------------------------------------// 165 | #endif 166 | -------------------------------------------------------------------------------- /src/gpuip.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | 25 | #include "gpuip.h" 26 | //----------------------------------------------------------------------------// 27 | #ifdef _GPUIP_OPENCL 28 | #include "opencl.h" 29 | #endif 30 | //----------------------------------------------------------------------------// 31 | #ifdef _GPUIP_CUDA 32 | #include "cuda.h" 33 | #endif 34 | //----------------------------------------------------------------------------// 35 | #ifdef _GPUIP_GLSL 36 | #include "glsl.h" 37 | #endif 38 | //----------------------------------------------------------------------------// 39 | namespace gpuip { 40 | //----------------------------------------------------------------------------// 41 | ImageProcessor::Ptr ImageProcessor::Create(GpuEnvironment env) 42 | { 43 | switch(env) { 44 | case OpenCL: 45 | #ifdef _GPUIP_OPENCL 46 | return ImageProcessor::Ptr(new OpenCLImpl()); 47 | #else 48 | throw std::logic_error("gpuip was not built with OpenCL"); 49 | #endif 50 | case CUDA: 51 | #ifdef _GPUIP_CUDA 52 | return ImageProcessor::Ptr(new CUDAImpl()); 53 | #else 54 | throw std::logic_error("gpuip was not built with CUDA"); 55 | #endif 56 | case GLSL: 57 | #ifdef _GPUIP_GLSL 58 | return ImageProcessor::Ptr(new GLSLImpl()); 59 | #else 60 | throw std::logic_error("gpuip was not built with GLSL"); 61 | #endif 62 | default: 63 | std::cerr << "gpuip error: Could not create env" << std::endl; 64 | return ImageProcessor::Ptr(); 65 | } 66 | } 67 | //----------------------------------------------------------------------------// 68 | bool ImageProcessor::CanCreate(GpuEnvironment env) 69 | { 70 | switch(env) { 71 | case OpenCL: 72 | #ifdef _GPUIP_OPENCL 73 | return true; 74 | #else 75 | return false; 76 | #endif 77 | case CUDA: 78 | #ifdef _GPUIP_CUDA 79 | return true; 80 | #else 81 | return false; 82 | #endif 83 | case GLSL: 84 | #ifdef _GPUIP_GLSL 85 | return true; 86 | #else 87 | return false; 88 | #endif 89 | default: 90 | return false; 91 | } 92 | } 93 | 
//----------------------------------------------------------------------------// 94 | Buffer::Buffer(const std::string & name_, Type type_, unsigned int channels_) 95 | : name(name_), type(type_), channels(channels_) 96 | { 97 | } 98 | //----------------------------------------------------------------------------// 99 | Kernel::Kernel(const std::string & name_) 100 | : name(name_) 101 | { 102 | } 103 | //----------------------------------------------------------------------------// 104 | Kernel::BufferLink::BufferLink(Buffer::Ptr buffer_, const std::string & name_) 105 | : buffer(buffer_), name(name_) 106 | { 107 | } 108 | //----------------------------------------------------------------------------// 109 | ImageProcessor::ImageProcessor(GpuEnvironment env) 110 | : _env(env), _w(0), _h(0) 111 | { 112 | 113 | } 114 | //----------------------------------------------------------------------------// 115 | Buffer::Ptr 116 | ImageProcessor::CreateBuffer(const std::string & name, 117 | Buffer::Type type, 118 | unsigned int channels) 119 | { 120 | if (_buffers.find(name) == _buffers.end()) { 121 | Buffer::Ptr p = Buffer::Ptr(new Buffer(name, type, channels)); 122 | _buffers[name] = p; 123 | return p; 124 | } else { 125 | std::cerr << "gpuip error: Buffer named " << name 126 | << " already exists. Skipping..." 
<< std::endl; 127 | return Buffer::Ptr(new Buffer(name, type, channels)); 128 | } 129 | } 130 | //----------------------------------------------------------------------------// 131 | Kernel::Ptr ImageProcessor::CreateKernel(const std::string & name) 132 | { 133 | _kernels.push_back(Kernel::Ptr(new Kernel(name))); 134 | return _kernels.back(); 135 | } 136 | //----------------------------------------------------------------------------// 137 | void ImageProcessor::SetDimensions(unsigned int width, unsigned int height) 138 | { 139 | _w = width; 140 | _h = height; 141 | } 142 | //----------------------------------------------------------------------------// 143 | double ImageProcessor::Allocate(std::string * error) 144 | { 145 | throw std::logic_error("'Allocate' not implemented in subclass"); 146 | } 147 | //----------------------------------------------------------------------------// 148 | double ImageProcessor::Build(std::string * error) 149 | { 150 | throw std::logic_error("'Build' not implemented in subclass"); 151 | } 152 | //----------------------------------------------------------------------------// 153 | double ImageProcessor::Run(std::string * error) 154 | { 155 | throw std::logic_error("'Run' not implemented in subclass"); 156 | } 157 | //----------------------------------------------------------------------------// 158 | double ImageProcessor::Copy(Buffer::Ptr buffer, 159 | Buffer::CopyOperation operation, 160 | void * data, 161 | std::string * error) 162 | { 163 | throw std::logic_error("'Copy' not implemented in subclass"); 164 | } 165 | //----------------------------------------------------------------------------// 166 | std::string ImageProcessor::BoilerplateCode(Kernel::Ptr kernel) const 167 | { 168 | throw std::logic_error("'BoilerplateCode' not implemented in subclass"); 169 | } 170 | //----------------------------------------------------------------------------// 171 | unsigned int ImageProcessor::_BufferSize(Buffer::Ptr buffer) const 172 | { 
173 | unsigned int bpp = 0; // bytes per pixel 174 | switch(buffer->type) { 175 | case Buffer::UNSIGNED_BYTE: 176 | bpp = buffer->channels; 177 | break; 178 | case Buffer::HALF: 179 | bpp = sizeof(float)/2 * buffer->channels; 180 | break; 181 | case Buffer::FLOAT: 182 | bpp = sizeof(float) * buffer->channels; 183 | break; 184 | } 185 | return bpp * _w * _h; 186 | } 187 | //----------------------------------------------------------------------------// 188 | } // end namespace gpuip 189 | //----------------------------------------------------------------------------// 190 | -------------------------------------------------------------------------------- /src/gpuip.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | 25 | #ifndef GPUIP_H_ 26 | #define GPUIP_H_ 27 | //----------------------------------------------------------------------------// 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | //----------------------------------------------------------------------------// 34 | #ifdef _GPUIP_PYTHON_BINDINGS 35 | #include 36 | #else 37 | #include 38 | #endif 39 | //----------------------------------------------------------------------------// 40 | /*! \file */ 41 | /*! gpuip is a cross-platform framework for image processing on the GPU. */ 42 | namespace gpuip { 43 | //----------------------------------------------------------------------------// 44 | /*! If a function that is supposed to return execution time has failed, 45 | it will return this value instead. */ 46 | #define GPUIP_ERROR -1.0 47 | //----------------------------------------------------------------------------// 48 | /*! Different GPU environments available. */ 49 | enum GpuEnvironment { 50 | /*! 51 | OpenCL, Open Computing Language by Khronos Group. */ 52 | OpenCL, 53 | 54 | /*! 55 | CUDA, Compute Unified Device Architecture by NVIDIA Corporation.*/ 56 | CUDA, 57 | 58 | /*! 59 | GLSL, OpenGL Shading Language by Khronos Group. */ 60 | GLSL }; 61 | //----------------------------------------------------------------------------// 62 | /*! 63 | \struct Buffer 64 | \brief A chunk of memory allocated on the GPU. 65 | 66 | This class 67 | */ 68 | struct Buffer 69 | { 70 | /*! \brief Smart pointer. */ 71 | #ifdef _GPUIP_PYTHON_BINDINGS 72 | typedef boost::shared_ptr Ptr; 73 | #else 74 | typedef std::tr1::shared_ptr Ptr; 75 | #endif 76 | /*! Operation when copying memory between CPU and GPU */ 77 | enum CopyOperation{ 78 | /*! Copy data from CPU to GPU */ 79 | COPY_TO_GPU, 80 | /*! Copy data from GPU to CPU */ 81 | COPY_FROM_GPU }; 82 | 83 | /*! \brief Supported data types */ 84 | enum Type{ 85 | /*! 8 bits per channel. 
Used in png, jpeg, tga, tiff formats */ 86 | UNSIGNED_BYTE, 87 | 88 | /*! 16 bits per channel. Used in exr images. */ 89 | HALF, 90 | 91 | /*! 32 bits per channel. Used in exr images, typically when half is 92 | not supported in the current environemnt. */ 93 | FLOAT }; 94 | 95 | Buffer(const std::string & name, Type type, unsigned int channels); 96 | 97 | /*! \brief Unique identifying name. 98 | 99 | Each buffer has a unique name. The buffer can still be called referenced 100 | as something else in a kernel. */ 101 | const std::string name; 102 | 103 | Type type; 104 | 105 | /*! \brief Channels of data per pixel. 106 | 107 | A typical RGBA image has 4 channels. Gpuip buffers with 2 or 3 channels 108 | have not been tested as much as 1 or 4 channel buffers. */ 109 | unsigned int channels; 110 | }; 111 | //----------------------------------------------------------------------------// 112 | /*! 113 | \struct Parameter 114 | \brief A parameter has a name and a value. 115 | */ 116 | template 117 | struct Parameter 118 | { 119 | Parameter(const std::string & n, T v) : name(n), value(v) {} 120 | std::string name; 121 | T value; 122 | }; 123 | //----------------------------------------------------------------------------// 124 | /*! 125 | \struct Kernel 126 | \brief 127 | */ 128 | struct Kernel 129 | { 130 | /*! \brief Smart pointer. */ 131 | #ifdef _GPUIP_PYTHON_BINDINGS 132 | typedef boost::shared_ptr Ptr; 133 | #else 134 | typedef std::tr1::shared_ptr Ptr; 135 | #endif 136 | /*! 137 | \struct BufferLink 138 | \brief Tells a kernel which buffers to use in the argument list 139 | */ 140 | struct BufferLink 141 | { 142 | BufferLink(Buffer::Ptr buffer_, const std::string & name_); 143 | 144 | /*! \brief Buffer to be used in the kernel arguments list. */ 145 | Buffer::Ptr buffer; 146 | 147 | /*! \brief The name of buffer in kernel arguments list. 
148 | 149 | This does not have to be the same as the Buffer::name.*/ 150 | std::string name; 151 | }; 152 | Kernel(const std::string & name); 153 | 154 | /*! \brief Unique identifying name. 155 | 156 | Each kernel must have a unique name. 157 | The kernel function in OpenCL and CUDA has to have the same name. 158 | */ 159 | const std::string name; 160 | 161 | /*! \brief Kernel code. 162 | 163 | Must be set before the ImageProcessor::Build call.*/ 164 | std::string code; 165 | 166 | /*! \brief Buffers used for input data. Can not be modified. 167 | 168 | Must be set before the ImageProcessor::Run call.*/ 169 | std::vector inBuffers; 170 | 171 | /*! \brief Buffers used to output data. Can not be read from. 172 | 173 | Must be set before the ImageProcessor::Run call. */ 174 | std::vector outBuffers; 175 | 176 | /*! \brief Integer parameters. 177 | 178 | Must be set before the ImageProcessor::Run call. */ 179 | std::vector > paramsInt; 180 | 181 | /*! \brief Float parameters. 182 | 183 | Must be set before the ImageProcessor::Run call. */ 184 | std::vector > paramsFloat; 185 | }; 186 | //----------------------------------------------------------------------------// 187 | /*! 188 | \class ImageProcessor 189 | \brief 190 | */ 191 | class ImageProcessor 192 | { 193 | public: 194 | /*! \brief Smart pointer. */ 195 | #ifdef _GPUIP_PYTHON_BINDINGS 196 | typedef boost::shared_ptr Ptr; 197 | #else 198 | typedef std::tr1::shared_ptr Ptr; 199 | #endif 200 | 201 | /*! \brief Factory function to create an ImageProcessor entity. */ 202 | static ImageProcessor::Ptr Create(GpuEnvironment env); 203 | 204 | virtual ~ImageProcessor() {} 205 | 206 | /*! \brief Check if gpuip was compiled with a GpuEnvironment. */ 207 | static bool CanCreate(GpuEnvironment env); 208 | 209 | /*! \brief Returns the current GpuEnvironment. */ 210 | GpuEnvironment Environment() const 211 | { 212 | return _env; 213 | } 214 | 215 | /*! \brief Set the dimensions of algorithms. Must be set explicitly. 
*/ 216 | void SetDimensions(unsigned int width, unsigned int height); 217 | 218 | /*! \brief Returns the image width in number of pixels */ 219 | unsigned int Width() const 220 | { 221 | return _w; 222 | } 223 | 224 | /*! \brief Returns the image height in number of pixels */ 225 | unsigned int Height() const 226 | { 227 | return _h; 228 | } 229 | 230 | /*! \brief Creates a Buffer object with allocation info 231 | 232 | \param name Unique identifying name of buffer 233 | \param type per channel data type 234 | \param channels number of channels of data per pixel 235 | 236 | \return A smart pointer to new registered Buffer object 237 | 238 | This is the only way a buffer can be registered to an ImageProcessor. 239 | Modifications can be made to the Buffer object as long as they are made 240 | before the ImageProcessor::Allocate call. 241 | */ 242 | Buffer::Ptr CreateBuffer(const std::string & name, 243 | Buffer::Type type, 244 | unsigned int channels); 245 | 246 | /*! \brief Creates and registers a Kernel object. 247 | 248 | \param name Unique identifying name of the kernel 249 | 250 | \return A smart pointer to new registered Kernel object 251 | 252 | This is the only way a kernel can be registered to an ImageProcessor. 253 | Kernels will be run in the order they are created. 254 | Modifications can be made to the Kernel object as long as Kernel::code 255 | is set before the ImageProcessor::Build call and Kernel::inBuffers, 256 | Kernel::outBuffers, Kernel::paramsInt, Kernel::paramsFloat are set 257 | before the ImageProcessor::Run call. 258 | */ 259 | Kernel::Ptr CreateKernel(const std::string & name); 260 | 261 | /*! \brief Allocates needed memory on the GPU. 262 | \param error if function fails, the explaining error string is stored here 263 | \return execution time in milliseconds. \ref GPUIP_ERROR on failure 264 | 265 | Call this function once all buffers have been created.
Once things have 266 | been allocated on the GPU, ImageProcessor::Copy can be called. This 267 | function can be called multiple times since it starts 268 | with resetting previously allocated memory. 269 | */ 270 | virtual double Allocate(std::string * error); 271 | 272 | /*! \brief Compiles the Kernel::code for each Kernel object. 273 | \param error if function fails, the explaining error string is stored here 274 | \return execution time in milliseconds. \ref GPUIP_ERROR on failure 275 | 276 | Call this function once the Kernel::code has been set for all kernels. 277 | Can be called multiple times to rebuild the kernels. 278 | */ 279 | virtual double Build(std::string * error); 280 | 281 | /*! \brief Runs all of the image processing kernels. 282 | \param error if function fails, the explaining error string is stored here 283 | \return execution time in milliseconds. \ref GPUIP_ERROR on failure 284 | 285 | Runs all the image processing kernels in the order they were created. 286 | ImageProcessor::Build and ImageProcessor::Allocate must have been called 287 | before this function. 288 | */ 289 | virtual double Run(std::string * error); 290 | 291 | /*! \brief Data transfer between the CPU and the GPU. 292 | \param buffer buffer on the gpu to copy to/from 293 | \param operation decides if the copy is from the gpu or to the gpu 294 | \param data points to allocated memory on the CPU 295 | \param error if function fails, the explaining error string is stored here 296 | \return execution time in milliseconds. \ref GPUIP_ERROR on failure 297 | 298 | Copies data between the CPU and the GPU. User must guarantee that \c data 299 | is pointing to a memory space on the CPU with enough allocated memory. 300 | ImageProcessor::Allocate must be called at least once before this 301 | function. 302 | */ 303 | virtual double Copy(Buffer::Ptr buffer, 304 | Buffer::CopyOperation operation, 305 | void * data, 306 | std::string * error); 307 | 308 | /*!
\brief Returns boilerplate code for a given kernel. 309 | \param kernel Kernel to be processed 310 | \return boilerplate code 311 | 312 | Processes the kernel and the buffers to produce boilerplate code. 313 | Boilerplate code is often a good starting step when writing gpu kernels 314 | since it removes some of the redundant code that is shared between 315 | all kernels. It also guarantees that the argument list is correct. 316 | */ 317 | virtual std::string BoilerplateCode(Kernel::Ptr kernel) const; 318 | 319 | protected: 320 | ImageProcessor(GpuEnvironment env); 321 | 322 | const GpuEnvironment _env; 323 | unsigned int _w; // width 324 | unsigned int _h; // height 325 | std::map _buffers; 326 | std::vector _kernels; 327 | 328 | unsigned int _BufferSize(Buffer::Ptr buffer) const; 329 | 330 | private: 331 | ImageProcessor(); 332 | ImageProcessor(const ImageProcessor &); 333 | void operator=(const ImageProcessor &); 334 | }; 335 | //----------------------------------------------------------------------------// 336 | } //end namespace gpuip 337 | //----------------------------------------------------------------------------// 338 | #endif 339 | -------------------------------------------------------------------------------- /src/io_wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial
portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #ifndef GPUIP_IO_WRAPPER_H_ 26 | #define GPUIP_IO_WRAPPER_H_ 27 | //----------------------------------------------------------------------------// 28 | #include 29 | //----------------------------------------------------------------------------// 30 | namespace boost { namespace numpy { class ndarray; } } 31 | namespace gpuip { 32 | struct Buffer; 33 | //----------------------------------------------------------------------------// 34 | namespace io { 35 | //----------------------------------------------------------------------------// 36 | void ReadFromFile(boost::numpy::ndarray * npyarray, 37 | const Buffer & buffer, 38 | const std::string & filename, 39 | int numThreads = 0); 40 | //----------------------------------------------------------------------------// 41 | void WriteToFile(const boost::numpy::ndarray * npyarray, 42 | const Buffer & buffer, 43 | const std::string & filename, 44 | int numThreads = 0); 45 | //----------------------------------------------------------------------------// 46 | } // end namespace io 47 | } // end namespace gpuip 48 | //----------------------------------------------------------------------------// 49 | #endif 50 | -------------------------------------------------------------------------------- /src/opencl.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | 25 | #ifndef GPUIP_OPENCL_H_ 26 | #define GPUIP_OPENCL_H_ 27 | //----------------------------------------------------------------------------// 28 | #include "gpuip.h" 29 | #ifdef __APPLE__ 30 | #include 31 | #else 32 | #include 33 | #endif 34 | //----------------------------------------------------------------------------// 35 | namespace gpuip { 36 | //----------------------------------------------------------------------------// 37 | class OpenCLImpl : public ImageProcessor 38 | { 39 | public: 40 | OpenCLImpl(); 41 | 42 | virtual ~OpenCLImpl(); 43 | 44 | virtual double Allocate(std::string * err); 45 | 46 | virtual double Build(std::string * err); 47 | 48 | virtual double Run(std::string * err); 49 | 50 | virtual double Copy(Buffer::Ptr buffer, 51 | Buffer::CopyOperation op, 52 | void * data, 53 | std::string * err); 54 | 55 | virtual std::string BoilerplateCode(Kernel::Ptr kernel) const; 56 | 57 | protected: 58 | cl_device_id _device_id; 59 | cl_context _ctx; 60 | cl_command_queue _queue; 61 | 62 | std::vector _clKernels; 63 | std::map _clBuffers; 64 | 65 | private: 66 | bool _EnqueueKernel(const Kernel & kernel, 67 | const cl_kernel & clKernel, 68 | cl_event & event, 69 | std::string * err); 70 | 71 | bool _ReleaseBuffers(std::string * err); 72 | 73 | bool _ReleaseKernels(std::string * err); 74 | }; 75 | //----------------------------------------------------------------------------// 76 | } // end namespace gpuip 77 | //----------------------------------------------------------------------------// 78 | #endif 79 | -------------------------------------------------------------------------------- /src/opencl_error.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the 
Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #ifndef GPUIP_OPENCL_ERROR_H_ 26 | #define GPUIP_OPENCL_ERROR_H_ 27 | #include 28 | //----------------------------------------------------------------------------// 29 | namespace gpuip { 30 | //----------------------------------------------------------------------------// 31 | inline bool _clErrorEnqueueKernel(cl_int cl_err, std::string * err, 32 | const gpuip::Kernel & kernel) 33 | { 34 | if (cl_err != CL_SUCCESS) { 35 | (*err) += "OpenCL: error when enqueuing kernel "; 36 | (*err) += kernel.name; 37 | std::cout << cl_err << std::endl; 38 | switch(cl_err) { 39 | case CL_INVALID_KERNEL_ARGS: { 40 | (*err) += ". Invalid kernel arguments. 
The gpuip kernel has the" 41 | " following data:\n"; 42 | std::stringstream ss; 43 | ss << "In buffers: "; 44 | for (size_t i = 0; i < kernel.inBuffers.size(); ++i) { 45 | ss << kernel.inBuffers[i].name << "(" 46 | << kernel.inBuffers[i].buffer->name << "), "; 47 | } 48 | ss << "\n"; 49 | 50 | ss << "Out buffers: "; 51 | for (size_t i = 0; i < kernel.outBuffers.size(); ++i) { 52 | ss << kernel.outBuffers[i].name << "(" 53 | << kernel.outBuffers[i].buffer->name << "), "; 54 | } 55 | ss << "\n"; 56 | 57 | ss << "Parameters int: "; 58 | for (size_t i = 0; i < kernel.paramsInt.size(); ++i) { 59 | ss << "(" << kernel.paramsInt[i].name << "," 60 | << kernel.paramsInt[i].value << "), "; 61 | } 62 | ss << "\n"; 63 | 64 | ss << "Parameters float: "; 65 | for (size_t i = 0; i < kernel.paramsFloat.size(); ++i) { 66 | ss << "(" << kernel.paramsFloat[i].name << "," 67 | << kernel.paramsFloat[i].value << "), "; 68 | } 69 | ss << "\n"; 70 | (*err) += ss.str(); 71 | break; 72 | } 73 | default: 74 | break; 75 | } 76 | return true; 77 | } 78 | return false; 79 | } 80 | //----------------------------------------------------------------------------// 81 | inline bool _clErrorInitBuffers(cl_int cl_err, std::string * err) 82 | { 83 | if (cl_err != CL_SUCCESS) { 84 | (*err) += "OpenCL: error when creating buffers\n"; 85 | switch(cl_err) { 86 | //TODO: add cases here 87 | default: 88 | break; 89 | } 90 | return true; 91 | } 92 | return false; 93 | } 94 | //----------------------------------------------------------------------------// 95 | inline bool _clErrorReleaseMemObject(cl_int cl_err, std::string * err) 96 | { 97 | if (cl_err != CL_SUCCESS) { 98 | (*err) += "OpenCL: error when releasing buffers\n"; 99 | switch(cl_err) { 100 | //TODO: add cases here 101 | default: 102 | break; 103 | } 104 | return true; 105 | } 106 | return false; 107 | } 108 | //----------------------------------------------------------------------------// 109 | inline bool _clErrorReleaseKernel(cl_int cl_err, 
std::string * err) 110 | { 111 | if (cl_err != CL_SUCCESS) { 112 | (*err) += "OpenCL: error when releasing kernel\n"; 113 | switch(cl_err) { 114 | //TODO: add cases here 115 | default: 116 | break; 117 | } 118 | return true; 119 | } 120 | return false; 121 | } 122 | //----------------------------------------------------------------------------// 123 | inline bool _clErrorCreateProgram(cl_int cl_err, std::string * err) 124 | { 125 | if (cl_err != CL_SUCCESS) { 126 | (*err) += "OpenCL: error when creating program\n"; 127 | switch(cl_err) { 128 | //TODO: add cases here 129 | default: 130 | break; 131 | } 132 | return true; 133 | } 134 | return false; 135 | } 136 | //----------------------------------------------------------------------------// 137 | inline bool _clErrorBuildProgram(cl_int cl_err, std::string * err, 138 | cl_program p, cl_device_id device_id, 139 | std::string kernel_name) 140 | { 141 | if (cl_err != CL_SUCCESS) { 142 | (*err) += "OpenCL: error when building kernel: "; 143 | (*err) += kernel_name; 144 | (*err) += "\n\n"; 145 | char buf[0x10000]; 146 | clGetProgramBuildInfo(p, device_id, CL_PROGRAM_BUILD_LOG, 147 | 0x10000, buf, NULL); 148 | (*err) += std::string(buf); 149 | return true; 150 | } 151 | return false; 152 | } 153 | //----------------------------------------------------------------------------// 154 | inline bool _clErrorCreateKernel(cl_int cl_err, std::string * err) 155 | { 156 | if (cl_err != CL_SUCCESS) { 157 | (*err) += "OpenCL: error when creating kernel\n"; 158 | switch(cl_err) { 159 | //TODO: add cases here 160 | default: 161 | break; 162 | } 163 | return true; 164 | } 165 | return false; 166 | } 167 | //----------------------------------------------------------------------------// 168 | inline bool _clErrorCopy(cl_int cl_err, std::string * err, 169 | const std::string & buffer, Buffer::CopyOperation op) 170 | { 171 | if (cl_err != CL_SUCCESS) { 172 | (*err) += "OpenCL: error when copying data "; 173 | (*err) += op == 
Buffer::COPY_FROM_GPU ? "FROM" : "TO"; 174 | (*err) += " buffer"; 175 | (*err) += buffer; 176 | switch(cl_err) { 177 | case CL_INVALID_MEM_OBJECT: 178 | (*err) += ". Invalid memory object. Does the buffer exist and " 179 | "has it been created? " 180 | "(i.e. gpuip::ImageProcessor::CreateBuffer)."; 181 | default: 182 | break; 183 | } 184 | return true; 185 | } 186 | return false; 187 | } 188 | //----------------------------------------------------------------------------// 189 | inline bool _clErrorSetKernelArg(cl_int cl_err, std::string * err, 190 | const std::string & kernel_name) 191 | { 192 | if (cl_err != CL_SUCCESS) { 193 | (*err) += "OpenCL: error in argument setup in kernel "; 194 | (*err) += kernel_name; 195 | switch(cl_err) { 196 | //TODO: add cases here 197 | default: 198 | break; 199 | } 200 | return true; 201 | } 202 | return false; 203 | } 204 | //----------------------------------------------------------------------------// 205 | } // end namespace gpuip 206 | //----------------------------------------------------------------------------// 207 | #endif 208 | -------------------------------------------------------------------------------- /src/python.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2014 Per Karlsson 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | #include "gpuip.h" 26 | #include "io_wrapper.h" 27 | #include 28 | #include 29 | //----------------------------------------------------------------------------// 30 | namespace bp = boost::python; 31 | namespace np = boost::numpy; 32 | //----------------------------------------------------------------------------// 33 | namespace gpuip { 34 | //----------------------------------------------------------------------------// 35 | namespace python { 36 | //----------------------------------------------------------------------------// 37 | class BufferWrapper 38 | { 39 | public: 40 | BufferWrapper(gpuip::Buffer::Ptr b) 41 | : buffer(b), 42 | data(np::empty( 43 | bp::make_tuple(0,0), 44 | np::dtype::get_builtin())) {} 45 | 46 | std::string name() const 47 | { 48 | return buffer->name; 49 | } 50 | 51 | gpuip::Buffer::Type type() const 52 | { 53 | return buffer->type; 54 | } 55 | 56 | unsigned int channels() const 57 | { 58 | return buffer->channels; 59 | } 60 | 61 | std::string Read(const std::string & filename) 62 | { 63 | return ReadMT(filename, 0); 64 | } 65 | 66 | std::string ReadMT(const std::string & filename, int numThreads) 67 | { 68 | std::string err; 69 | gpuip::io::ReadFromFile(&data, *buffer.get(), filename, numThreads); 70 | return err; 71 | } 72 | 73 | std::string Write(const std::string & filename) 74 | { 75 | return WriteMT(filename, 0); 76 | } 77 | 78 | std::string WriteMT(const std::string & filename, int numThreads) 79 | { 80 | std::string err; 
81 | gpuip::io::WriteToFile(&data, *buffer.get(), filename, numThreads); 82 | return err; 83 | } 84 | 85 | gpuip::Buffer::Ptr buffer; 86 | np::ndarray data; 87 | }; 88 | //----------------------------------------------------------------------------// 89 | class KernelWrapper : public gpuip::Kernel 90 | { 91 | public: 92 | void SetInBuffer(const std::string & kernelBufferName, 93 | boost::shared_ptr buffer) 94 | { 95 | for(size_t i = 0; i < this->inBuffers.size(); ++i) { 96 | if (this->inBuffers[i].name == kernelBufferName) { 97 | this->inBuffers[i].buffer = buffer->buffer; 98 | return; 99 | } 100 | } 101 | this->inBuffers.push_back( 102 | gpuip::Kernel::BufferLink(buffer->buffer, kernelBufferName)); 103 | } 104 | 105 | void SetOutBuffer(const std::string & kernelBufferName, 106 | boost::shared_ptr buffer) 107 | { 108 | for(size_t i = 0; i < this->outBuffers.size(); ++i) { 109 | if (this->outBuffers[i].name == kernelBufferName) { 110 | this->outBuffers[i].buffer = buffer->buffer; 111 | return; 112 | } 113 | } 114 | this->outBuffers.push_back( 115 | gpuip::Kernel::BufferLink(buffer->buffer, kernelBufferName)); 116 | } 117 | 118 | void SetParamInt(const gpuip::Parameter & param) 119 | { 120 | for(size_t i = 0 ; i < this->paramsInt.size(); ++i) { 121 | if (this->paramsInt[i].name == param.name) { 122 | this->paramsInt[i].value = param.value; 123 | return; 124 | } 125 | } 126 | this->paramsInt.push_back(param); 127 | } 128 | 129 | void SetParamFloat(const gpuip::Parameter & param) 130 | { 131 | for(size_t i = 0 ; i < this->paramsFloat.size(); ++i) { 132 | if (this->paramsFloat[i].name == param.name) { 133 | this->paramsFloat[i].value = param.value; 134 | return; 135 | } 136 | } 137 | this->paramsFloat.push_back(param); 138 | } 139 | }; 140 | //----------------------------------------------------------------------------// 141 | class ImageProcessorWrapper 142 | { 143 | public: 144 | ImageProcessorWrapper(gpuip::GpuEnvironment env) 145 | : 
_ip(gpuip::ImageProcessor::Create(env)) 146 | { 147 | if (_ip.get() == NULL) { 148 | throw std::runtime_error("Could not create gpuip imageProcessor."); 149 | } 150 | } 151 | 152 | boost::shared_ptr CreateKernel(const std::string & name) 153 | { 154 | gpuip::Kernel::Ptr ptr = _ip->CreateKernel(name); 155 | // safe since KernelWrapper doesnt hold any extra data 156 | return boost::static_pointer_cast(ptr); 157 | } 158 | 159 | boost::shared_ptr CreateBuffer(const std::string & name, 160 | gpuip::Buffer::Type type, 161 | unsigned int channels) 162 | { 163 | gpuip::Buffer::Ptr ptr = _ip->CreateBuffer(name, type, channels); 164 | // safe since BufferWrapper doesnt hold any extra data 165 | return boost::shared_ptr(new BufferWrapper(ptr)); 166 | } 167 | 168 | void SetDimensions(unsigned int width, unsigned int height) 169 | { 170 | _ip->SetDimensions(width,height); 171 | } 172 | 173 | unsigned int Width() const 174 | { 175 | return _ip->Width(); 176 | } 177 | 178 | unsigned int Height() const 179 | { 180 | return _ip->Height(); 181 | } 182 | 183 | std::string Allocate() 184 | { 185 | std::string err; 186 | _ip->Allocate(&err); 187 | return err; 188 | } 189 | 190 | std::string Build() 191 | { 192 | std::string err; 193 | _ip->Build(&err); 194 | return err; 195 | } 196 | 197 | std::string Run() 198 | { 199 | std::string err; 200 | _ip->Run(&err); 201 | return err; 202 | } 203 | 204 | std::string ReadBufferFromGPU(boost::shared_ptr buffer) 205 | { 206 | std::string err; 207 | _ip->Copy(buffer->buffer, gpuip::Buffer::COPY_FROM_GPU, 208 | buffer->data.get_data(), &err); 209 | return err; 210 | } 211 | 212 | std::string WriteBufferToGPU(boost::shared_ptr buffer) 213 | { 214 | std::string err; 215 | _ip->Copy(buffer->buffer, gpuip::Buffer::COPY_TO_GPU, 216 | buffer->data.get_data(), &err); 217 | return err; 218 | } 219 | 220 | std::string BoilerplateCode(boost::shared_ptr k) const 221 | { 222 | return _ip->BoilerplateCode(k); 223 | } 224 | private: 225 | 
gpuip::ImageProcessor::Ptr _ip; 226 | }; 227 | //----------------------------------------------------------------------------// 228 | } //end namespace python 229 | //----------------------------------------------------------------------------// 230 | } //end namespace gpuip 231 | //----------------------------------------------------------------------------// 232 | BOOST_PYTHON_MODULE(pygpuip) 233 | { 234 | namespace gp = gpuip::python; 235 | 236 | np::initialize(); 237 | 238 | bp::enum_("Environment") 239 | .value("OpenCL", gpuip::OpenCL) 240 | .value("CUDA", gpuip::CUDA) 241 | .value("GLSL", gpuip::GLSL); 242 | 243 | bp::enum_("BufferType") 244 | .value("UNSIGNED_BYTE", gpuip::Buffer::UNSIGNED_BYTE) 245 | .value("HALF", gpuip::Buffer::HALF) 246 | .value("FLOAT", gpuip::Buffer::FLOAT); 247 | 248 | bp::class_ > 249 | ("Buffer", bp::no_init) 250 | .add_property("name", &gp::BufferWrapper::name) 251 | .add_property("type", &gp::BufferWrapper::type) 252 | .add_property("channels", &gp::BufferWrapper::channels) 253 | .def_readwrite("data", &gp::BufferWrapper::data) 254 | .def("Read", &gp::BufferWrapper::Read) 255 | .def("Read", &gp::BufferWrapper::ReadMT) 256 | .def("Write", &gp::BufferWrapper::Write) 257 | .def("Write", &gp::BufferWrapper::WriteMT); 258 | 259 | bp::class_ > 260 | ("ParamInt",bp::init()) 261 | .def_readonly("name", &gpuip::Parameter::name) 262 | .def_readwrite("value", &gpuip::Parameter::value); 263 | 264 | bp::class_ > 265 | ("ParamFloat", bp::init()) 266 | .def_readonly("name", &gpuip::Parameter::name) 267 | .def_readwrite("value", &gpuip::Parameter::value); 268 | 269 | bp::class_ > 270 | ("Kernel", bp::no_init) 271 | .def_readonly("name", &gp::KernelWrapper::name) 272 | .def_readwrite("code", &gp::KernelWrapper::code) 273 | .def("SetInBuffer", &gp::KernelWrapper::SetInBuffer) 274 | .def("SetOutBuffer", &gp::KernelWrapper::SetOutBuffer) 275 | .def("SetParam", &gp::KernelWrapper::SetParamInt) 276 | .def("SetParam", &gp::KernelWrapper::SetParamFloat); 
277 | 278 | bp::class_ > 280 | ("ImageProcessor", 281 | bp::init()) 282 | .def("SetDimensions", &gp::ImageProcessorWrapper::SetDimensions) 283 | .add_property("width", &gp::ImageProcessorWrapper::Width) 284 | .add_property("height", &gp::ImageProcessorWrapper::Height) 285 | .def("CreateBuffer", &gp::ImageProcessorWrapper::CreateBuffer) 286 | .def("CreateKernel", &gp::ImageProcessorWrapper::CreateKernel) 287 | .def("Allocate", &gp::ImageProcessorWrapper::Allocate) 288 | .def("Build", &gp::ImageProcessorWrapper::Build) 289 | .def("Run", &gp::ImageProcessorWrapper::Run) 290 | .def("ReadBufferFromGPU", 291 | &gp::ImageProcessorWrapper::ReadBufferFromGPU) 292 | .def("WriteBufferToGPU", 293 | &gp::ImageProcessorWrapper::WriteBufferToGPU) 294 | .def("BoilerplateCode", 295 | &gp::ImageProcessorWrapper::BoilerplateCode); 296 | 297 | bp::def("CanCreateGpuEnvironment",&gpuip::ImageProcessor::CanCreate); 298 | 299 | std::stringstream ss; 300 | #ifdef GPUIP_VERSION 301 | ss << GPUIP_VERSION; 302 | #endif 303 | bp::scope().attr("__version__") = ss.str();std::string("lol"); 304 | } 305 | //----------------------------------------------------------------------------// 306 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2014 Per Karlsson 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 
all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | project(gpuip_test) 24 | 25 | include_directories(${GPUIP_ROOT_DIR}/src) 26 | 27 | # Add C++ test 28 | add_executable(test_cpp test) 29 | target_link_libraries(test_cpp gpuip) 30 | add_test(NAME test_cpp COMMAND test_cpp) 31 | 32 | set(GPUIP_TEST_IMAGE ${GPUIP_ROOT_DIR}/examples/images/bridge.exr) 33 | if(GPUIP_TEST_LIBRARIES AND 34 | (EXISTS ${GPUIP_TEST_IMAGE} OR DOWNLOAD_EXAMPLES_IMAGES)) 35 | find_package(OpenMP) 36 | if (OPENMP_FOUND) 37 | add_definitions(-D_GPUIP_TEST_WITH_OPENMP) 38 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 39 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 40 | endif() 41 | add_executable(test_performance performance) 42 | target_link_libraries(test_performance gpuip ${GPUIP_TEST_LIBRARIES}) 43 | set(GPUIP_TEST_KERNEL_DIR ${GPUIP_ROOT_DIR}/examples/kernels) 44 | add_test(NAME test_performance 45 | COMMAND test_performance ${GPUIP_TEST_IMAGE} ${GPUIP_TEST_KERNEL_DIR}) 46 | endif() 47 | 48 | # Add python test 49 | if(BUILD_PYTHON_BINDINGS) 50 | configure_file(test.py ../src/test.py COPYONLY) 51 | add_test(NAME test_py COMMAND ${PYTHON_EXECUTABLE} ../src/test.py) 52 | endif() 53 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | import pygpuip 2 | import numpy 3 | 4 | opencl_codeA = """ 5 | __kernel void 6 
| my_kernelA(__global const float * A, 7 | __global float * B, 8 | __global float * C, 9 | const int incA, 10 | const float incB, 11 | const int width, 12 | const int height) 13 | { 14 | const int x = get_global_id(0); 15 | const int y = get_global_id(1); 16 | 17 | // array index 18 | const int idx = x + width * y; 19 | 20 | // inside image bounds check 21 | if (x >= width || y >= height) { 22 | return; 23 | } 24 | 25 | // kernel code 26 | B[idx] = A[idx] + incA *0.1; 27 | C[idx] = A[idx] + incB; 28 | } 29 | """ 30 | opencl_codeB = """ 31 | __kernel void 32 | my_kernelB(__global const float * B, 33 | __global const float * C, 34 | __global float * A, 35 | const int width, 36 | const int height) 37 | { 38 | const int x = get_global_id(0); 39 | const int y = get_global_id(1); 40 | 41 | // array index 42 | const int idx = x + width * y; 43 | 44 | // inside image bounds check 45 | if (x >= width || y >= height) { 46 | return; 47 | } 48 | 49 | // kernel code 50 | A[idx] = B[idx] + C[idx]; 51 | } 52 | """ 53 | opencl_boilerplateA = \ 54 | """__kernel void 55 | my_kernelA(__global const float * A, 56 | __global float * B, 57 | __global float * C, 58 | const int incA, 59 | const float incB, 60 | const int width, 61 | const int height) 62 | { 63 | const int x = get_global_id(0); 64 | const int y = get_global_id(1); 65 | 66 | // array index 67 | const int idx = x + width * y; 68 | 69 | // inside image bounds check 70 | if (x >= width || y >= height) { 71 | return; 72 | } 73 | 74 | // kernel code 75 | B[idx] = 0; 76 | C[idx] = 0; 77 | }""" 78 | opencl_boilerplateB = \ 79 | """__kernel void 80 | my_kernelB(__global const float * B, 81 | __global const float * C, 82 | __global float * A, 83 | const int width, 84 | const int height) 85 | { 86 | const int x = get_global_id(0); 87 | const int y = get_global_id(1); 88 | 89 | // array index 90 | const int idx = x + width * y; 91 | 92 | // inside image bounds check 93 | if (x >= width || y >= height) { 94 | return; 95 | } 96 | 97 | 
// kernel code 98 | A[idx] = 0; 99 | }""" 100 | 101 | cuda_codeA = """ 102 | __global__ void 103 | my_kernelA(float * A, 104 | float * B, 105 | float * C, 106 | const int incA, 107 | const float incB, 108 | const int width, 109 | const int height) 110 | { 111 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 112 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 113 | 114 | // array index 115 | const int idx = x + width * y; 116 | 117 | // inside image bounds check 118 | if (x >= width || y >= height) { 119 | return; 120 | } 121 | 122 | // kernel code 123 | B[idx] = A[idx] + incA * 0.1; 124 | C[idx] = A[idx] + incB; 125 | }""" 126 | cuda_codeB = """ 127 | __global__ void 128 | my_kernelB(float * B, 129 | float * C, 130 | float * A, 131 | const int width, 132 | const int height) 133 | { 134 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 135 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 136 | 137 | // array index 138 | const int idx = x + width * y; 139 | 140 | // inside image bounds check 141 | if (x >= width || y >= height) { 142 | return; 143 | } 144 | 145 | // kernel code 146 | A[idx] = B[idx] + C[idx]; 147 | }""" 148 | 149 | cuda_boilerplateA = \ 150 | """__global__ void 151 | my_kernelA(const float * A, 152 | float * B, 153 | float * C, 154 | const int incA, 155 | const float incB, 156 | const int width, 157 | const int height) 158 | { 159 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 160 | const int y = blockIdx.y * blockDim.y + threadIdx.y; 161 | 162 | // array index 163 | const int idx = x + width * y; 164 | 165 | // inside image bounds check 166 | if (x >= width || y >= height) { 167 | return; 168 | } 169 | 170 | // kernel code 171 | B[idx] = 0; 172 | C[idx] = 0; 173 | }""" 174 | cuda_boilerplateB = \ 175 | """__global__ void 176 | my_kernelB(const float * B, 177 | const float * C, 178 | float * A, 179 | const int width, 180 | const int height) 181 | { 182 | const int x = blockIdx.x * blockDim.x + threadIdx.x; 183 | const 
int y = blockIdx.y * blockDim.y + threadIdx.y; 184 | 185 | // array index 186 | const int idx = x + width * y; 187 | 188 | // inside image bounds check 189 | if (x >= width || y >= height) { 190 | return; 191 | } 192 | 193 | // kernel code 194 | A[idx] = 0; 195 | }""" 196 | 197 | glsl_codeA = """ 198 | #version 120 199 | uniform sampler2D A; 200 | uniform int incA; 201 | uniform float incB; 202 | varying vec2 x; // texture coordinates 203 | uniform float dx; // delta 204 | 205 | void main() 206 | { 207 | gl_FragData[0] = vec4(texture2D(A, x).x+incA*0.1,0,0,1); 208 | gl_FragData[1] = vec4(texture2D(A, x).x+incB,0,0,1); 209 | }""" 210 | glsl_codeB = """ 211 | #version 120 212 | uniform sampler2D B; 213 | uniform sampler2D C; 214 | varying vec2 x; // texture coordinates 215 | uniform float dx; // delta 216 | 217 | void main() 218 | { 219 | gl_FragData[0] = vec4(texture2D(B, x).x + 220 | texture2D(C, x).x, 0, 0, 1); 221 | }""" 222 | 223 | glsl_boilerplateA = \ 224 | """#version 120 225 | uniform sampler2D A; 226 | uniform int incA; 227 | uniform float incB; 228 | varying vec2 x; // texture coordinates 229 | uniform float dx; // delta 230 | 231 | void main() 232 | { 233 | // gl_FragData[0] is buffer B 234 | gl_FragData[0] = vec4(0,0,0,1); 235 | 236 | // gl_FragData[1] is buffer C 237 | gl_FragData[1] = vec4(0,0,0,1); 238 | }""" 239 | glsl_boilerplateB = \ 240 | """#version 120 241 | uniform sampler2D B; 242 | uniform sampler2D C; 243 | varying vec2 x; // texture coordinates 244 | uniform float dx; // delta 245 | 246 | void main() 247 | { 248 | // gl_FragData[0] is buffer A 249 | gl_FragData[0] = vec4(0,0,0,1); 250 | }""" 251 | 252 | width = 4 253 | height = 4 254 | N = width * height 255 | no_error = "" 256 | 257 | def test(env, codeA, codeB, boilerplateA, boilerplateB): 258 | ip = pygpuip.ImageProcessor(env) 259 | ip.SetDimensions(width, height) 260 | assert ip 261 | 262 | buffers = [] 263 | for i in xrange(3): 264 | b = ip.CreateBuffer("b%i" % i, 
pygpuip.BufferType.FLOAT, 1) 265 | b.data = numpy.zeros((width,height,1), dtype = numpy.float32) 266 | buffers.append(b) 267 | 268 | kernelA = ip.CreateKernel("my_kernelA") 269 | assert kernelA 270 | assert kernelA.name == "my_kernelA" 271 | kernelA.code = codeA 272 | kernelA.SetInBuffer("A", buffers[0]) 273 | kernelA.SetOutBuffer("B", buffers[1]) 274 | kernelA.SetOutBuffer("C", buffers[2]) 275 | 276 | incA = 2 277 | incB = 0.25 278 | kernelA.SetParam(pygpuip.ParamInt("incA",incA)) 279 | kernelA.SetParam(pygpuip.ParamFloat("incB", incB)) 280 | assert ip.BoilerplateCode(kernelA) == boilerplateA 281 | 282 | kernelB = ip.CreateKernel("my_kernelB") 283 | assert kernelB 284 | assert kernelB.name == "my_kernelB" 285 | kernelB.code = codeB 286 | kernelB.SetInBuffer("B", buffers[1]) 287 | kernelB.SetInBuffer("C", buffers[2]) 288 | kernelB.SetOutBuffer("A", buffers[0]) 289 | assert ip.BoilerplateCode(kernelB) == boilerplateB 290 | 291 | assert ip.Allocate() == no_error 292 | assert ip.Allocate() == no_error # reinit should not break things 293 | indata = numpy.zeros((width,height,1), dtype = numpy.float32) 294 | for i in range(width): 295 | for j in range(height): 296 | indata[i][j] = i + j * width 297 | buffers[0].data[:] = indata 298 | assert ip.WriteBufferToGPU(buffers[0]) == no_error 299 | 300 | assert ip.Build() == no_error 301 | assert ip.Build() == no_error # rebuilding should not break things 302 | assert ip.Run() == no_error 303 | 304 | for b in buffers: 305 | assert ip.ReadBufferFromGPU(b) == no_error 306 | 307 | def eq(a,b): 308 | return abs(a-b) < 0.0001 309 | 310 | b0,b1,b2 = buffers[0].data, buffers[1].data, buffers[2].data 311 | for i in range(width): 312 | for j in range(height): 313 | assert eq(b1[i][j], indata[i][j] + incA*0.1) 314 | assert eq(b2[i][j], indata[i][j] + incB) 315 | 316 | assert eq(b0[i][j], b1[i][j] + b2[i][j]) 317 | print "Test passed!\n" 318 | 319 | if __name__ == '__main__': 320 | if 
pygpuip.CanCreateGpuEnvironment(pygpuip.Environment.OpenCL): 321 | print "Testing OpenCL..." 322 | test(pygpuip.Environment.OpenCL, opencl_codeA, opencl_codeB, 323 | opencl_boilerplateA, opencl_boilerplateB) 324 | 325 | if pygpuip.CanCreateGpuEnvironment(pygpuip.Environment.CUDA): 326 | print "Testing CUDA..." 327 | test(pygpuip.Environment.CUDA, cuda_codeA, cuda_codeB, 328 | cuda_boilerplateA, cuda_boilerplateB) 329 | 330 | if pygpuip.CanCreateGpuEnvironment(pygpuip.Environment.GLSL): 331 | print "Testing GLSL..." 332 | test(pygpuip.Environment.GLSL, glsl_codeA, glsl_codeB, 333 | glsl_boilerplateA, glsl_boilerplateB) 334 | --------------------------------------------------------------------------------