├── .gitignore ├── AUTHORS ├── CMakeLists.txt ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── Makefile.bkp ├── NEWS ├── README ├── autogen.sh ├── colors ├── camera_bt709_422.yuv ├── colorspace.ods ├── display_diff.sh ├── image_bt709_422.yuv ├── test_common.sh ├── test_rgb.sh ├── test_transform.sh └── test_yuv.sh ├── configure.ac ├── gpujpeg.sln ├── gpujpeg.vcxproj ├── gpujpeg.vcxproj.filters ├── libgpujpeg.pc.in ├── libgpujpeg ├── gpujpeg.h ├── gpujpeg_common.h ├── gpujpeg_common_internal.h ├── gpujpeg_decoder.h ├── gpujpeg_decoder_internal.h ├── gpujpeg_encoder.h ├── gpujpeg_encoder_internal.h ├── gpujpeg_reader.h ├── gpujpeg_table.h ├── gpujpeg_type.h ├── gpujpeg_util.h └── gpujpeg_writer.h ├── src ├── CMakeLists.txt ├── Makefile.bkp ├── gpujpeg_colorspace.h ├── gpujpeg_common.cpp ├── gpujpeg_dct_cpu.cpp ├── gpujpeg_dct_cpu.h ├── gpujpeg_dct_gpu.cu ├── gpujpeg_dct_gpu.h ├── gpujpeg_decoder.cpp ├── gpujpeg_encoder.cpp ├── gpujpeg_huffman_cpu_decoder.cpp ├── gpujpeg_huffman_cpu_decoder.h ├── gpujpeg_huffman_cpu_encoder.cpp ├── gpujpeg_huffman_cpu_encoder.h ├── gpujpeg_huffman_gpu_decoder.cu ├── gpujpeg_huffman_gpu_decoder.h ├── gpujpeg_huffman_gpu_encoder.cu ├── gpujpeg_huffman_gpu_encoder.h ├── gpujpeg_preprocessor.cu ├── gpujpeg_preprocessor.h ├── gpujpeg_reader.cpp ├── gpujpeg_table.cpp ├── gpujpeg_writer.cpp └── main.c └── test ├── .gitignore ├── memcheck ├── Makefile.am ├── Makefile.bkp └── memcheck.cu └── opengl_interop ├── Makefile.am ├── Makefile.bkp ├── README ├── image.cu ├── image.h ├── main.c ├── util.h ├── view.c └── view.h /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | gpujpeg 3 | .libs 4 | src/*.o 5 | src/*.lo 6 | *.rgb 7 | configure 8 | Makefile 9 | Makefile.in 10 | aclocal.m4 11 | autom4te.cache/ 12 | compile 13 | config.guess 14 | config.h 15 | config.h.in 16 | config.h.in~ 17 | config.log 18 | config.status 19 | config.sub 20 | depcomp 21 | install-sh 22 | libgpujpeg.pc 23 | 
libtool 24 | ltmain.sh 25 | m4/ 26 | missing 27 | src/.deps/ 28 | src/.dirstamp 29 | stamp-h1 30 | libgpujpeg.la 31 | test-driver 32 | 33 | # VS 34 | *.pdb 35 | *.sdf 36 | *.suo 37 | *.user 38 | Debug 39 | Release 40 | 41 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Petr Holub, CESNET z.s.p.o 2 | Martin Srom, CESNET z.s.p.o 3 | Jiri Matela, CESNET z.s.p.o 4 | Martin Jirman, CESNET z.s.p.o 5 | 6 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2) 2 | PROJECT(gpujpeg) 3 | 4 | # CUDA 5 | find_package(CUDA REQUIRED) 6 | include_directories(${CUDA_INCLUDE_DIRS}) 7 | 8 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}") #include libgpujpeg 9 | ADD_SUBDIRECTORY(src) 10 | 11 | 12 | # Install Rules 13 | install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/libgpujpeg/" DESTINATION "libgpujpeg") -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2011, CESNET z.s.p.o 3 | Copyright (c) 2011, Silicon Genome, LLC. 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2012-08-02 2 | ---------- 3 | Changes: 4 | -Added new parallel GPU Huffman encoder for compute capabilities >= 2.0 5 | -Rewritten GPU Huffman decoder 6 | -Rewritten GPU forward DCT, preserving precision of fixed point implementation 7 | and gaining better performance than NPP implementation. 8 | (Both old implementations were thus removed.) 9 | -Rewritten GPU encoder preprocessor to remove unnecessary operations 10 | -Minor performance improvement in GPU encoder postprocessor 11 | 12 | 2012-03-29 13 | ---------- 14 | Changes: 15 | -Optionally the encoder input or decoder output can be loaded/store 16 | from/to OpenGL texture. 17 | 18 | 2012-03-09 19 | ---------- 20 | Changes: 21 | -Refactored color spaces transformations, added new color spaces 22 | -Added new implementation of DCT/IDCT on CUDA, it is slightly slower 23 | than the NPP implementation, but the new IDCT doesn't cause color space 24 | change. It can be turned on by Makefile option. By default is NPP 25 | version used. 
26 | 27 | 2012-02-24 28 | ---------- 29 | Changes: 30 | -Slightly improved performance of encoder. 31 | -Correction of segment info for large images 32 | 33 | 2012-02-21 34 | ---------- 35 | Changes: 36 | -Added option to encoder for generating segment info into APP13 header that is used 37 | by decoder to perform fast stream parsing. 38 | -Renamed --interleving option to --interleaved and --chroma-subsampling to --subsampled. 39 | 40 | 2012-02-16 41 | ---------- 42 | Changes: 43 | -Added option for verbose output from console application. 44 | -Library now stores all coder time durations inside coder structure and it 45 | can be accessed from userspace. 46 | -Console application now prints GPU allocation info (when --verbose is used). 47 | -Correction of encoding/decoding for large images (e.g. 4320p or 8K) 48 | 49 | 2012-02-07 50 | ---------- 51 | -Improved preprocessor and postprocessor performance. 52 | -Added recovering from error inside JPEG stream to decoder. 53 | -Library provides function for listing CUDA devices. 54 | -Library provides option for decoder to output result right into OpenGL PBO resource 55 | which improves performance when the result should be displayed on the same GPU. 56 | -Correction of build warnings. 57 | -Correction of other errors. 58 | 59 | 2012-01-04 60 | ---------- 61 | Changes: 62 | -Added option to libgpujpeg Makefile for moving gpu huffman coder tables into constant memory. 63 | -Option is default set to true and makes better performance on older GPUs. 64 | 65 | 2012-01-03 66 | ---------- 67 | Changes: 68 | -Added interleaved mode (optionally). 69 | -Added subsampling (optionally) - better performance when used 70 | (because of smaller amount of encoded/decoded data). 71 | 72 | 2011-12-07 73 | ---------- 74 | First public release of gpujpeg library and console 75 | application. 76 | Features: 77 | -Baseline JPEG, non-interleaved mode on CUDA-enabled GPUs. 
78 | -Performance - realtime HD and 4K encoding/decoding on NVIDIA GTX 580. 79 | -Using CUDA and NPP library. -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | == Requires == 2 | NVidia CUDA toolkit & NVidia CUDA SDK 3 | 4 | == Compilation == 5 | run: 6 | # ./autogen.sh 7 | # ./configure # (optional, called automatically by autogen.sh) 8 | # make 9 | 10 | If you wish to specify the path to your CUDA installation, call 11 | configure as 12 | # ./configure --with-cuda=/path/to/cuda/root 13 | 14 | == Installation == 15 | # make install 16 | 17 | For the default install prefix (/usr/local), consider updating your 18 | PKG_CONFIG_PATH and LD_LIBRARY_PATH variables 19 | 20 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS = subdir-objects 2 | ACLOCAL_AMFLAGS=-I m4 3 | CLEANFILES= 4 | 5 | SUBDIRS = . 
# test/memcheck test/opengl_interop 6 | 7 | CUDA_INSTALL_PATH=@CUDA_INSTALL_PATH@ 8 | NVCC=${CUDA_INSTALL_PATH}/bin/nvcc 9 | 10 | NVCCFLAGS=@NVCCFLAGS@ -Xcompiler -fPIC -Xcompiler '@COMMON_FLAGS@' \ 11 | -gencode arch=compute_35,code=compute_35 \ 12 | -gencode arch=compute_35,code=sm_35 \ 13 | -gencode arch=compute_30,code=sm_30 \ 14 | -gencode arch=compute_20,code=sm_21 \ 15 | -gencode arch=compute_20,code=sm_20 16 | 17 | SUFFIXES=.cu 18 | 19 | pkgconfigdir = $(libdir)/pkgconfig 20 | library_includedir=$(includedir)/libgpujpeg/ 21 | gpujpeg_libincludedir = $(libdir)/libgpujpeg 22 | 23 | lib_LTLIBRARIES = libgpujpeg.la 24 | bin_PROGRAMS = gpujpeg 25 | pkgconfig_DATA = libgpujpeg.pc 26 | 27 | library_include_HEADERS = libgpujpeg/*.h 28 | nodist_gpujpeg_libinclude_HEADERS = config.h 29 | 30 | gpujpeg_SOURCES = src/main.c 31 | gpujpeg_CFLAGS = -std=c99 @COMMON_FLAGS@ 32 | gpujpeg_LDADD = libgpujpeg.la 33 | gpujpeg_LDFLAGS = @GPUJPEG_LDFLAGS@ 34 | 35 | # gpu jpeg library sources 36 | libgpujpeg_la_SOURCES = src/gpujpeg_common.cpp \ 37 | src/gpujpeg_dct_cpu.cpp \ 38 | src/gpujpeg_decoder.cpp \ 39 | src/gpujpeg_encoder.cpp \ 40 | src/gpujpeg_huffman_cpu_decoder.cpp \ 41 | src/gpujpeg_huffman_cpu_encoder.cpp \ 42 | src/gpujpeg_reader.cpp \ 43 | src/gpujpeg_table.cpp \ 44 | src/gpujpeg_writer.cpp 45 | 46 | libgpujpeg_la_DEPENDENCIES = @LIBGPUJPEG_CUDA_OBJS@ 47 | 48 | libgpujpeg_la_LIBADD = $(libgpujpeg_la_DEPENDENCIES) 49 | libgpujpeg_la_LDFLAGS = -export-dynamic -version-info $(GPUJPEG_LIBRARY_VERSION) @GPUJPEG_LDFLAGS@ @GPUJPEG_LIBS@ 50 | libgpujpeg_la_CFLAGS = -std=c99 -fPIC @COMMON_FLAGS@ 51 | libgpujpeg_la_CXXFLAGS = -fPIC @COMMON_FLAGS@ 52 | #libgpujpeg_la_LINK = g++ -fPIC 53 | 54 | check-TESTS: tests 55 | tests: 56 | for testdir in `find ./test -type d` ; do \ 57 | ( cd $${testdir} ; make ) \ 58 | done 59 | 60 | if DARWIN 61 | static: $(libgpujpeg_la_DEPENDENCIES) all 62 | [ -d build/tmp/i386 ] || mkdir -p build/tmp/i386 63 | [ -d build/tmp/x86_64 ] || mkdir -p 
build/tmp/x86_64 64 | [ -d build/tmp/universal ] || mkdir -p build/tmp/universal 65 | for arch in i386 x86_64; do \ 66 | lipo -thin $$arch .libs/libgpujpeg.a -output build/tmp/$$arch/libgpujpeg.a ; \ 67 | cd build/tmp/$$arch; ar x libgpujpeg.a; rm libgpujpeg.a; cd -; \ 68 | for file in $(libgpujpeg_la_DEPENDENCIES); do \ 69 | BASENAME=$$(basename $$file) ; \ 70 | lipo -thin $$arch $$file -output build/tmp/$$arch/$$BASENAME ; \ 71 | ar ru build/tmp/$$arch/libgpujpeg.a build/tmp/$$arch/$$BASENAME ; \ 72 | done ; \ 73 | done 74 | for arch in i386 x86_64; do \ 75 | $(AR) rcu build/tmp/$$arch/libgpujpeg.a build/tmp/$$arch/* ; \ 76 | done 77 | $(RM) .libs/libgpujpeg.a 78 | lipo -create build/tmp/i386/libgpujpeg.a build/tmp/x86_64/libgpujpeg.a -output .libs/libgpujpeg.a 79 | else 80 | static: $(libgpujpeg_la_DEPENDENCIES) all 81 | $(AR) ru .libs/libgpujpeg.a $(libgpujpeg_la_DEPENDENCIES) 82 | $(RANLIB) .libs/libgpujpeg.a 83 | endif 84 | 85 | # Pattern rule for compiling CUDA files 86 | %.cu.o: %.cu 87 | $(NVCC) $(NVCCFLAGS) $(DEFAULT_INCLUDES) $(INCLUDES) -c $< -o $@ 88 | # Universal object: always merge both per-architecture objects ($^), not only the newer one ($?) 89 | build/universal/%.o: build/i386/%.cu.o build/x86_64/%.cu.o 90 | [ -d build/universal ] || mkdir -p build/universal 91 | lipo -create $^ -output $@ 92 | 93 | build/i386/%.cu.o: src/%.cu 94 | [ -d build/i386 ] || mkdir -p build/i386 95 | $(NVCC) -m32 $(NVCCFLAGS) $(DEFAULT_INCLUDES) $(INCLUDES) -c $< -o $@ 96 | 97 | build/x86_64/%.cu.o: src/%.cu 98 | [ -d build/x86_64 ] || mkdir -p build/x86_64 99 | $(NVCC) -m64 $(NVCCFLAGS) $(DEFAULT_INCLUDES) $(INCLUDES) -c $< -o $@ 100 | 101 | clean-local: 102 | rm -rf src/*.cu.lo src/*.cu.o 103 | rm -rf build 104 | -------------------------------------------------------------------------------- /Makefile.bkp: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2011, CESNET z.s.p.o 3 | # 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | # POSSIBILITY OF SUCH DAMAGE. 26 | # 27 | 28 | # Use shared/static libgpujpeg library? 29 | SHARED_LIBRARY ?= 1 30 | # Flag if use OpenGL 31 | USE_OPENGL ?= 0 32 | # Debug 33 | DEBUG ?= 0 34 | # CUDA install path 35 | CUDA_INSTALL_PATH ?= /usr/local/cuda 36 | 37 | # Target executable 38 | TARGET := gpujpeg 39 | # C files 40 | CFILES := main.c 41 | 42 | # Compilers 43 | CC := gcc 44 | LINK := g++ -fPIC 45 | 46 | # Common flags 47 | COMMONFLAGS += -I. 
-I$(CUDA_INSTALL_PATH)/include -O2 48 | # C flags 49 | CFLAGS += $(COMMONFLAGS) -std=c99 50 | # Linker flags 51 | LDFLAGS += 52 | 53 | # Link libgpujpeg library 54 | ifeq ($(SHARED_LIBRARY),1) 55 | LDFLAGS += -Llibgpujpeg -lgpujpeg 56 | else 57 | # Do 32bit vs. 64bit setup 58 | LDFLAGS += libgpujpeg/libgpujpeg.a 59 | #Other flags 60 | ifeq ($(USE_OPENGL),1) 61 | LDFLAGS += -lGLEW 62 | endif 63 | endif 64 | 65 | LBITS := $(shell getconf LONG_BIT) 66 | ifeq ($(LBITS),64) 67 | # 64bit 68 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib64 69 | else 70 | # 32bit 71 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib 72 | endif 73 | LDFLAGS += -lcudart 74 | 75 | # Build 76 | build: $(TARGET) $(TARGET).sh 77 | 78 | # Clean 79 | clean: 80 | rm -f *.o $(TARGET) $(TARGET).sh 81 | @cd libgpujpeg; make clean 82 | 83 | # Lists of object files 84 | COBJS=$(CFILES:.c=.c.o) 85 | 86 | # Build target 87 | $(TARGET): $(COBJS) libgpujpeg/libgpujpeg.build 88 | echo $(LDFLAGS) 89 | echo $(SHARED_LIBRARY) 90 | $(LINK) $(COBJS) $(LDFLAGS) -o $(TARGET); 91 | 92 | # Build target run script 93 | ifeq ($(SHARED_LIBRARY),1) 94 | $(TARGET).sh: 95 | @printf "PATH=$$" > $(TARGET).sh 96 | @printf "(dirname $$" >> $(TARGET).sh 97 | @printf "0)\n" >> $(TARGET).sh 98 | @printf "LD_LIBRARY_PATH=\"$$" >> $(TARGET).sh 99 | @printf "LD_LIBRARY_PATH;$$" >> $(TARGET).sh 100 | @printf "PATH/libgpujpeg\" $$" >> $(TARGET).sh 101 | @printf "PATH/gpujpeg $$" >> $(TARGET).sh 102 | @printf "@\n" >> $(TARGET).sh 103 | @chmod a+x $(TARGET).sh 104 | else 105 | $(TARGET).sh: 106 | @printf "PATH=$$" > $(TARGET).sh 107 | @printf "(dirname $$" >> $(TARGET).sh 108 | @printf "0)\n" >> $(TARGET).sh 109 | @printf "$$" >> $(TARGET).sh 110 | @printf "PATH/gpujpeg $$" >> $(TARGET).sh 111 | @printf "@\n" >> $(TARGET).sh 112 | @chmod a+x $(TARGET).sh 113 | endif 114 | 115 | # Build gpujpeg library 116 | libgpujpeg/libgpujpeg.build: 117 | @cd libgpujpeg; make DEBUG=$(DEBUG) SHARED_LIBRARY=$(SHARED_LIBRARY) USE_OPENGL=$(USE_OPENGL) 118 | 119 | # 
Pattern rule for compiling C files 120 | %.c.o: %.c 121 | $(CC) $(CFLAGS) -c $< -o $@ 122 | 123 | # Set file dependencies 124 | main.c.o: main.c 125 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 2012-08-02 2 | ---------- 3 | Changes: 4 | -Added new parallel GPU Huffman encoder for compute capabilities >= 2.0 5 | -Rewritten GPU Huffman decoder 6 | -Rewritten GPU forward DCT, preserving precision of fixed point implementation 7 | and gaining better performance than NPP implementation. 8 | (Both old implementations were thus removed.) 9 | -Rewritten GPU encoder preprocessor to remove unnecessary operations 10 | -Minor performance improvement in GPU encoder postprocessor 11 | 12 | 2012-03-29 13 | ---------- 14 | Changes: 15 | -Optionally the encoder input or decoder output can be loaded/store 16 | from/to OpenGL texture. 17 | 18 | 2012-03-09 19 | ---------- 20 | Changes: 21 | -Refactored color spaces transformations, added new color spaces 22 | -Added new implementation of DCT/IDCT on CUDA, it is slightly slower 23 | than the NPP implementation, but the new IDCT doesn't cause color space 24 | change. It can be turned on by Makefile option. By default is NPP 25 | version used. 26 | 27 | 2012-02-24 28 | ---------- 29 | Changes: 30 | -Slightly improved performance of encoder. 31 | -Correction of segment info for large images 32 | 33 | 2012-02-21 34 | ---------- 35 | Changes: 36 | -Added option to encoder for generating segment info into APP13 header that is used 37 | by decoder to perform fast stream parsing. 38 | -Renamed --interleving option to --interleaved and --chroma-subsampling to --subsampled. 39 | 40 | 2012-02-16 41 | ---------- 42 | Changes: 43 | -Added option for verbose output from console application. 44 | -Library now stores all coder time durations inside coder structure and it 45 | can be accessed from userspace. 
46 | -Console application now prints GPU allocation info (when --verbose is used). 47 | -Correction of encoding/decoding for large images (e.g. 4320p or 8K) 48 | 49 | 2012-02-07 50 | ---------- 51 | -Improved preprocessor and postprocessor performance. 52 | -Added recovering from error inside JPEG stream to decoder. 53 | -Library provides function for listing CUDA devices. 54 | -Library provides option for decoder to output result right into OpenGL PBO resource 55 | which improves performance when the result should be displayed on the same GPU. 56 | -Correction of build warnings. 57 | -Correction of other errors. 58 | 59 | 2012-01-04 60 | ---------- 61 | Changes: 62 | -Added option to libgpujpeg Makefile for moving gpu huffman coder tables into constant memory. 63 | -Option is default set to true and makes better performance on older GPUs. 64 | 65 | 2012-01-03 66 | ---------- 67 | Changes: 68 | -Added interleaved mode (optionally). 69 | -Added subsampling (optionally) - better performance when used 70 | (because of smaller amount of encoded/decoded data). 71 | 72 | 2011-12-07 73 | ---------- 74 | First public release of gpujpeg library and console 75 | application. 76 | Features: 77 | -Baseline JPEG, non-interleaved mode on CUDA-enabled GPUs. 78 | -Performance - realtime HD and 4K encoding/decoding on NVIDIA GTX 580. 79 | -Using CUDA and NPP library. -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | srcdir=`dirname $0` 3 | test -z "$srcdir" && srcdir=. 4 | 5 | ORIGDIR=`pwd` 6 | 7 | if [ `uname -s` = "Darwin" ]; then 8 | LIBTOOLIZE=glibtoolize 9 | else 10 | LIBTOOLIZE=libtoolize 11 | fi 12 | 13 | autoheader && \ 14 | $LIBTOOLIZE --copy && \ 15 | ( [ -d m4 ] || mkdir m4 ) && \ 16 | aclocal -I m4 && \ 17 | automake --copy --add-missing && \ 18 | autoconf && \ 19 | ./configure "$@" 20 | 21 | STATUS=$? 
22 | 23 | cd $ORIGDIR 24 | 25 | ([ $STATUS -eq 0 ] && echo "Autogen done." ) || echo "Autogen failed." 26 | 27 | exit $STATUS 28 | 29 | -------------------------------------------------------------------------------- /colors/camera_bt709_422.yuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vkocheryzhkin/gpujpeg/f12b058cd04469b5a46661bfe3aebf3e780d8c8d/colors/camera_bt709_422.yuv -------------------------------------------------------------------------------- /colors/colorspace.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vkocheryzhkin/gpujpeg/f12b058cd04469b5a46661bfe3aebf3e780d8c8d/colors/colorspace.ods -------------------------------------------------------------------------------- /colors/display_diff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get script folder 4 | DIR=`dirname $0` 5 | 6 | convert -depth 8 -size 1920x1080 \ 7 | $1 -crop $((1920/2))x1080+0+0 +repage $DIR/_tmp_left.rgb 8 | 9 | convert -depth 8 -size 1920x1080 \ 10 | $2 -crop $((1920/2))x1080+$((1920/2))+0 +repage $DIR/_tmp_right.rgb 11 | 12 | convert -depth 8 -size $((1920/2))x1080 \ 13 | $DIR/_tmp_left.rgb -depth 8 -size $((1920/2))x1080 \ 14 | $DIR/_tmp_right.rgb +append $DIR/_tmp_diff.rgb 15 | 16 | display -depth 8 -size 1920x1080 $DIR/_tmp_diff.rgb 17 | #-equalize 18 | 19 | rm -f $DIR/_tmp_left.rgb $DIR/_tmp_right.rgb $DIR/_tmp_diff.rgb 20 | -------------------------------------------------------------------------------- /colors/image_bt709_422.yuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vkocheryzhkin/gpujpeg/f12b058cd04469b5a46661bfe3aebf3e780d8c8d/colors/image_bt709_422.yuv -------------------------------------------------------------------------------- /colors/test_common.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get script folder 4 | DIR=`dirname $0` 5 | 6 | # Parameters 7 | # NAME = image name, e.g. "image_yuv_422" 8 | # EXTENSION = image extension, e.g. "yuv" 9 | # MODE = image arguments for gpujpeg, e.g. "--colorspace=yuv --sampling-factor=4:2:2" 10 | 11 | IMAGE=image_bt709_422.yuv 12 | #IMAGE=camera_bt709_422.yuv 13 | 14 | # Create an image from source in specified mode () 15 | $DIR/../gpujpeg.sh --size=1920x1080 --colorspace=ycbcr-bt709 --sampling-factor=4:2:2 \ 16 | --convert $MODE $DIR/$IMAGE $DIR/$NAME.$EXTENSION 17 | 18 | # Encode and Decode the image 19 | $DIR/../gpujpeg.sh --size 1920x1080 $MODE \ 20 | --encode --quality 100 $DIR/$NAME.$EXTENSION $DIR/$NAME.encoded.jpg 21 | $DIR/../gpujpeg.sh $MODE \ 22 | --decode $DIR/$NAME.encoded.jpg $DIR/$NAME.decoded.$EXTENSION 23 | 24 | # Convert the Original and the Processed Image to RGB444 25 | $DIR/../gpujpeg.sh --size=1920x1080 $MODE \ 26 | --convert --colorspace=rgb --sampling-factor=4:4:4 $DIR/$NAME.$EXTENSION $DIR/$NAME.rgb 27 | $DIR/../gpujpeg.sh --size=1920x1080 $MODE \ 28 | --convert --colorspace=rgb --sampling-factor=4:4:4 $DIR/$NAME.decoded.$EXTENSION $DIR/$NAME.decoded.rgb 29 | 30 | # Display Left/Right Diff of the Original and the Processed Image 31 | $DIR/display_diff.sh $DIR/$NAME.rgb $DIR/$NAME.decoded.rgb 32 | 33 | # Delete Created Files 34 | rm -f $DIR/$NAME.$EXTENSION $DIR/$NAME.rgb $DIR/$NAME.encoded.jpg $DIR/$NAME.decoded.$EXTENSION $DIR/$NAME.decoded.rgb 35 | 36 | -------------------------------------------------------------------------------- /colors/test_rgb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Parameters 4 | NAME="image_rgb_444" 5 | EXTENSION="rgb" 6 | MODE="--colorspace=rgb --sampling-factor=4:4:4" 7 | 8 | # Run test 9 | source `dirname $0`/test_common.sh 10 | 
-------------------------------------------------------------------------------- /colors/test_transform.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Parameters 4 | NAME="image_rgb_444" 5 | EXTENSION="yuv" 6 | MODE="--colorspace=ycbcr-jpeg --sampling-factor=4:4:4" 7 | 8 | # Get script folder 9 | DIR=`dirname $0` 10 | 11 | IMAGE=image_bt709_422.yuv 12 | #IMAGE=camera_bt709_422.yuv 13 | 14 | # Create an image from source in RGB 15 | $DIR/../gpujpeg.sh --size=1920x1080 --colorspace=ycbcr-bt709 --sampling-factor=4:2:2 \ 16 | --convert --colorspace=rgb --sampling-factor=4:4:4 $DIR/$IMAGE $DIR/original.rgb 17 | 18 | # Convert image to specified mode and back 19 | $DIR/../gpujpeg.sh --size=1920x1080 --colorspace=rgb --sampling-factor=4:4:4 \ 20 | --convert $MODE $DIR/original.rgb $DIR/$NAME.$EXTENSION 21 | $DIR/../gpujpeg.sh --size=1920x1080 $MODE \ 22 | --convert --colorspace=rgb --sampling-factor=4:4:4 $DIR/$NAME.$EXTENSION $DIR/processed.rgb 23 | 24 | # Display Left/Right Diff of the Original and the Processed Image 25 | $DIR/display_diff.sh $DIR/original.rgb $DIR/processed.rgb 26 | 27 | # Delete Created Files 28 | rm -f $DIR/original.rgb $DIR/$NAME.$EXTENSION $DIR/processed.rgb 29 | 30 | -------------------------------------------------------------------------------- /colors/test_yuv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Parameters 4 | NAME="image_yuv_422" 5 | EXTENSION="yuv" 6 | MODE="--colorspace=ycbcr-bt601 --sampling-factor=4:4:4" 7 | 8 | # Run test 9 | source `dirname $0`/test_common.sh 10 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.65]) 2 | AC_INIT([libgpujpeg], [0.0.2], [martin.srom@mail.muni.cz], [libgpujpeg], [https://sourceforge.net/p/gpujpeg/]) 3 | 
AC_CONFIG_MACRO_DIR([m4]) 4 | AC_CONFIG_SRCDIR([src/main.c]) 5 | AC_CONFIG_AUX_DIR([.]) 6 | AM_INIT_AUTOMAKE 7 | 8 | AC_CONFIG_HEADERS([config.h]) 9 | 10 | GPUJPEG_MAJOR_VERSION=0 11 | GPUJPEG_MINOR_VERSION=0 12 | GPUJPEG_MICRO_VERSION=1 13 | 14 | GPUJPEG_LIBRARY_VERSION=0:1:0 15 | 16 | GPUJPEG_API_VERSION=0.1 17 | AC_SUBST(GPUJPEG_API_VERSION) 18 | AC_SUBST(GPUJPEG_LIBRARY_VERSION) 19 | 20 | LT_PREREQ([2.2]) 21 | LT_INIT 22 | 23 | AC_PROG_LIBTOOL 24 | AM_PROG_LIBTOOL 25 | 26 | # Checks for programs. 27 | AC_PROG_CXX 28 | AC_PROG_CC 29 | AC_PROG_INSTALL 30 | AM_PROG_CC_C_O 31 | 32 | # debug 33 | AC_ARG_ENABLE(debug, [ --enable-debug compile with debug info] ) 34 | if test x$enable_debug = x ; then 35 | enable_debug=no 36 | fi 37 | 38 | if test x$enable_debug = xyes ; then 39 | COMMON_FLAGS="$COMMON_FLAGS -g -D_DEBUG -O0" 40 | NVCCFLAGS="$NVCCFLAGS -G" 41 | else 42 | COMMON_FLAGS="$COMMON_FLAGS -O2" 43 | fi 44 | 45 | # huffmann 46 | AC_ARG_ENABLE(constant-tables, [ --disable-constant-tables disable huffman coder tables in constant memory] ) 47 | if test x$enable_constant_tables = x ; then 48 | enable_constant_tables=yes 49 | fi 50 | 51 | if test x$enable_constant_tables = xyes ; then 52 | AC_DEFINE([GPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT], [1], [Build with huffmann coder tables in constant memory]) 53 | fi 54 | 55 | # opengl 56 | AC_ARG_ENABLE(opengl, [ --enable-opengl enable opengl support] ) 57 | if test x$enable_opengl = x ; then 58 | enable_opengl=no 59 | fi 60 | 61 | # OS X universal build 62 | AC_ARG_ENABLE(osx-universal, [ --enable-osx-universal enable OS X universal build] ) 63 | if test x$enable_osx_universal = x ; then 64 | enable_osx_universal=no 65 | fi 66 | 67 | if test $enable_opengl = yes ; then 68 | AC_CHECK_HEADER(GL/glew.h, FOUND_GLEW_H=yes) 69 | AC_CHECK_LIB(GLEW, glewIsSupported, FOUND_GLEW_L=yes) 70 | AC_CHECK_LIB(GL, glBindTexture, FOUND_GL_L=yes) 71 | 72 | if test "$FOUND_GLEW_L" = yes -a "$FOUND_GLEW_H" = yes -a "$FOUND_GL_L" = yes; then 73 
| AC_DEFINE([GPUJPEG_USE_OPENGL], [1], [Build with OpenGL support]) 74 | GPUJPEG_LIBS="$GPUJPEG_LIBS -lGLEW -lGL" 75 | else 76 | enable_opengl=no 77 | fi 78 | fi 79 | 80 | 81 | # CUDA 82 | AC_ARG_WITH(cuda, 83 | [ --with-cuda=DIR specify cuda root], 84 | [CUDA_INSTALL_PATH=$withval] 85 | ) 86 | 87 | AC_PATH_PROG(CUDA, nvcc, nvcc_not_found, [$PATH$PATH_SEPARATOR$CUDA_INSTALL_PATH/bin]dnl 88 | [$PATH_SEPARATOR/opt/cuda/bin$PATH_SEPARATOR/usr/local/cuda/bin]) 89 | 90 | if test x$CUDA = xnvcc_not_found ; then 91 | AC_MSG_FAILURE([CUDA is required to build the gpujpeg library!]) 92 | fi 93 | 94 | CUDA=`dirname $CUDA` 95 | CUDA=`dirname $CUDA` 96 | CUDA_INSTALL_PATH=$CUDA 97 | 98 | LBITS=`getconf LONG_BIT` 99 | if test $LBITS = 64 -a `uname -s` != 'Darwin'; then 100 | CUDA_INSTALL_LIB="${CUDA_INSTALL_PATH}/lib64" 101 | else 102 | CUDA_INSTALL_LIB="${CUDA_INSTALL_PATH}/lib" 103 | fi 104 | 105 | AM_CONDITIONAL([DARWIN], [test `uname -s` = Darwin]) 106 | 107 | if test $enable_osx_universal = yes; then 108 | CFLAGS="$CFLAGS -arch x86_64 -arch i386" 109 | CXXFLAGS="$CXXFLAGS -arch x86_64 -arch i386" 110 | LIBGPUJPEG_CUDA_OBJS=" \ 111 | build/universal/gpujpeg_huffman_gpu_encoder.o \ 112 | build/universal/gpujpeg_dct_gpu.o \ 113 | build/universal/gpujpeg_preprocessor.o \ 114 | build/universal/gpujpeg_huffman_gpu_decoder.o" 115 | NVCCFLAGS="$NVCCFLAGS -Xcompiler -Wno-error=unused-command-line-argument-hard-error-in-future" 116 | else 117 | LIBGPUJPEG_CUDA_OBJS=" \ 118 | src/gpujpeg_huffman_gpu_encoder.cu.o \ 119 | src/gpujpeg_dct_gpu.cu.o \ 120 | src/gpujpeg_preprocessor.cu.o \ 121 | src/gpujpeg_huffman_gpu_decoder.cu.o" 122 | fi 123 | 124 | GPUJPEG_LDFLAGS="$GPUJPEG_LDFLAGS -L${CUDA_INSTALL_LIB} -lcudart" 125 | COMMON_FLAGS="$COMMON_FLAGS -I. 
-I${CUDA_INSTALL_PATH}/include" 126 | 127 | AC_SUBST(CUDA_EXTRA_ARCH) 128 | AC_SUBST(CUDA_INSTALL_PATH) 129 | AC_SUBST(CUDA_INSTALL_LIB) 130 | AC_SUBST(COMMON_FLAGS) 131 | AC_SUBST(GPUJPEG_LIBS) 132 | AC_SUBST(GPUJPEG_CFLAGS) 133 | AC_SUBST(GPUJPEG_LDFLAGS) 134 | AC_SUBST(GPUJPEG_LIBS) 135 | AC_SUBST(LIBGPUJPEG_CUDA_OBJS) 136 | AC_SUBST(NVCCFLAGS) 137 | 138 | AC_CONFIG_FILES([Makefile libgpujpeg.pc test/memcheck/Makefile test/opengl_interop/Makefile ]) 139 | AC_OUTPUT 140 | # Report the effective configuration; the debug switch is stored in enable_debug 141 | AC_MSG_RESULT([ 142 | Configuration summary: 143 | 144 | Target ...................... $host 145 | Prefix ...................... $prefix 146 | Debug ....................... $enable_debug 147 | 148 | Constant tables.............. $enable_constant_tables 149 | OpenGL ...................... $enable_opengl 150 | 151 | CUDA root ................... $CUDA_INSTALL_PATH 152 | 153 | 154 | ]) 155 | 156 | 157 | -------------------------------------------------------------------------------- /gpujpeg.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Express 2012 for Windows Desktop 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpujpeg", "gpujpeg.vcxproj", "{B9D06885-F4F3-4B01-8C43-E131210B9F27}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Release|Win32 = Release|Win32 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.ActiveCfg = Debug|Win32 13 | {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Debug|Win32.Build.0 = Debug|Win32 14 | {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.ActiveCfg = Release|Win32 15 | {B9D06885-F4F3-4B01-8C43-E131210B9F27}.Release|Win32.Build.0 = Release|Win32 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 
20 | EndGlobal 21 | -------------------------------------------------------------------------------- /gpujpeg.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {B9D06885-F4F3-4B01-8C43-E131210B9F27} 15 | Win32Proj 16 | gpujpeg 17 | 18 | 19 | 20 | DynamicLibrary 21 | true 22 | v110 23 | Unicode 24 | 25 | 26 | DynamicLibrary 27 | false 28 | v110 29 | true 30 | Unicode 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | true 45 | 46 | 47 | false 48 | 49 | 50 | 51 | 52 | 53 | Level3 54 | Disabled 55 | WIN32;_DEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions) 56 | .;%(AdditionalIncludeDirectories) 57 | 58 | 59 | Windows 60 | true 61 | cudart.lib;%(AdditionalDependencies) 62 | 63 | 64 | compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration) 65 | 66 | 67 | 68 | 69 | Level3 70 | 71 | 72 | MaxSpeed 73 | true 74 | true 75 | WIN32;NDEBUG;_WINDOWS;_USRDLL;GPUJPEG_EXPORTS;%(PreprocessorDefinitions) 76 | .;%(AdditionalIncludeDirectories) 77 | 78 | 79 | Windows 80 | true 81 | true 82 | true 83 | cudart.lib;%(AdditionalDependencies) 84 | 85 | 86 | compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;%(CodeGeneration) 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /gpujpeg.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | Header Files 29 | 30 | 31 | Header Files 32 | 33 | 34 | Header Files 35 | 36 | 37 | Header Files 38 | 39 | 40 | Header Files 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Source Files 56 | 57 | 58 | Source Files 59 | 60 | 61 | Source Files 62 | 63 | 64 | Source Files 65 | 66 | 67 | Source Files 68 | 69 | 70 | Source Files 71 | 72 | 73 | Source Files 74 | 75 | 76 | Source Files 77 | 78 | 79 | 80 | 81 | Source Files 82 | 83 | 84 | Source Files 85 | 86 | 87 | Source Files 88 | 89 | 90 | Source Files 91 | 92 | 93 | Source Files 94 | 95 | 96 | Source Files 97 | 98 | 99 | Source Files 100 | 101 | 102 | Source Files 103 | 104 | 105 | Source Files 106 | 107 | 108 | 109 | 110 | Source Files 111 | 112 | 113 | Source Files 114 | 115 | 116 | Source Files 117 | 118 | 119 | Source Files 120 | 121 | 122 | -------------------------------------------------------------------------------- /libgpujpeg.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: GPUjpeg 7 | Description: The first test implementation of the JPEG image compression standard for NVIDIA GPUs used for real-time transmission of high-definition video. 8 | Requires: 9 | Version: @PACKAGE_VERSION@ 10 | Libs: -L${libdir} -lgpujpeg -lcudart 11 | Cflags: 12 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_H 31 | #define GPUJPEG_H 32 | 33 | #include 34 | #include 35 | 36 | #endif // GPUJPEG_H 37 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_common_internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_COMMON_INTERNAL_H 31 | #define GPUJPEG_COMMON_INTERNAL_H 32 | 33 | #include "cuda_runtime.h" 34 | 35 | /** 36 | * Declare timer variables: start/stop CUDA events and the elapsed-time float 37 | * 38 | * @param name Timer name, used as the prefix of the declared identifiers 39 | */ 40 | #define GPUJPEG_CUSTOM_TIMER_DECLARE(name) \ 41 | cudaEvent_t name ## _start__; \ 42 | cudaEvent_t name ## _stop__; \ 43 | float name ## _elapsedTime__; \ 44 | 45 | /** 46 | * Create timer (creates the start and stop events declared for it) 47 | * 48 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 49 | */ 50 | #define GPUJPEG_CUSTOM_TIMER_CREATE(name) \ 51 | cudaEventCreate(&name ## _start__); \ 52 | cudaEventCreate(&name ## _stop__); \ 53 | 54 | /** 55 | * Start timer (records the start event, passing 0 as the stream argument) 56 | * 57 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 58 | */ 59 | #define GPUJPEG_CUSTOM_TIMER_START(name) \ 60 | cudaEventRecord(name ## _start__, 0) \ 61 | 62 | /** 63 | * Stop timer: record the stop event, wait for it and store the elapsed time 64 | * NOTE(review): expands to several statements, so it is unsafe as the body of an unbraced if/else 65 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 66 | */ 67 | #define GPUJPEG_CUSTOM_TIMER_STOP(name) \ 68 | cudaEventRecord(name ## _stop__, 0); \ 69 | cudaEventSynchronize(name ## _stop__); \ 70 | cudaEventElapsedTime(&name ## _elapsedTime__, name ## _start__, name ## _stop__) \ 71 | 72 | /** 73 | * Get duration for timer (the elapsed time stored by the last stop, printed elsewhere as ms) 74 | * 75 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 76 | */ 77 | #define GPUJPEG_CUSTOM_TIMER_DURATION(name) name ## _elapsedTime__ 78 | 79 | /** 80 | * Stop timer and print the label followed by the elapsed time in ms 81 | * NOTE(review): uses printf, but no stdio.h include is visible in this header 82 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 83 | * @param text Label string printed before the elapsed time 84 | */ 85 | #define GPUJPEG_CUSTOM_TIMER_STOP_PRINT(name, text) \ 86 | GPUJPEG_CUSTOM_TIMER_STOP(name); \ 87 | printf("%s %f ms\n", text, name ## _elapsedTime__) \ 88 | 89 | /** 90 | * Destroy timer (destroys the start and stop events) 91 | * 92 | * @param name Timer name as given to GPUJPEG_CUSTOM_TIMER_DECLARE 93 | */ 94 | #define GPUJPEG_CUSTOM_TIMER_DESTROY(name) \ 95 | cudaEventDestroy(name ## _start__); \ 96 | cudaEventDestroy(name ## _stop__); \ 97 | 98 | /** 99 | * Default timer implementation: the custom timer macros applied to a timer named def 100 | */ 101 | #define GPUJPEG_TIMER_INIT() \ 102 | GPUJPEG_CUSTOM_TIMER_DECLARE(def) \ 103 | GPUJPEG_CUSTOM_TIMER_CREATE(def) 104 | #define GPUJPEG_TIMER_START() GPUJPEG_CUSTOM_TIMER_START(def) 105 | #define GPUJPEG_TIMER_STOP() GPUJPEG_CUSTOM_TIMER_STOP(def) 106 | #define GPUJPEG_TIMER_DURATION() 
GPUJPEG_CUSTOM_TIMER_DURATION(def) 107 | #define GPUJPEG_TIMER_STOP_PRINT(text) GPUJPEG_CUSTOM_TIMER_STOP_PRINT(def, text) 108 | #define GPUJPEG_TIMER_DEINIT() GPUJPEG_CUSTOM_TIMER_DESTROY(def) 109 | 110 | #endif // GPUJPEG_COMMON_INTERNAL_H 111 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_decoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_DECODER_H 31 | #define GPUJPEG_DECODER_H 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | #if defined _MSC_VER || defined __MINGW32__ 42 | #ifdef GPUJPEG_EXPORTS 43 | #define GPUJPEG_API __declspec(dllexport) 44 | #else 45 | #define GPUJPEG_API __declspec(dllimport) 46 | #endif 47 | #else // other platforms 48 | #define GPUJPEG_API 49 | #endif 50 | 51 | struct gpujpeg_decoder; 52 | 53 | /** 54 | * Decoder output type (selects which buffer the decoder uses for its output) 55 | */ 56 | enum gpujpeg_decoder_output_type { 57 | // Decoder will use its internal output buffer 58 | GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER, 59 | // Decoder will use custom output buffer 60 | GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER, 61 | // Decoder will use OpenGL Texture PBO Resource as output buffer 62 | GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE, 63 | // Decoder will use CUDA buffer as output buffer 64 | GPUJPEG_DECODER_OUTPUT_CUDA_BUFFER, 65 | }; 66 | 67 | /** 68 | * Decoder output structure 69 | */ 70 | struct gpujpeg_decoder_output 71 | { 72 | // Output type 73 | enum gpujpeg_decoder_output_type type; 74 | 75 | // Decoded image data (NOTE(review): previously labelled "Compressed data"; presumably the decompressed output - confirm) 76 | uint8_t* data; 77 | 78 | // Decoded image data size (NOTE(review): previously labelled "Compressed data size" - confirm) 79 | int data_size; 80 | 81 | // OpenGL texture 82 | struct gpujpeg_opengl_texture* texture; 83 | }; 84 | 85 | /** 86 | * Set default parameters to decoder output structure 87 | * 88 | * @param output Decoder output structure 89 | * @return void 90 | */ 91 | GPUJPEG_API void 92 | gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output); 93 | 94 | /** 95 | * Setup decoder output to custom buffer 96 | * 97 | * @param output Decoder output structure 98 | * @param custom_buffer Custom buffer 99 | * @return void 100 | */ 101 | GPUJPEG_API void 102 | gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t* custom_buffer); 103 | 104 | /** 105 | * Set decoder output to OpenGL texture 106 | * 107 | * @param output Decoder output structure 
108 | * @param texture OpenGL texture used as output buffer 109 | */ 110 | GPUJPEG_API void 111 | gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct gpujpeg_opengl_texture* texture); 112 | 113 | /** 114 | * Sets output to CUDA buffer 115 | * 116 | * @param output Decoder output structure 117 | */ 118 | GPUJPEG_API void 119 | gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output); 120 | 121 | /** 122 | * Create JPEG decoder 123 | * 124 | * Takes no arguments; coder and image parameters are passed 125 | * to gpujpeg_decoder_init() instead 126 | * @return decoder structure if succeeds, otherwise NULL 127 | */ 128 | GPUJPEG_API struct gpujpeg_decoder* 129 | gpujpeg_decoder_create(); 130 | 131 | /** 132 | * Init JPEG decoder for specific image size 133 | * 134 | * @param decoder Decoder structure 135 | * @param param Parameters for coder 136 | * @param param_image Parameters for image data 137 | * @return 0 if succeeds, otherwise nonzero 138 | */ 139 | GPUJPEG_API int 140 | gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image); 141 | 142 | /** 143 | * Decompress image by decoder 144 | * 145 | * @param decoder Decoder structure 146 | * @param image Source image data 147 | * @param image_size Source image data size 148 | * @param output Decoder output structure describing where the decompressed image 149 | * is placed (see struct gpujpeg_decoder_output and its setup functions) 150 | * @return 0 if succeeds, otherwise nonzero 151 | */ 152 | GPUJPEG_API int 153 | gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output); 154 | 155 | /** 156 | * Destroy JPEG decoder 157 | * 158 | * @param decoder Decoder structure 159 | * @return 0 if succeeds, otherwise nonzero 160 | */ 161 | GPUJPEG_API int 162 | gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder); 163 | 164 | /** 165 | * Sets 
output format 166 | * 167 | * @param decoder Decoder structure 168 | * @param color_space Requested output color space 169 | * @param sampling_factor Requested color sampling factor 170 | */ 171 | GPUJPEG_API void 172 | gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder, 173 | enum gpujpeg_color_space color_space, 174 | enum gpujpeg_sampling_factor sampling_factor); 175 | 176 | #ifdef __cplusplus 177 | } 178 | #endif 179 | 180 | #endif // GPUJPEG_DECODER_H 181 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_decoder_internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_DECODER_INTERNAL_H 31 | #define GPUJPEG_DECODER_INTERNAL_H 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | /** 39 | * JPEG decoder structure (internal state behind the opaque struct gpujpeg_decoder) 40 | */ 41 | struct gpujpeg_decoder 42 | { 43 | // JPEG coder structure 44 | struct gpujpeg_coder coder; 45 | 46 | // JPEG reader structure 47 | struct gpujpeg_reader* reader; 48 | 49 | // Quantization tables, one per component type 50 | struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; 51 | 52 | // Huffman decoder tables, indexed by [component type][huffman type] 53 | struct gpujpeg_table_huffman_decoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; 54 | // Pointers to Huffman decoder tables in device memory (same indexing) 55 | struct gpujpeg_table_huffman_decoder* d_table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; 56 | 57 | // Current segment count for decoded image 58 | int segment_count; 59 | 60 | // Current compressed data size for decoded image 61 | int data_compressed_size; 62 | 63 | // Timers (each macro adds start/stop cudaEvent_t members and an elapsed-time float) 64 | GPUJPEG_CUSTOM_TIMER_DECLARE(def) 65 | GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) 66 | }; 67 | 68 | #endif // GPUJPEG_DECODER_INTERNAL_H 69 | 70 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_encoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 
4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_ENCODER_H 31 | #define GPUJPEG_ENCODER_H 32 | 33 | #include 34 | #include 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | #if defined _MSC_VER || defined __MINGW32__ 41 | #ifdef GPUJPEG_EXPORTS 42 | #define GPUJPEG_API __declspec(dllexport) 43 | #else 44 | #define GPUJPEG_API __declspec(dllimport) 45 | #endif 46 | #else // other platforms 47 | #define GPUJPEG_API 48 | #endif 49 | 50 | struct gpujpeg_encoder; 51 | 52 | /** 53 | * Encoder input type (selects which source the encoder reads its input from) 54 | */ 55 | enum gpujpeg_encoder_input_type { 56 | // Encoder will use custom input buffer 57 | GPUJPEG_ENCODER_INPUT_IMAGE, 58 | // Encoder will use OpenGL Texture PBO Resource as input buffer 59 | GPUJPEG_ENCODER_INPUT_OPENGL_TEXTURE, 60 | }; 61 | 62 | /** 63 | * Encoder input structure 64 | */ 65 | struct gpujpeg_encoder_input 66 | { 67 | // Input type 68 | enum gpujpeg_encoder_input_type type; 69 | 70 | // Image data 71 | uint8_t* image; 72 | 73 | // Registered OpenGL Texture 74 | struct gpujpeg_opengl_texture* texture; 75 | }; 76 | 77 | /** 78 | * Set encoder input to image data 79 | * 80 | * @param input Encoder input structure 81 | * @param image Input image data 82 | * @return void 83 | */ 84 | GPUJPEG_API void 85 | gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image); 86 | 87 | /** 88 | * Set encoder input to OpenGL texture 89 | * 90 | * @param input Encoder input structure 91 | * @param texture Registered OpenGL texture 92 | * @return void 93 | */ 94 | GPUJPEG_API void 95 | gpujpeg_encoder_input_set_texture(struct gpujpeg_encoder_input* input, struct gpujpeg_opengl_texture* texture); 96 | 97 | /** 98 | * Create JPEG encoder 99 | * 100 | * @param param Parameters for coder 101 | * @param param_image Parameters for image data 102 | * @return encoder structure if succeeds, otherwise NULL 103 | */ 104 | GPUJPEG_API struct gpujpeg_encoder* 105 | gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct 
gpujpeg_image_parameters* param_image); 106 | 107 | /** 108 | * Compress image by encoder 109 | * 110 | * @param encoder Encoder structure 111 | * @param input Encoder input structure (image data or OpenGL texture) 112 | * @param image_compressed Pointer to variable where compressed image data buffer will be placed 113 | * @param image_compressed_size Pointer to variable where compressed image size will be placed 114 | * @return 0 if succeeds, otherwise nonzero 115 | */ 116 | GPUJPEG_API int 117 | gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size); 118 | 119 | /** 120 | * Destroy JPEG encoder 121 | * 122 | * @param encoder Encoder structure 123 | * @return 0 if succeeds, otherwise nonzero 124 | */ 125 | GPUJPEG_API int 126 | gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder); 127 | 128 | #ifdef __cplusplus 129 | } 130 | #endif 131 | 132 | #endif // GPUJPEG_ENCODER_H 133 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_encoder_internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_ENCODER_INTERNAL_H 31 | #define GPUJPEG_ENCODER_INTERNAL_H 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | /** JPEG encoder structure (internal state behind the opaque struct gpujpeg_encoder) */ 42 | struct gpujpeg_encoder 43 | { 44 | // JPEG coder structure 45 | struct gpujpeg_coder coder; 46 | 47 | // JPEG writer structure 48 | struct gpujpeg_writer* writer; 49 | 50 | // Quantization tables, one per component type 51 | struct gpujpeg_table_quantization table_quantization[GPUJPEG_COMPONENT_TYPE_COUNT]; 52 | 53 | // Huffman encoder tables, indexed by [component type][huffman type] 54 | struct gpujpeg_table_huffman_encoder table_huffman[GPUJPEG_COMPONENT_TYPE_COUNT][GPUJPEG_HUFFMAN_TYPE_COUNT]; 55 | 56 | // Timers (each macro adds start/stop cudaEvent_t members and an elapsed-time float) 57 | GPUJPEG_CUSTOM_TIMER_DECLARE(def) 58 | GPUJPEG_CUSTOM_TIMER_DECLARE(in_gpu) 59 | }; 60 | 61 | #ifdef __cplusplus 62 | } 63 | #endif 64 | 65 | #endif // GPUJPEG_ENCODER_INTERNAL_H 66 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_reader.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 
4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_READER_H 31 | #define GPUJPEG_READER_H 32 | 33 | #include 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | /** JPEG decoder structure predeclaration */ 40 | struct gpujpeg_decoder; 41 | 42 | /** JPEG reader scan structure */ 43 | struct gpujpeg_reader_scan 44 | { 45 | // Global segment index 46 | int segment_index; 47 | // Segment count in scan 48 | int segment_count; 49 | }; 50 | 51 | /** JPEG reader structure */ 52 | struct gpujpeg_reader 53 | { 54 | // Parameters 55 | struct gpujpeg_parameters param; 56 | 57 | // Parameters for image data 58 | struct gpujpeg_image_parameters param_image; 59 | 60 | // Loaded component count 61 | int comp_count; 62 | 63 | // Loaded scans 64 | struct gpujpeg_reader_scan scan[GPUJPEG_MAX_COMPONENT_COUNT]; 65 | 66 | // Loaded scans count 67 | int scan_count; 68 | 69 | // Total segment count 70 | int segment_count; 71 | 72 | // Total size of compressed data read 73 | int data_compressed_size; 74 | 75 | // Segment info (every buffer is placed inside another header) 76 | uint8_t* segment_info[GPUJPEG_MAX_SEGMENT_INFO_HEADER_COUNT]; 77 | // Segment info buffers count (equals number of segment info headers) 78 | int segment_info_count; 79 | // Segment info total buffers size 80 | int segment_info_size; 81 | }; 82 | 83 | /** 84 | * Create JPEG reader 85 | * 86 | * @return reader structure if succeeds, otherwise NULL 87 | */ 88 | struct gpujpeg_reader* 89 | gpujpeg_reader_create(); 90 | 91 | /** 92 | * Destroy JPEG reader 93 | * 94 | * @param reader Reader structure 95 | * @return 0 if succeeds, otherwise nonzero 96 | */ 97 | int 98 | gpujpeg_reader_destroy(struct gpujpeg_reader* reader); 99 | 100 | /** 101 | * Read JPEG image from data buffer 102 | * @param decoder Decoder structure 103 | * @param image Image data 104 | * @param image_size Image data size 105 | * @return 0 if succeeds, otherwise nonzero 106 | */ 107 | int 108 | gpujpeg_reader_read_image(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size); 109 | 110 | 
#ifdef __cplusplus 111 | } 112 | #endif 113 | 114 | #endif // GPUJPEG_READER_H 115 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_table.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_TABLE_H 31 | #define GPUJPEG_TABLE_H 32 | 33 | #include 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | #define GPUJPEG_ORDER_NATURAL_SIZE (64 + 16) 40 | 41 | /** 42 | * JPEG natural order from zigzag order 43 | * 44 | * More info: 45 | * 0 in zig-zag is 0 in natural-order (gpujpeg_order_natural[0] == 0) 46 | * 1 in zig-zag is 1 in natural-order (gpujpeg_order_natural[1] == 1) 47 | * 2 in zig-zag is 8 in natural-order (gpujpeg_order_natural[2] == 8) 48 | * 3 in zig-zag is 16 in natural-order (gpujpeg_order_natural[3] == 16) 49 | * ... 50 | * 51 | * Example: 52 | * 53 | * natural order zig-zag 54 | * 55 | * 4096 5461 6554 ... 4096 5461 5461 4681 5461 6554 ... 56 | * 5461 5461 ... 57 | * 4681 ... 58 | * ... 59 | * 60 | * Based on http://svn.ghostscript.com/ghostscript/tags/jpeg-6b/jutils.c 61 | * additional 16 entries are added. 62 | * 63 | * "When reading corrupted data, the Huffman decoders could attempt 64 | * to reference an entry beyond the end of this array (if the decoded 65 | * zero run length reaches past the end of the block). To prevent 66 | * wild stores without adding an inner-loop test, we put some extra 67 | * "63"s after the real entries. This will cause the extra coefficient 68 | * to be stored in location 63 of the block, not somewhere random. 69 | * The worst case would be a run-length of 15, which means we need 16 70 | * fake entries."
71 | */ 72 | static const int gpujpeg_order_natural[GPUJPEG_ORDER_NATURAL_SIZE] = { 73 | 0, 1, 8, 16, 9, 2, 3, 10, 74 | 17, 24, 32, 25, 18, 11, 4, 5, 75 | 12, 19, 26, 33, 40, 48, 41, 34, 76 | 27, 20, 13, 6, 7, 14, 21, 28, 77 | 35, 42, 49, 56, 57, 50, 43, 36, 78 | 29, 22, 15, 23, 30, 37, 44, 51, 79 | 58, 59, 52, 45, 38, 31, 39, 46, 80 | 53, 60, 61, 54, 47, 55, 62, 63, 81 | 63, 63, 63, 63, 63, 63, 63, 63, // Extra entries for safety in decoder 82 | 63, 63, 63, 63, 63, 63, 63, 63 83 | }; 84 | 85 | /** JPEG quantization table structure */ 86 | struct gpujpeg_table_quantization 87 | { 88 | // Quantization raw table 89 | uint8_t table_raw[64]; 90 | // Quantization forward/inverse table 91 | uint16_t table[64]; 92 | // Quantization forward/inverse table in device memory 93 | uint16_t* d_table; 94 | // Quantization table for forward DCT, pre-divided with output DCT weights and transposed for coalesced access 95 | float* d_table_forward; 96 | }; 97 | 98 | /** JPEG table for huffman encoding */ 99 | struct gpujpeg_table_huffman_encoder { 100 | // Code for each symbol 101 | unsigned int code[256]; 102 | // Length of code for each symbol 103 | char size[256]; 104 | // If no code has been allocated for a symbol S, size[S] is 0 105 | 106 | // These two fields directly represent the contents of a JPEG DHT marker 107 | // bits[k] = # of symbols with codes of length k bits; bits[0] is unused 108 | unsigned char bits[17]; 109 | // The symbols, in order of incr code length 110 | unsigned char huffval[256]; 111 | }; 112 | 113 | /** JPEG table for huffman decoding */ 114 | struct gpujpeg_table_huffman_decoder { 115 | // Smallest code of length k 116 | int mincode[17]; 117 | // Largest code of length k (-1 if none) 118 | int maxcode[18]; 119 | // Huffval[] index of 1st symbol of length k 120 | int valptr[17]; 121 | // # bits, or 0 if too long 122 | int look_nbits[256]; 123 | // Symbol, or unused 124 | unsigned char look_sym[256]; 125 | 126 | // These two fields directly represent the
contents of a JPEG DHT marker 127 | // bits[k] = # of symbols with codes of length k bits; bits[0] is unused 128 | unsigned char bits[17]; 129 | // The symbols, in order of incr code length 130 | unsigned char huffval[256]; 131 | }; 132 | 133 | /** 134 | * Init JPEG quantization table for encoder 135 | * 136 | * @param table Table structure 137 | * @param type Type of component for table 138 | * @param quality Quality (0-100) 139 | * @return 0 if succeeds, otherwise nonzero 140 | */ 141 | int 142 | gpujpeg_table_quantization_encoder_init(struct gpujpeg_table_quantization* table, enum gpujpeg_component_type type, int quality); 143 | 144 | /** 145 | * Init JPEG quantization table for decoder 146 | * 147 | * @param table Table structure 148 | * @param type Type of component for table 149 | * @param quality Quality (0-100) 150 | * @return 0 if succeeds, otherwise nonzero 151 | */ 152 | int 153 | gpujpeg_table_quantization_decoder_init(struct gpujpeg_table_quantization* table, enum gpujpeg_component_type type, int quality); 154 | 155 | /** 156 | * Compute JPEG quantization table for decoder 157 | * 158 | * @param table Table structure 159 | * @return 0 if succeeds, otherwise nonzero 160 | */ 161 | int 162 | gpujpeg_table_quantization_decoder_compute(struct gpujpeg_table_quantization* table); 163 | 164 | /** 165 | * Print JPEG quantization table 166 | * 167 | * @param table Table structure 168 | * @return void 169 | */ 170 | void 171 | gpujpeg_table_quantization_print(struct gpujpeg_table_quantization* table); 172 | 173 | /** 174 | * Initialize encoder huffman DC and AC table for component type 175 | * 176 | * @param table Table structure 177 | * @param comp_type Component type (luminance/chrominance) 178 | * @param huff_type Huffman type (DC/AC) 179 | * @return int status code (presumably 0 if succeeds, otherwise nonzero - confirm against implementation) 180 | */ 181 | int 182 | gpujpeg_table_huffman_encoder_init(struct gpujpeg_table_huffman_encoder* table, enum gpujpeg_component_type comp_type, enum gpujpeg_huffman_type huff_type); 183 | 184 | /** 185 | * Initialize decoder huffman DC and
AC table for component type. It copies the bits and values arrays to the table and calls the compute routine. 186 | * 187 | * @param table Table structure 188 | * @param d_table Table structure in device memory 189 | * @param comp_type Component type (luminance/chrominance) 190 | * @param huff_type Huffman type (DC/AC) 191 | * @return int status code (presumably 0 if succeeds, otherwise nonzero - confirm against implementation) 192 | */ 193 | int 194 | gpujpeg_table_huffman_decoder_init(struct gpujpeg_table_huffman_decoder* table, struct gpujpeg_table_huffman_decoder* d_table, enum gpujpeg_component_type comp_type, enum gpujpeg_huffman_type huff_type); 195 | 196 | /** 197 | * Compute decoder huffman table from bits and values arrays (that are already set in table) 198 | * 199 | * @param table Table structure 200 | * @param d_table Table structure in device memory 201 | * @return void 202 | */ 203 | void 204 | gpujpeg_table_huffman_decoder_compute(struct gpujpeg_table_huffman_decoder* table, struct gpujpeg_table_huffman_decoder* d_table); 205 | 206 | #ifdef __cplusplus 207 | } 208 | #endif 209 | 210 | #endif // GPUJPEG_TABLE_H 211 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_type.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution.
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_TYPE_H 31 | #define GPUJPEG_TYPE_H 32 | 33 | #include 34 | 35 | #ifndef _MSC_VER 36 | #define ATTRIBUTE_UNUSED __attribute__((unused)) 37 | #else 38 | #define ATTRIBUTE_UNUSED 39 | #endif 40 | 41 | #ifdef __cplusplus 42 | extern "C" { 43 | #endif 44 | 45 | /** Constants */ 46 | #define GPUJPEG_BLOCK_SIZE 8 47 | #define GPUJPEG_BLOCK_SQUARED_SIZE 64 48 | #define GPUJPEG_MAX_COMPONENT_COUNT 3 49 | #define GPUJPEG_MAX_BLOCK_COMPRESSED_SIZE (GPUJPEG_BLOCK_SIZE * GPUJPEG_BLOCK_SIZE * 4) 50 | 51 | /** Maximum JPEG header size (MUST be divisible by 4!!!)
*/ 52 | #define GPUJPEG_MAX_HEADER_SIZE (65536 - 100) 53 | 54 | /** Flags */ 55 | #define GPUJPEG_VERBOSE 1 56 | #define GPUJPEG_OPENGL_INTEROPERABILITY 2 57 | 58 | /** Maximum number of segment info headers in stream */ 59 | #define GPUJPEG_MAX_SEGMENT_INFO_HEADER_COUNT 100 60 | 61 | /** 62 | * Color spaces for JPEG codec 63 | */ 64 | enum gpujpeg_color_space { 65 | GPUJPEG_NONE = 0, 66 | GPUJPEG_RGB = 1, 67 | GPUJPEG_YCBCR_BT601 = 2, 68 | GPUJPEG_YCBCR_BT601_256LVLS = 3, 69 | GPUJPEG_YCBCR_BT709 = 4, 70 | GPUJPEG_YCBCR = GPUJPEG_YCBCR_BT709, 71 | GPUJPEG_YUV = 5 72 | }; 73 | 74 | /** 75 | * Get color space name as a string literal ("Unknown" for unrecognized values) 76 | * 77 | * @param color_space Color space to name 78 | */ 79 | static inline const char* 80 | gpujpeg_color_space_get_name(enum gpujpeg_color_space color_space) 81 | { 82 | switch ( color_space ) { 83 | case GPUJPEG_NONE: 84 | return "None"; 85 | case GPUJPEG_RGB: 86 | return "RGB"; 87 | case GPUJPEG_YUV: 88 | return "YUV"; 89 | case GPUJPEG_YCBCR_BT601: 90 | return "YCbCr BT.601"; 91 | case GPUJPEG_YCBCR_BT601_256LVLS: 92 | return "YCbCr BT.601 256 Levels"; 93 | case GPUJPEG_YCBCR_BT709: 94 | return "YCbCr BT.709"; 95 | default: 96 | return "Unknown"; 97 | } 98 | } 99 | 100 | /** 101 | * Sampling factor for image data 102 | */ 103 | enum gpujpeg_sampling_factor { 104 | GPUJPEG_4_4_4 = ((4 << 16) | (4 << 8) | 4), 105 | GPUJPEG_4_2_2 = ((4 << 16) | (2 << 8) | 2), 106 | }; 107 | 108 | /** 109 | * Sampling factor for color component in JPEG format 110 | */ 111 | struct gpujpeg_component_sampling_factor 112 | { 113 | uint8_t horizontal; 114 | uint8_t vertical; 115 | }; 116 | 117 | /** 118 | * JPEG component type 119 | */ 120 | enum gpujpeg_component_type { 121 | GPUJPEG_COMPONENT_LUMINANCE = 0, 122 | GPUJPEG_COMPONENT_CHROMINANCE = 1, 123 | GPUJPEG_COMPONENT_TYPE_COUNT = 2 124 | }; 125 | 126 | /** 127 | * JPEG huffman type 128 | */ 129 | enum gpujpeg_huffman_type { 130 | GPUJPEG_HUFFMAN_DC = 0, 131 | GPUJPEG_HUFFMAN_AC = 1, 132 | GPUJPEG_HUFFMAN_TYPE_COUNT = 2 133 | }; 134 |
135 | #include 136 | 137 | /** 138 | * JPEG marker codes 139 | */ 140 | enum gpujpeg_marker_code { 141 | GPUJPEG_MARKER_SOF0 = 0xc0, 142 | GPUJPEG_MARKER_SOF1 = 0xc1, 143 | GPUJPEG_MARKER_SOF2 = 0xc2, 144 | GPUJPEG_MARKER_SOF3 = 0xc3, 145 | 146 | GPUJPEG_MARKER_SOF5 = 0xc5, 147 | GPUJPEG_MARKER_SOF6 = 0xc6, 148 | GPUJPEG_MARKER_SOF7 = 0xc7, 149 | 150 | GPUJPEG_MARKER_JPG = 0xc8, 151 | GPUJPEG_MARKER_SOF9 = 0xc9, 152 | GPUJPEG_MARKER_SOF10 = 0xca, 153 | GPUJPEG_MARKER_SOF11 = 0xcb, 154 | 155 | GPUJPEG_MARKER_SOF13 = 0xcd, 156 | GPUJPEG_MARKER_SOF14 = 0xce, 157 | GPUJPEG_MARKER_SOF15 = 0xcf, 158 | 159 | GPUJPEG_MARKER_DHT = 0xc4, 160 | 161 | GPUJPEG_MARKER_DAC = 0xcc, 162 | 163 | GPUJPEG_MARKER_RST0 = 0xd0, 164 | GPUJPEG_MARKER_RST1 = 0xd1, 165 | GPUJPEG_MARKER_RST2 = 0xd2, 166 | GPUJPEG_MARKER_RST3 = 0xd3, 167 | GPUJPEG_MARKER_RST4 = 0xd4, 168 | GPUJPEG_MARKER_RST5 = 0xd5, 169 | GPUJPEG_MARKER_RST6 = 0xd6, 170 | GPUJPEG_MARKER_RST7 = 0xd7, 171 | 172 | GPUJPEG_MARKER_SOI = 0xd8, 173 | GPUJPEG_MARKER_EOI = 0xd9, 174 | GPUJPEG_MARKER_SOS = 0xda, 175 | GPUJPEG_MARKER_DQT = 0xdb, 176 | GPUJPEG_MARKER_DNL = 0xdc, 177 | GPUJPEG_MARKER_DRI = 0xdd, 178 | GPUJPEG_MARKER_DHP = 0xde, 179 | GPUJPEG_MARKER_EXP = 0xdf, 180 | 181 | GPUJPEG_MARKER_APP0 = 0xe0, 182 | GPUJPEG_MARKER_APP1 = 0xe1, 183 | GPUJPEG_MARKER_APP2 = 0xe2, 184 | GPUJPEG_MARKER_APP3 = 0xe3, 185 | GPUJPEG_MARKER_APP4 = 0xe4, 186 | GPUJPEG_MARKER_APP5 = 0xe5, 187 | GPUJPEG_MARKER_APP6 = 0xe6, 188 | GPUJPEG_MARKER_APP7 = 0xe7, 189 | GPUJPEG_MARKER_APP8 = 0xe8, 190 | GPUJPEG_MARKER_APP9 = 0xe9, 191 | GPUJPEG_MARKER_APP10 = 0xea, 192 | GPUJPEG_MARKER_APP11 = 0xeb, 193 | GPUJPEG_MARKER_APP12 = 0xec, 194 | GPUJPEG_MARKER_APP13 = 0xed, 195 | GPUJPEG_MARKER_APP14 = 0xee, 196 | GPUJPEG_MARKER_APP15 = 0xef, 197 | 198 | GPUJPEG_MARKER_JPG0 = 0xf0, 199 | GPUJPEG_MARKER_JPG13 = 0xfd, 200 | GPUJPEG_MARKER_COM = 0xfe, 201 | 202 | GPUJPEG_MARKER_TEM = 0x01, 203 | 204 | GPUJPEG_MARKER_ERROR = 0x100 205 | }; 206 | 207 | static 
const char* 208 | gpujpeg_marker_name(enum gpujpeg_marker_code code) ATTRIBUTE_UNUSED; 209 | 210 | /** 211 | * Get marker name from code 212 | * 213 | * @param code 214 | * @return marker name 215 | */ 216 | static const char* 217 | gpujpeg_marker_name(enum gpujpeg_marker_code code) 218 | { 219 | switch (code) { 220 | case GPUJPEG_MARKER_SOF0: return "SOF0"; 221 | case GPUJPEG_MARKER_SOF1: return "SOF1"; 222 | case GPUJPEG_MARKER_SOF2: return "SOF2"; 223 | case GPUJPEG_MARKER_SOF3: return "SOF3"; 224 | case GPUJPEG_MARKER_SOF5: return "SOF5"; 225 | case GPUJPEG_MARKER_SOF6: return "SOF6"; 226 | case GPUJPEG_MARKER_SOF7: return "SOF7"; 227 | case GPUJPEG_MARKER_JPG: return "JPG"; 228 | case GPUJPEG_MARKER_SOF9: return "SOF9"; 229 | case GPUJPEG_MARKER_SOF10: return "SOF10"; 230 | case GPUJPEG_MARKER_SOF11: return "SOF11"; 231 | case GPUJPEG_MARKER_SOF13: return "SOF13"; 232 | case GPUJPEG_MARKER_SOF14: return "SOF14"; 233 | case GPUJPEG_MARKER_SOF15: return "SOF15"; 234 | case GPUJPEG_MARKER_DHT: return "DHT"; 235 | case GPUJPEG_MARKER_DAC: return "DAC"; 236 | case GPUJPEG_MARKER_RST0: return "RST0"; 237 | case GPUJPEG_MARKER_RST1: return "RST1"; 238 | case GPUJPEG_MARKER_RST2: return "RST2"; 239 | case GPUJPEG_MARKER_RST3: return "RST3"; 240 | case GPUJPEG_MARKER_RST4: return "RST4"; 241 | case GPUJPEG_MARKER_RST5: return "RST5"; 242 | case GPUJPEG_MARKER_RST6: return "RST6"; 243 | case GPUJPEG_MARKER_RST7: return "RST7"; 244 | case GPUJPEG_MARKER_SOI: return "SOI"; 245 | case GPUJPEG_MARKER_EOI: return "EOI"; 246 | case GPUJPEG_MARKER_SOS: return "SOS"; 247 | case GPUJPEG_MARKER_DQT: return "DQT"; 248 | case GPUJPEG_MARKER_DNL: return "DNL"; 249 | case GPUJPEG_MARKER_DRI: return "DRI"; 250 | case GPUJPEG_MARKER_DHP: return "DHP"; 251 | case GPUJPEG_MARKER_EXP: return "EXP"; 252 | case GPUJPEG_MARKER_APP0: return "APP0"; 253 | case GPUJPEG_MARKER_APP1: return "APP1"; 254 | case GPUJPEG_MARKER_APP2: return "APP2"; 255 | case GPUJPEG_MARKER_APP3: return "APP3"; 256 | 
case GPUJPEG_MARKER_APP4: return "APP4"; 257 | case GPUJPEG_MARKER_APP5: return "APP5"; 258 | case GPUJPEG_MARKER_APP6: return "APP6"; 259 | case GPUJPEG_MARKER_APP7: return "APP7"; 260 | case GPUJPEG_MARKER_APP8: return "APP8"; 261 | case GPUJPEG_MARKER_APP9: return "APP9"; 262 | case GPUJPEG_MARKER_APP10: return "APP10"; 263 | case GPUJPEG_MARKER_APP11: return "APP11"; 264 | case GPUJPEG_MARKER_APP12: return "APP12"; 265 | case GPUJPEG_MARKER_APP13: return "APP13"; 266 | case GPUJPEG_MARKER_APP14: return "APP14"; 267 | case GPUJPEG_MARKER_APP15: return "APP15"; 268 | case GPUJPEG_MARKER_JPG0: return "JPG0"; 269 | case GPUJPEG_MARKER_JPG13: return "JPG13"; 270 | case GPUJPEG_MARKER_COM: return "COM"; 271 | case GPUJPEG_MARKER_TEM: return "TEM"; 272 | case GPUJPEG_MARKER_ERROR: return "ERROR"; 273 | default: 274 | { 275 | static char buffer[255]; 276 | sprintf(buffer, "Unknown (0x%X)", code); 277 | return buffer; 278 | } 279 | } 280 | } 281 | 282 | #ifdef __cplusplus 283 | } 284 | #endif 285 | 286 | #endif // GPUJPEG_TYPE_H 287 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_UTIL_H 31 | #define GPUJPEG_UTIL_H 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "cuda_runtime.h" 39 | 40 | #ifdef __cplusplus 41 | extern "C" { 42 | #endif 43 | 44 | // CUDA check error 45 | #define gpujpeg_cuda_check_error(msg, action) \ 46 | { \ 47 | cudaError_t err = cudaGetLastError(); \ 48 | if( cudaSuccess != err) { \ 49 | fprintf(stderr, "[GPUJPEG] [Error] %s (line %i): %s: %s.\n", \ 50 | __FILE__, __LINE__, msg, cudaGetErrorString( err) ); \ 51 | action; \ 52 | } \ 53 | } \ 54 | 55 | // Divide and round up 56 | #define gpujpeg_div_and_round_up(value, div) \ 57 | (((value % div) != 0) ? 
(value / div + 1) : (value / div)) 58 | 59 | // CUDA maximum grid size 60 | #define GPUJPEG_CUDA_MAXIMUM_GRID_SIZE 65535 61 | 62 | // CUDA C++ extension for Eclipse CDT 63 | #ifdef __CDT_PARSER__ 64 | struct { int x; int y; int z; } threadIdx; 65 | struct { int x; int y; int z; } blockIdx; 66 | struct { int x; int y; int z; } blockDim; 67 | struct { int x; int y; int z; } gridDim; 68 | #endif 69 | 70 | // OpenGL missing error 71 | #define GPUJPEG_EXIT_MISSING_OPENGL() \ 72 | fprintf(stderr, "[GPUJPEG] [Error] Can't use OpenGL. The codec was compiled without OpenGL!\n"); \ 73 | exit(-1); \ 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | 79 | #endif // GPUJPEG_UTIL_H 80 | -------------------------------------------------------------------------------- /libgpujpeg/gpujpeg_writer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_WRITER_H 31 | #define GPUJPEG_WRITER_H 32 | 33 | #include 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | /** JPEG encoder structure predeclaration */ 40 | struct gpujpeg_encoder; 41 | 42 | /** JPEG writer structure */ 43 | struct gpujpeg_writer 44 | { 45 | // Output buffer 46 | uint8_t* buffer; 47 | // Output buffer current position 48 | uint8_t* buffer_current; 49 | 50 | // Segment info buffers (every buffer is placed inside another header) 51 | uint8_t* segment_info[GPUJPEG_MAX_SEGMENT_INFO_HEADER_COUNT]; 52 | // Segment info buffers count (equals number of segment info headers) 53 | int segment_info_count; 54 | // Segment info current position in scan 55 | uint8_t* segment_info_position; 56 | // Segment info current segment index 57 | int segment_info_index; 58 | }; 59 | 60 | /** 61 | * Create JPEG writer 62 | * 63 | * @return writer structure if succeeds, otherwise NULL 64 | */ 65 | struct gpujpeg_writer* 66 | gpujpeg_writer_create(struct gpujpeg_encoder* encoder); 67 | 68 | /** 69 | * Destroy JPEG writer 70 | * 71 | * @param writer Writer structure 72 | * @return 0 if succeeds, otherwise nonzero 73 | */ 74 | int 75 | gpujpeg_writer_destroy(struct gpujpeg_writer* writer); 76 | 77 | /** 78 | * Write one byte to file 79 | * 80 | * @param writer Writer structure 81 | * @param value Byte value to write 82 | * @return void 83 | */ 84 | #define 
gpujpeg_writer_emit_byte(writer, value) { \ 85 | *(writer)->buffer_current = (uint8_t)(value); \ 86 | (writer)->buffer_current++; } 87 | 88 | /** 89 | * Write two bytes to the output buffer (high byte first) 90 | * 91 | * @param writer Writer structure 92 | * @param value Two-byte value to write 93 | * @return void 94 | */ 95 | #define gpujpeg_writer_emit_2byte(writer, value) { \ 96 | *(writer)->buffer_current = (uint8_t)(((value) >> 8) & 0xFF); \ 97 | (writer)->buffer_current++; \ 98 | *(writer)->buffer_current = (uint8_t)((value) & 0xFF); \ 99 | (writer)->buffer_current++; } 100 | 101 | /** 102 | * Write marker to the output buffer (0xFF followed by the marker code) 103 | * 104 | * @param writer Writer structure 105 | * @param marker Marker to write (GPUJPEG_MARKER_...) 106 | * @return void 107 | */ 108 | #define gpujpeg_writer_emit_marker(writer, marker) { \ 109 | *(writer)->buffer_current = 0xFF;\ 110 | (writer)->buffer_current++; \ 111 | *(writer)->buffer_current = (uint8_t)(marker); \ 112 | (writer)->buffer_current++; } 113 | 114 | /** 115 | * Write JPEG header (write soi, app0, Y_dqt, CbCr_dqt, sof, 4 * dht blocks) 116 | * 117 | * @param encoder Encoder structure 118 | * @return void 119 | */ 120 | void 121 | gpujpeg_writer_write_header(struct gpujpeg_encoder* encoder); 122 | 123 | /** 124 | * Write segment info for current position in write buffer 125 | * 126 | * @param encoder Encoder structure 127 | * @return void 128 | */ 129 | void 130 | gpujpeg_writer_write_segment_info(struct gpujpeg_encoder* encoder); 131 | 132 | /** 133 | * Write scan header for one component 134 | * 135 | * @param encoder Encoder structure 136 | * @param scan_index Scan index 137 | * @return void 138 | */ 139 | void 140 | gpujpeg_writer_write_scan_header(struct gpujpeg_encoder* encoder, int scan_index); 141 | 142 | #ifdef __cplusplus 143 | } 144 | #endif 145 | 146 | #endif // GPUJPEG_WRITER_H 147 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 |
file(GLOB GPUJPEG_SRCS "*.cpp" "*.cu") 2 | file(GLOB GPUJPEG_HEADERS "*.h" "*.cuh") 3 | 4 | cuda_add_library(GPUJPEG SHARED ${GPUJPEG_SRCS} ${GPUJPEG_HEADERS}) 5 | 6 | # Install Rules 7 | install (TARGETS GPUJPEG DESTINATION "lib") 8 | -------------------------------------------------------------------------------- /src/Makefile.bkp: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2011, CESNET z.s.p.o 3 | # Copyright (c) 2011, Silicon Genome, LLC. 4 | # 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # * Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in the 14 | # documentation and/or other materials provided with the distribution. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | # POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | 29 | # Shared/Static library? 
30 | SHARED_LIBRARY ?= 1 31 | # Use constant memory for huffman coder tables? 32 | HUFFMAN_CODER_TABLES_IN_CONSTANT ?= 1 33 | # Flag if use OpenGL 34 | USE_OPENGL ?= 0 35 | # Debug 36 | DEBUG ?= 0 37 | # CUDA install path 38 | CUDA_INSTALL_PATH ?= /usr/local/cuda 39 | #Destination directory 40 | DESTDIR ?= /usr/local 41 | 42 | 43 | # Target executable 44 | TARGET := libgpujpeg.a 45 | ifeq ($(SHARED_LIBRARY),1) 46 | LIBRARY_NAME := libgpujpeg.so 47 | SONAME := $(LIBRARY_NAME).0 48 | REAL_NAME := $(SONAME).0.0 49 | TARGET := $(REAL_NAME) 50 | endif 51 | 52 | # C files 53 | CFILES := \ 54 | gpujpeg_common.c \ 55 | gpujpeg_encoder.c \ 56 | gpujpeg_decoder.c \ 57 | gpujpeg_table.c \ 58 | gpujpeg_dct_cpu.c \ 59 | gpujpeg_huffman_cpu_encoder.c \ 60 | gpujpeg_huffman_cpu_decoder.c \ 61 | gpujpeg_writer.c \ 62 | gpujpeg_reader.c 63 | # CUDA files 64 | CUFILES := \ 65 | gpujpeg_dct_gpu.cu \ 66 | gpujpeg_preprocessor.cu \ 67 | gpujpeg_huffman_gpu_encoder.cu \ 68 | gpujpeg_huffman_gpu_decoder.cu 69 | 70 | # Compilers 71 | CC := gcc -fPIC 72 | LINK := g++ -fPIC 73 | NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc -Xcompiler -fPIC 74 | 75 | # Debug 76 | ifeq ($(DEBUG),1) 77 | COMMONFLAGS += -g -D_DEBUG -O0 78 | NVCCFLAGS += -G 79 | else 80 | COMMONFLAGS += -O2 81 | endif 82 | 83 | # Common flags 84 | COMMONFLAGS += -I. -I$(CUDA_INSTALL_PATH)/include 85 | # C flags 86 | CFLAGS += $(COMMONFLAGS) -std=c99 87 | # CUDA flags 88 | NVCCFLAGS += $(COMMONFLAGS) \ 89 | --ptxas-options="-v" \ 90 | -gencode arch=compute_20,code=sm_20 \ 91 | -gencode arch=compute_11,code=sm_11 \ 92 | -gencode arch=compute_10,code=sm_10 93 | # Linker flags 94 | LDFLAGS += -shared -Wl,-soname,$(SONAME) 95 | 96 | # Other Flags 97 | ifeq ($(HUFFMAN_CODER_TABLES_IN_CONSTANT),1) 98 | COMMONFLAGS += -DGPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT 99 | endif 100 | ifeq ($(USE_OPENGL),1) 101 | LDFLAGS += -lGLEW 102 | COMMONFLAGS += -DGPUJPEG_USE_OPENGL 103 | endif 104 | 105 | # Do 32bit vs. 
64bit setup 106 | LBITS := $(shell getconf LONG_BIT) 107 | ifeq ($(LBITS),64) 108 | # 64bit 109 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib64 110 | else 111 | # 32bit 112 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib 113 | endif 114 | LDFLAGS += -lcudart 115 | 116 | # Build 117 | build: $(TARGET) 118 | ln -sf $(TARGET) $(LIBRARY_NAME) 119 | ln -sf $(TARGET) $(SONAME) 120 | 121 | # Clean 122 | clean: 123 | rm -f *.o $(TARGET) $(SONAME) $(LIBRARY_NAME) 124 | rm -f *.i *.ii 125 | rm -f *.cudafe1.c *.cudafe1.cpp *.cudafe1.gpu *.cudafe1.stub.c 126 | rm -f *.cudafe2.c *.cudafe2.gpu *.cudafe2.stub.c 127 | rm -f *.fatbin *.fatbin.c *.ptx *.hash *.cubin *.cu.cpp 128 | 129 | # Lists of object files 130 | COBJS=$(CFILES:.c=.c.o) 131 | CUOBJS=$(CUFILES:.cu=.cu.o) 132 | 133 | # Build target 134 | ifeq ($(SHARED_LIBRARY),1) 135 | $(TARGET): $(COBJS) $(CUOBJS) 136 | $(LINK) $(COBJS) $(CUOBJS) $(LDFLAGS) -o $(TARGET); 137 | else 138 | $(TARGET): $(COBJS) $(CUOBJS) 139 | 140 | ar rcs $(TARGET) $(COBJS) $(CUOBJS); 141 | endif 142 | 143 | # Set suffix for CUDA files 144 | .SUFFIXES: .cu 145 | 146 | # Pattern rule for compiling C files 147 | %.c.o: %.c 148 | $(CC) $(CFLAGS) -c $< -o $@ 149 | 150 | # Pattern rule for compiling CUDA files 151 | %.cu.o: %.cu 152 | $(NVCC) $(NVCCFLAGS) -c $< -o $@; 153 | 154 | install: $(TARGET) 155 | install -d -m 755 $(DESTDIR)/lib 156 | install -m 644 $(TARGET) $(DESTDIR)/lib 157 | ifeq ($(SHARED_LIBRARY),1) 158 | ldconfig -n $(DESTDIR)/lib 159 | ln -sf $(DESTDIR)/lib/$(SONAME) $(DESTDIR)/lib/$(LIBRARY_NAME) 160 | endif 161 | 162 | uninstall: $(TARGET) 163 | rm $(DESTDIR)/lib/$(TARGET) 164 | ifeq ($(SHARED_LIBRARY),1) 165 | rm $(DESTDIR)/lib/$(SONAME) 166 | rm $(DESTDIR)/lib/$(LIBRARY_NAME) 167 | endif 168 | 169 | 170 | # Set file dependencies 171 | gpujpeg_common.c.o: gpujpeg_common.c gpujpeg_common.h 172 | gpujpeg_encoder.c.o: gpujpeg_encoder.c gpujpeg_encoder.h 173 | gpujpeg_decoder.c.o: gpujpeg_decoder.c gpujpeg_decoder.h 174 | gpujpeg_table.c.o: 
gpujpeg_table.c gpujpeg_table.h 175 | gpujpeg_preprocessor.cu.o: gpujpeg_preprocessor.cu gpujpeg_preprocessor.h gpujpeg_colorspace.h 176 | gpujpeg_dct_cpu.c.o: gpujpeg_dct_cpu.c gpujpeg_dct_cpu.h 177 | gpujpeg_dct_gpu.cu.o: gpujpeg_dct_gpu.cu gpujpeg_dct_gpu.h 178 | gpujpeg_huffman_cpu_encoder.c.o: gpujpeg_huffman_cpu_encoder.c gpujpeg_huffman_cpu_encoder.h 179 | gpujpeg_huffman_gpu_encoder.cu.o: gpujpeg_huffman_gpu_encoder.cu gpujpeg_huffman_gpu_encoder.h 180 | gpujpeg_huffman_cpu_decoder.c.o: gpujpeg_huffman_cpu_decoder.c gpujpeg_huffman_cpu_decoder.h 181 | gpujpeg_huffman_gpu_decoder.cu.o: gpujpeg_huffman_gpu_decoder.cu gpujpeg_huffman_gpu_decoder.h 182 | gpujpeg_writer.c.o: gpujpeg_writer.c gpujpeg_writer.h 183 | gpujpeg_reader.c.o: gpujpeg_reader.c gpujpeg_reader.h 184 | -------------------------------------------------------------------------------- /src/gpujpeg_dct_cpu.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #include "gpujpeg_dct_cpu.h" 31 | #include 32 | 33 | #define W1 2841 // 2048*sqrt(2)*cos(1*pi/16) 34 | #define W2 2676 // 2048*sqrt(2)*cos(2*pi/16) 35 | #define W3 2408 // 2048*sqrt(2)*cos(3*pi/16) 36 | #define W5 1609 // 2048*sqrt(2)*cos(5*pi/16) 37 | #define W6 1108 // 2048*sqrt(2)*cos(6*pi/16) 38 | #define W7 565 // 2048*sqrt(2)*cos(7*pi/16) 39 | 40 | /** Clipping table and pointer to it */ 41 | static int16_t iclip[1024]; 42 | static int16_t* iclp; 43 | 44 | /** 45 | * Row (horizontal) IDCT 46 | * 47 | * 7 pi 1 48 | * dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l ) 49 | * l=0 8 2 50 | * 51 | * where: c[0] = 128 52 | * c[1..7] = 128*sqrt(2) 53 | */ 54 | void 55 | gpujpeg_idct_cpu_perform_row(int16_t* blk) 56 | { 57 | int x0, x1, x2, x3, x4, x5, x6, x7, x8; 58 | 59 | // shortcut 60 | if (!((x1 = blk[4]<<11) | (x2 = blk[6]) | (x3 = blk[2]) | 61 | (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) 62 | { 63 | blk[0]=blk[1]=blk[2]=blk[3]=blk[4]=blk[5]=blk[6]=blk[7]=blk[0]<<3; 64 | return; 65 | } 66 | 67 | // for proper rounding in the fourth stage 68 | x0 = (blk[0]<<11) + 128; 69 | 70 | // first stage 71 | x8 = W7*(x4+x5); 72 | x4 = x8 + (W1-W7)*x4; 73 | x5 = x8 - (W1+W7)*x5; 74 | x8 = W3*(x6+x7); 75 | x6 = x8 - (W3-W5)*x6; 76 | x7 = x8 - (W3+W5)*x7; 77 | 78 | // second stage 79 | x8 = x0 + x1; 80 | x0 -= x1; 81 | x1 = W6*(x3+x2); 82 | x2 = x1 - (W2+W6)*x2; 83 | x3 = x1 + 
(W2-W6)*x3; 84 | x1 = x4 + x6; 85 | x4 -= x6; 86 | x6 = x5 + x7; 87 | x5 -= x7; 88 | 89 | // third stage 90 | x7 = x8 + x3; 91 | x8 -= x3; 92 | x3 = x0 + x2; 93 | x0 -= x2; 94 | x2 = (181*(x4+x5)+128)>>8; 95 | x4 = (181*(x4-x5)+128)>>8; 96 | 97 | // fourth stage 98 | blk[0] = (x7+x1)>>8; 99 | blk[1] = (x3+x2)>>8; 100 | blk[2] = (x0+x4)>>8; 101 | blk[3] = (x8+x6)>>8; 102 | blk[4] = (x8-x6)>>8; 103 | blk[5] = (x0-x4)>>8; 104 | blk[6] = (x3-x2)>>8; 105 | blk[7] = (x7-x1)>>8; 106 | } 107 | 108 | /** 109 | * Column (vertical) IDCT 110 | * 111 | * 7 pi 1 112 | * dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l ) 113 | * l=0 8 2 114 | * 115 | * where: c[0] = 1/1024 116 | * c[1..7] = (1/1024)*sqrt(2) 117 | */ 118 | void 119 | gpujpeg_idct_cpu_perform_column(int16_t* blk) 120 | { 121 | int x0, x1, x2, x3, x4, x5, x6, x7, x8; 122 | 123 | // shortcut 124 | if (!((x1 = (blk[8*4]<<8)) | (x2 = blk[8*6]) | (x3 = blk[8*2]) | 125 | (x4 = blk[8*1]) | (x5 = blk[8*7]) | (x6 = blk[8*5]) | (x7 = blk[8*3]))) 126 | { 127 | blk[8*0]=blk[8*1]=blk[8*2]=blk[8*3]=blk[8*4]=blk[8*5]=blk[8*6]=blk[8*7]= 128 | iclp[(blk[8*0]+32)>>6]; 129 | return; 130 | } 131 | 132 | x0 = (blk[8*0]<<8) + 8192; 133 | 134 | // first stage 135 | x8 = W7*(x4+x5) + 4; 136 | x4 = (x8+(W1-W7)*x4)>>3; 137 | x5 = (x8-(W1+W7)*x5)>>3; 138 | x8 = W3*(x6+x7) + 4; 139 | x6 = (x8-(W3-W5)*x6)>>3; 140 | x7 = (x8-(W3+W5)*x7)>>3; 141 | 142 | // second stage 143 | x8 = x0 + x1; 144 | x0 -= x1; 145 | x1 = W6*(x3+x2) + 4; 146 | x2 = (x1-(W2+W6)*x2)>>3; 147 | x3 = (x1+(W2-W6)*x3)>>3; 148 | x1 = x4 + x6; 149 | x4 -= x6; 150 | x6 = x5 + x7; 151 | x5 -= x7; 152 | 153 | // third stage 154 | x7 = x8 + x3; 155 | x8 -= x3; 156 | x3 = x0 + x2; 157 | x0 -= x2; 158 | x2 = (181*(x4+x5)+128)>>8; 159 | x4 = (181*(x4-x5)+128)>>8; 160 | 161 | // fourth stage 162 | blk[8*0] = iclp[(x7+x1)>>14]; 163 | blk[8*1] = iclp[(x3+x2)>>14]; 164 | blk[8*2] = iclp[(x0+x4)>>14]; 165 | blk[8*3] = iclp[(x8+x6)>>14]; 166 | blk[8*4] = iclp[(x8-x6)>>14]; 167 | 
blk[8*5] = iclp[(x0-x4)>>14]; 168 | blk[8*6] = iclp[(x3-x2)>>14]; 169 | blk[8*7] = iclp[(x7-x1)>>14]; 170 | } 171 | 172 | /** 173 | * Perform inverse DCT on 8x8 block 174 | * 175 | * @param block 176 | */ 177 | void gpujpeg_idct_cpu_perform(int16_t* block, int16_t* table) 178 | { 179 | for ( int i = 0; i < 64; i++ ) { 180 | int pos = i; 181 | block[i] = (int)block[i] * (int)table[i]; 182 | } 183 | 184 | for ( int i = 0; i < 8; i++ ) 185 | gpujpeg_idct_cpu_perform_row(block + 8 * i); 186 | 187 | for ( int i = 0; i < 8; i++ ) 188 | gpujpeg_idct_cpu_perform_column(block + i); 189 | } 190 | 191 | /** 192 | * Init inverse DCT 193 | */ 194 | void gpujpeg_idct_cpu_init() 195 | { 196 | iclp = iclip + 512; 197 | for ( int i = -512; i < 512; i++ ) 198 | iclp[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i); 199 | } 200 | 201 | /** Documented at declaration */ 202 | void 203 | gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder) 204 | { 205 | gpujpeg_idct_cpu_init(); 206 | 207 | // Get coder 208 | struct gpujpeg_coder* coder = &decoder->coder; 209 | 210 | // Perform IDCT and dequantization 211 | for ( int comp = 0; comp < coder->param_image.comp_count; comp++ ) { 212 | // Get component 213 | struct gpujpeg_component* component = &coder->component[comp]; 214 | 215 | // Determine table type 216 | enum gpujpeg_component_type type = (comp == 0) ? 
GPUJPEG_COMPONENT_LUMINANCE : GPUJPEG_COMPONENT_CHROMINANCE; 217 | 218 | // Copy data to host 219 | cudaMemcpy(component->data_quantized, component->d_data_quantized, component->data_size * sizeof(uint16_t), cudaMemcpyDeviceToHost); 220 | 221 | // Perform IDCT on CPU 222 | int width = component->data_width / GPUJPEG_BLOCK_SIZE; 223 | int height = component->data_height / GPUJPEG_BLOCK_SIZE; 224 | for ( int y = 0; y < height; y++ ) { 225 | for ( int x = 0; x < width; x++ ) { 226 | int index = y * width + x; 227 | gpujpeg_idct_cpu_perform( 228 | &component->data_quantized[index * 64], 229 | (int16_t *) decoder->table_quantization[type].table 230 | ); 231 | } 232 | } 233 | 234 | // Copy results to device 235 | uint8_t* data = NULL; 236 | assert(cudaMallocHost((void**)&data, component->data_size * sizeof(uint8_t)) == cudaSuccess); 237 | for ( int y = 0; y < height; y++ ) { 238 | for ( int x = 0; x < width; x++ ) { 239 | for ( int c = 0; c < (GPUJPEG_BLOCK_SIZE * GPUJPEG_BLOCK_SIZE); c++ ) { 240 | int coefficient_index = (y * width + x) * (GPUJPEG_BLOCK_SIZE * GPUJPEG_BLOCK_SIZE) + c; 241 | uint16_t coefficient = component->data_quantized[coefficient_index]; 242 | coefficient += 128; 243 | if ( coefficient > 255 ) 244 | coefficient = 255; 245 | if ( coefficient < 0 ) 246 | coefficient = 0; 247 | int index = ((y * GPUJPEG_BLOCK_SIZE) + (c / GPUJPEG_BLOCK_SIZE)) * component->data_width + ((x * GPUJPEG_BLOCK_SIZE) + (c % GPUJPEG_BLOCK_SIZE)); 248 | data[index] = coefficient; 249 | } 250 | } 251 | } 252 | cudaMemcpy(component->d_data, data, component->data_size * sizeof(uint8_t), cudaMemcpyHostToDevice); 253 | cudaFreeHost(data); 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /src/gpujpeg_dct_cpu.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_DCT_CPU_H 31 | #define GPUJPEG_DCT_CPU_H 32 | 33 | #include 34 | #include 35 | 36 | /** 37 | * Perform inverse DCT on CPU 38 | * 39 | * @param decoder 40 | */ 41 | void 42 | gpujpeg_idct_cpu(struct gpujpeg_decoder* decoder); 43 | 44 | #endif // GPUJPEG_DCT_CPU_H 45 | -------------------------------------------------------------------------------- /src/gpujpeg_dct_gpu.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_DCT_GPU_H 31 | #define GPUJPEG_DCT_GPU_H 32 | 33 | #include 34 | #include 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | /** 41 | * Perform forward DCT on GPU in integers 42 | * 43 | * @param encoder 44 | */ 45 | int 46 | gpujpeg_dct_gpu(struct gpujpeg_encoder* encoder); 47 | 48 | /** 49 | * Perform inverse DCT on GPU in integers 50 | * 51 | * @param decoder 52 | */ 53 | int 54 | gpujpeg_idct_gpu(struct gpujpeg_decoder* decoder); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif // GPUJPEG_DCT_GPU_H 61 | -------------------------------------------------------------------------------- /src/gpujpeg_decoder.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #include 31 | #include 32 | #include "gpujpeg_preprocessor.h" 33 | #include "gpujpeg_dct_cpu.h" 34 | #include "gpujpeg_dct_gpu.h" 35 | #include "gpujpeg_huffman_cpu_decoder.h" 36 | #include "gpujpeg_huffman_gpu_decoder.h" 37 | #include 38 | 39 | /** Documented at declaration */ 40 | void 41 | gpujpeg_decoder_output_set_default(struct gpujpeg_decoder_output* output) 42 | { 43 | output->type = GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER; 44 | output->data = NULL; 45 | output->data_size = 0; 46 | output->texture = NULL; 47 | } 48 | 49 | /** Documented at declaration */ 50 | void 51 | gpujpeg_decoder_output_set_custom(struct gpujpeg_decoder_output* output, uint8_t* custom_buffer) 52 | { 53 | output->type = GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER; 54 | output->data = custom_buffer; 55 | output->data_size = 0; 56 | } 57 | 58 | /** Documented at declaration */ 59 | void 60 | gpujpeg_decoder_output_set_texture(struct gpujpeg_decoder_output* output, struct gpujpeg_opengl_texture* texture) 61 | { 62 | output->type = GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE; 63 | output->data = NULL; 64 | output->data_size = 0; 65 | output->texture = texture; 66 | } 67 | 68 | void 69 | gpujpeg_decoder_output_set_cuda_buffer(struct gpujpeg_decoder_output* output) 70 | { 71 | output->type = GPUJPEG_DECODER_OUTPUT_CUDA_BUFFER; 72 | output->data = NULL; 73 | output->data_size = 0; 74 | output->texture = NULL; 75 | } 76 | 77 | /** 
Documented at declaration */ 78 | struct gpujpeg_decoder* 79 | gpujpeg_decoder_create() 80 | { 81 | struct gpujpeg_decoder* decoder = (struct gpujpeg_decoder*) malloc(sizeof(struct gpujpeg_decoder)); 82 | if ( decoder == NULL ) 83 | return NULL; 84 | 85 | // Get coder 86 | struct gpujpeg_coder* coder = &decoder->coder; 87 | 88 | // Set parameters 89 | memset(decoder, 0, sizeof(struct gpujpeg_decoder)); 90 | gpujpeg_set_default_parameters(&coder->param); 91 | gpujpeg_image_set_default_parameters(&coder->param_image); 92 | coder->param_image.comp_count = 0; 93 | coder->param_image.width = 0; 94 | coder->param_image.height = 0; 95 | coder->param.restart_interval = 0; 96 | 97 | int result = 1; 98 | 99 | // Create reader 100 | decoder->reader = gpujpeg_reader_create(); 101 | if ( decoder->reader == NULL ) 102 | result = 0; 103 | 104 | // Allocate quantization tables in device memory 105 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 106 | if ( cudaSuccess != cudaMalloc((void**)&decoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) 107 | result = 0; 108 | } 109 | // Allocate huffman tables in device memory 110 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 111 | for ( int huff_type = 0; huff_type < GPUJPEG_HUFFMAN_TYPE_COUNT; huff_type++ ) { 112 | if ( cudaSuccess != cudaMalloc((void**)&decoder->d_table_huffman[comp_type][huff_type], sizeof(struct gpujpeg_table_huffman_decoder)) ) 113 | result = 0; 114 | } 115 | } 116 | gpujpeg_cuda_check_error("Decoder table allocation", return NULL); 117 | 118 | // Init huffman encoder 119 | if ( gpujpeg_huffman_gpu_decoder_init() != 0 ) 120 | result = 0; 121 | 122 | if ( result == 0 ) { 123 | gpujpeg_decoder_destroy(decoder); 124 | return NULL; 125 | } 126 | 127 | // Timers 128 | GPUJPEG_CUSTOM_TIMER_CREATE(decoder->def); 129 | GPUJPEG_CUSTOM_TIMER_CREATE(decoder->in_gpu); 130 | 131 | return decoder; 132 | } 133 | 134 | /** Documented at 
declaration */ 135 | int 136 | gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image) 137 | { 138 | assert(param_image->comp_count == 1 || param_image->comp_count == 3); 139 | 140 | // Get coder 141 | struct gpujpeg_coder* coder = &decoder->coder; 142 | 143 | // Check if (re)inialization is needed 144 | int change = 0; 145 | change |= coder->param_image.width != param_image->width; 146 | change |= coder->param_image.height != param_image->height; 147 | change |= coder->param_image.comp_count != param_image->comp_count; 148 | change |= coder->param.restart_interval != param->restart_interval; 149 | change |= coder->param.interleaved != param->interleaved; 150 | for ( int comp = 0; comp < param_image->comp_count; comp++ ) { 151 | change |= coder->param.sampling_factor[comp].horizontal != param->sampling_factor[comp].horizontal; 152 | change |= coder->param.sampling_factor[comp].vertical != param->sampling_factor[comp].vertical; 153 | } 154 | if ( change == 0 ) 155 | return 0; 156 | 157 | // For now we can't reinitialize decoder, we can only do first initialization 158 | if ( coder->param_image.width != 0 || coder->param_image.height != 0 || coder->param_image.comp_count != 0 ) { 159 | fprintf(stderr, "[GPUJPEG] [Error] Can't reinitialize decoder, implement if needed!\n"); 160 | return -1; 161 | } 162 | 163 | coder->param = *param; 164 | coder->param_image = *param_image; 165 | 166 | // Initialize coder 167 | if ( gpujpeg_coder_init(coder) != 0 ) 168 | return -1; 169 | 170 | // Init postprocessor 171 | if ( gpujpeg_preprocessor_decoder_init(&decoder->coder) != 0 ) { 172 | fprintf(stderr, "Failed to init postprocessor!"); 173 | return -1; 174 | } 175 | 176 | return 0; 177 | } 178 | 179 | /** Documented at declaration */ 180 | int 181 | gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output) 182 | { 183 | // Get coder 184 
| struct gpujpeg_coder* coder = &decoder->coder; 185 | 186 | // Reset durations 187 | coder->duration_memory_to = 0.0; 188 | coder->duration_memory_from = 0.0; 189 | coder->duration_memory_map = 0.0; 190 | coder->duration_memory_unmap = 0.0; 191 | coder->duration_preprocessor = 0.0; 192 | coder->duration_dct_quantization = 0.0; 193 | coder->duration_huffman_coder = 0.0; 194 | coder->duration_stream = 0.0; 195 | coder->duration_in_gpu = 0.0; 196 | 197 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 198 | 199 | // Read JPEG image data 200 | if ( gpujpeg_reader_read_image(decoder, image, image_size) != 0 ) { 201 | fprintf(stderr, "[GPUJPEG] [Error] Decoder failed when decoding image data!\n"); 202 | return -1; 203 | } 204 | 205 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 206 | coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 207 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 208 | 209 | // Perform huffman decoding on CPU (when restart interval is not set) 210 | if ( coder->param.restart_interval == 0 ) { 211 | if ( gpujpeg_huffman_cpu_decoder_decode(decoder) != 0 ) { 212 | fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder failed!\n"); 213 | return -1; 214 | } 215 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 216 | coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 217 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 218 | 219 | // Copy quantized data to device memory from cpu memory 220 | cudaMemcpy(coder->d_data_quantized, coder->data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyHostToDevice); 221 | 222 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 223 | coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 224 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 225 | 226 | GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); 227 | } 228 | // Perform huffman decoding on GPU (when restart interval is set) 229 | else { 230 | // Reset huffman output 231 | cudaMemset(coder->d_data_quantized, 0, coder->data_size * 
sizeof(int16_t)); 232 | 233 | // Copy scan data to device memory 234 | cudaMemcpy(coder->d_data_compressed, coder->data_compressed, decoder->data_compressed_size * sizeof(uint8_t), cudaMemcpyHostToDevice); 235 | gpujpeg_cuda_check_error("Decoder copy compressed data", return -1); 236 | 237 | // Copy segments to device memory 238 | cudaMemcpy(coder->d_segment, coder->segment, decoder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice); 239 | gpujpeg_cuda_check_error("Decoder copy compressed data", return -1); 240 | 241 | // Zero output memory 242 | cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t)); 243 | 244 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 245 | coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 246 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 247 | 248 | GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); 249 | 250 | // Perform huffman decoding 251 | if ( gpujpeg_huffman_gpu_decoder_decode(decoder) != 0 ) { 252 | fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n"); 253 | return -1; 254 | } 255 | 256 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 257 | coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 258 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 259 | } 260 | 261 | // Perform IDCT and dequantization (own CUDA implementation) 262 | if ( gpujpeg_idct_gpu(decoder) != 0 ) 263 | return -1; 264 | 265 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 266 | coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 267 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 268 | 269 | // Preprocessing 270 | if ( gpujpeg_preprocessor_decode(&decoder->coder) != 0 ) 271 | return -1; 272 | 273 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->in_gpu); 274 | coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->in_gpu); 275 | 276 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 277 | coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 278 | 279 | 
// Set decompressed image size 280 | output->data_size = coder->data_raw_size * sizeof(uint8_t); 281 | 282 | // Set decompressed image 283 | if ( output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) { 284 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 285 | 286 | // Copy decompressed image to host memory 287 | cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); 288 | 289 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 290 | coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 291 | 292 | // Set output to internal buffer 293 | output->data = coder->data_raw; 294 | } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) { 295 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 296 | 297 | assert(output->data != NULL); 298 | 299 | // Copy decompressed image to host memory 300 | cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); 301 | 302 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 303 | coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 304 | } else if ( output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) { 305 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 306 | 307 | // Map OpenGL texture 308 | int data_size = 0; 309 | uint8_t* d_data = gpujpeg_opengl_texture_map(output->texture, &data_size); 310 | assert(data_size == coder->data_raw_size); 311 | 312 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 313 | coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 314 | 315 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 316 | 317 | // Copy decompressed image to texture pixel buffer object device data 318 | cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); 319 | 320 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 321 | coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 322 | 323 | GPUJPEG_CUSTOM_TIMER_START(decoder->def); 324 | 
325 | // Unmap OpenGL texture 326 | gpujpeg_opengl_texture_unmap(output->texture); 327 | 328 | GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); 329 | coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); 330 | } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUDA_BUFFER ) { 331 | // Copy decompressed image to texture pixel buffer object device data 332 | output->data = coder->d_data_raw; 333 | } else { 334 | // Unknown output type 335 | assert(0); 336 | } 337 | 338 | return 0; 339 | } 340 | 341 | void 342 | gpujpeg_decoder_set_output_format(struct gpujpeg_decoder* decoder, 343 | enum gpujpeg_color_space color_space, 344 | enum gpujpeg_sampling_factor sampling_factor) 345 | { 346 | decoder->coder.param_image.color_space = color_space; 347 | decoder->coder.param_image.sampling_factor = sampling_factor; 348 | } 349 | 350 | /** Documented at declaration */ 351 | int 352 | gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder) 353 | { 354 | assert(decoder != NULL); 355 | 356 | GPUJPEG_CUSTOM_TIMER_DESTROY(decoder->def); 357 | GPUJPEG_CUSTOM_TIMER_DESTROY(decoder->in_gpu); 358 | 359 | if ( gpujpeg_coder_deinit(&decoder->coder) != 0 ) 360 | return -1; 361 | 362 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 363 | if ( decoder->table_quantization[comp_type].d_table != NULL ) 364 | cudaFree(decoder->table_quantization[comp_type].d_table); 365 | } 366 | 367 | if ( decoder->reader != NULL ) 368 | gpujpeg_reader_destroy(decoder->reader); 369 | 370 | free(decoder); 371 | 372 | return 0; 373 | } 374 | -------------------------------------------------------------------------------- /src/gpujpeg_encoder.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #include 31 | #include "gpujpeg_preprocessor.h" 32 | #include "gpujpeg_dct_cpu.h" 33 | #include "gpujpeg_dct_gpu.h" 34 | #include "gpujpeg_huffman_cpu_encoder.h" 35 | #include "gpujpeg_huffman_gpu_encoder.h" 36 | #include 37 | 38 | /** Documented at declaration */ 39 | void 40 | gpujpeg_encoder_input_set_image(struct gpujpeg_encoder_input* input, uint8_t* image) 41 | { 42 | input->type = GPUJPEG_ENCODER_INPUT_IMAGE; 43 | input->image = image; 44 | input->texture = NULL; 45 | } 46 | 47 | /** Documented at declaration */ 48 | void 49 | gpujpeg_encoder_input_set_texture(struct gpujpeg_encoder_input* input, struct gpujpeg_opengl_texture* texture) 50 | { 51 | input->type = GPUJPEG_ENCODER_INPUT_OPENGL_TEXTURE; 52 | input->image = NULL; 53 | input->texture = texture; 54 | } 55 | 56 | /** Documented at declaration */ 57 | struct gpujpeg_encoder* 58 | gpujpeg_encoder_create(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* param_image) 59 | { 60 | assert(param_image->comp_count == 1 || param_image->comp_count == 3); 61 | assert(param_image->comp_count <= GPUJPEG_MAX_COMPONENT_COUNT); 62 | assert(param->quality >= 0 && param->quality <= 100); 63 | assert(param->restart_interval >= 0); 64 | assert(param->interleaved == 0 || param->interleaved == 1); 65 | 66 | struct gpujpeg_encoder* encoder = (struct gpujpeg_encoder*) malloc(sizeof(struct gpujpeg_encoder)); 67 | if ( encoder == NULL ) 68 | return NULL; 69 | 70 | // Get coder 71 | struct gpujpeg_coder* coder = &encoder->coder; 72 | 73 | // Set parameters 74 | memset(encoder, 0, sizeof(struct gpujpeg_encoder)); 75 | coder->param_image = *param_image; 76 | coder->param = *param; 77 | 78 | int result = 1; 79 | 80 | // Create writer 81 | encoder->writer = gpujpeg_writer_create(encoder); 82 | if ( encoder->writer == NULL ) 83 | result = 0; 84 | 85 | // Initialize coder 86 | if ( gpujpeg_coder_init(coder) != 0 ) 87 | result = 0; 88 | 89 | // Init preprocessor 90 | if ( 
gpujpeg_preprocessor_encoder_init(&encoder->coder) != 0 ) { 91 | fprintf(stderr, "Failed to init preprocessor!"); 92 | result = 0; 93 | } 94 | 95 | // Allocate quantization tables in device memory 96 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 97 | if ( cudaSuccess != cudaMalloc((void**)&encoder->table_quantization[comp_type].d_table, 64 * sizeof(uint16_t)) ) 98 | result = 0; 99 | if ( cudaSuccess != cudaMalloc((void**)&encoder->table_quantization[comp_type].d_table_forward, 64 * sizeof(float)) ) 100 | result = 0; 101 | } 102 | gpujpeg_cuda_check_error("Encoder table allocation", return NULL); 103 | 104 | // Init quantization tables for encoder 105 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 106 | if ( gpujpeg_table_quantization_encoder_init(&encoder->table_quantization[comp_type], (enum gpujpeg_component_type)comp_type, coder->param.quality) != 0 ) 107 | result = 0; 108 | } 109 | gpujpeg_cuda_check_error("Quantization init", return NULL); 110 | 111 | // Init huffman tables for encoder 112 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 113 | for ( int huff_type = 0; huff_type < GPUJPEG_HUFFMAN_TYPE_COUNT; huff_type++ ) { 114 | if ( gpujpeg_table_huffman_encoder_init(&encoder->table_huffman[comp_type][huff_type], (enum gpujpeg_component_type)comp_type, (enum gpujpeg_huffman_type)huff_type) != 0 ) 115 | result = 0; 116 | } 117 | } 118 | gpujpeg_cuda_check_error("Encoder table init", return NULL); 119 | 120 | // Init huffman encoder 121 | if ( gpujpeg_huffman_gpu_encoder_init(encoder) != 0 ) 122 | result = 0; 123 | 124 | if ( result == 0 ) { 125 | gpujpeg_encoder_destroy(encoder); 126 | return NULL; 127 | } 128 | 129 | // Timers 130 | GPUJPEG_CUSTOM_TIMER_CREATE(encoder->def); 131 | GPUJPEG_CUSTOM_TIMER_CREATE(encoder->in_gpu); 132 | 133 | return encoder; 134 | } 135 | 136 | /** Documented at declaration */ 137 | int 138 | 
gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size) 139 | { 140 | // Get coder 141 | struct gpujpeg_coder* coder = &encoder->coder; 142 | 143 | // Reset durations 144 | coder->duration_memory_to = 0.0; 145 | coder->duration_memory_from = 0.0; 146 | coder->duration_memory_map = 0.0; 147 | coder->duration_memory_unmap = 0.0; 148 | coder->duration_preprocessor = 0.0; 149 | coder->duration_dct_quantization = 0.0; 150 | coder->duration_huffman_coder = 0.0; 151 | coder->duration_stream = 0.0; 152 | coder->duration_in_gpu = 0.0; 153 | 154 | // Load input image 155 | if ( input->type == GPUJPEG_ENCODER_INPUT_IMAGE ) { 156 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 157 | 158 | // Copy image to device memory 159 | if ( cudaSuccess != cudaMemcpy(coder->d_data_raw, input->image, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyHostToDevice) ) 160 | return -1; 161 | 162 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 163 | coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 164 | } else 165 | if ( input->type == GPUJPEG_ENCODER_INPUT_OPENGL_TEXTURE ) { 166 | assert(input->texture != NULL); 167 | 168 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 169 | 170 | // Map texture to CUDA 171 | int data_size = 0; 172 | uint8_t* d_data = gpujpeg_opengl_texture_map(input->texture, &data_size); 173 | assert(data_size == (coder->data_raw_size)); 174 | 175 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 176 | coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 177 | 178 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 179 | 180 | // Copy image data from texture pixel buffer object to device data 181 | cudaMemcpy(coder->d_data_raw, d_data, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); 182 | 183 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 184 | coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 185 | 186 | 
GPUJPEG_CUSTOM_TIMER_START(encoder->def); 187 | 188 | // Unmap texture from CUDA 189 | gpujpeg_opengl_texture_unmap(input->texture); 190 | 191 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 192 | coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 193 | } else { 194 | // Unknown output type 195 | assert(0); 196 | } 197 | 198 | //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_LUMINANCE]); 199 | //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_CHROMINANCE]); 200 | 201 | GPUJPEG_CUSTOM_TIMER_START(encoder->in_gpu); 202 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 203 | 204 | // Preprocessing 205 | if ( gpujpeg_preprocessor_encode(&encoder->coder) != 0 ) 206 | return -1; 207 | 208 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 209 | coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 210 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 211 | 212 | // Perform DCT and quantization 213 | if ( gpujpeg_dct_gpu(encoder) != 0 ) 214 | return -1; 215 | 216 | // If restart interval is 0 then the GPU processing is in the end (even huffman coder will be performed on CPU) 217 | if ( coder->param.restart_interval == 0 ) { 218 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->in_gpu); 219 | coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->in_gpu); 220 | } 221 | 222 | // Initialize writer output buffer current position 223 | encoder->writer->buffer_current = encoder->writer->buffer; 224 | 225 | // Write header 226 | gpujpeg_writer_write_header(encoder); 227 | 228 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 229 | coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 230 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 231 | 232 | // Perform huffman coding on CPU (when restart interval is not set) 233 | if ( coder->param.restart_interval == 0 ) { 234 | // Copy quantized data from device memory to cpu memory 235 | cudaMemcpy(coder->data_quantized, coder->d_data_quantized, coder->data_size * sizeof(int16_t), 
cudaMemcpyDeviceToHost); 236 | 237 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 238 | coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 239 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 240 | 241 | // Perform huffman coding 242 | if ( gpujpeg_huffman_cpu_encoder_encode(encoder) != 0 ) { 243 | fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on CPU failed!\n"); 244 | return -1; 245 | } 246 | 247 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 248 | coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 249 | } 250 | // Perform huffman coding on GPU (when restart interval is set) 251 | else { 252 | // Perform huffman coding 253 | unsigned int output_size; 254 | if ( gpujpeg_huffman_gpu_encoder_encode(encoder, &output_size) != 0 ) { 255 | fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on GPU failed!\n"); 256 | return -1; 257 | } 258 | 259 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->in_gpu); 260 | coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->in_gpu); 261 | 262 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 263 | coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 264 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 265 | 266 | // Copy compressed data from device memory to cpu memory 267 | if ( cudaSuccess != cudaMemcpy(coder->data_compressed, coder->d_data_compressed, output_size, cudaMemcpyDeviceToHost) != 0 ) 268 | return -1; 269 | // Copy segments from device memory 270 | if ( cudaSuccess != cudaMemcpy(coder->segment, coder->d_segment, coder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyDeviceToHost) ) 271 | return -1; 272 | 273 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 274 | coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 275 | GPUJPEG_CUSTOM_TIMER_START(encoder->def); 276 | 277 | if ( coder->param.interleaved == 1 ) { 278 | // Write scan header (only one scan is written, that contains all color components data) 279 | 
gpujpeg_writer_write_scan_header(encoder, 0); 280 | 281 | // Write scan data 282 | for ( int segment_index = 0; segment_index < coder->segment_count; segment_index++ ) { 283 | struct gpujpeg_segment* segment = &coder->segment[segment_index]; 284 | 285 | gpujpeg_writer_write_segment_info(encoder); 286 | 287 | // Copy compressed data to writer 288 | memcpy( 289 | encoder->writer->buffer_current, 290 | &coder->data_compressed[segment->data_compressed_index], 291 | segment->data_compressed_size 292 | ); 293 | encoder->writer->buffer_current += segment->data_compressed_size; 294 | //printf("Compressed data %d bytes\n", segment->data_compressed_size); 295 | } 296 | // Remove last restart marker in scan (is not needed) 297 | encoder->writer->buffer_current -= 2; 298 | 299 | gpujpeg_writer_write_segment_info(encoder); 300 | } else { 301 | // Write huffman coder results as one scan for each color component 302 | int segment_index = 0; 303 | for ( int comp = 0; comp < coder->param_image.comp_count; comp++ ) { 304 | // Write scan header 305 | gpujpeg_writer_write_scan_header(encoder, comp); 306 | // Write scan data 307 | for ( int index = 0; index < coder->component[comp].segment_count; index++ ) { 308 | struct gpujpeg_segment* segment = &coder->segment[segment_index]; 309 | 310 | gpujpeg_writer_write_segment_info(encoder); 311 | 312 | // Copy compressed data to writer 313 | memcpy( 314 | encoder->writer->buffer_current, 315 | &coder->data_compressed[segment->data_compressed_index], 316 | segment->data_compressed_size 317 | ); 318 | encoder->writer->buffer_current += segment->data_compressed_size; 319 | //printf("Compressed data %d bytes\n", segment->data_compressed_size); 320 | 321 | segment_index++; 322 | } 323 | // Remove last restart marker in scan (is not needed) 324 | encoder->writer->buffer_current -= 2; 325 | 326 | gpujpeg_writer_write_segment_info(encoder); 327 | } 328 | } 329 | 330 | GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); 331 | coder->duration_stream = 
GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); 332 | } 333 | gpujpeg_writer_emit_marker(encoder->writer, GPUJPEG_MARKER_EOI); 334 | 335 | // Set compressed image 336 | *image_compressed = encoder->writer->buffer; 337 | *image_compressed_size = encoder->writer->buffer_current - encoder->writer->buffer; 338 | 339 | return 0; 340 | } 341 | 342 | /** Documented at declaration */ 343 | int 344 | gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder) 345 | { 346 | assert(encoder != NULL); 347 | 348 | GPUJPEG_CUSTOM_TIMER_DESTROY(encoder->def); 349 | GPUJPEG_CUSTOM_TIMER_DESTROY(encoder->in_gpu); 350 | 351 | if ( gpujpeg_coder_deinit(&encoder->coder) != 0 ) 352 | return -1; 353 | for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { 354 | if ( encoder->table_quantization[comp_type].d_table != NULL ) 355 | cudaFree(encoder->table_quantization[comp_type].d_table); 356 | } 357 | if ( encoder->writer != NULL ) 358 | gpujpeg_writer_destroy(encoder->writer); 359 | 360 | free(encoder); 361 | 362 | return 0; 363 | } 364 | -------------------------------------------------------------------------------- /src/gpujpeg_huffman_cpu_decoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_HUFFMAN_CPU_DECODER_H 31 | #define GPUJPEG_HUFFMAN_CPU_DECODER_H 32 | 33 | #include 34 | 35 | /** 36 | * Perform huffman decoding 37 | * 38 | * @return 0 if succeeds, otherwise nonzero 39 | */ 40 | int 41 | gpujpeg_huffman_cpu_decoder_decode(struct gpujpeg_decoder* decoder); 42 | 43 | #endif // GPUJPEG_HUFFMAN_CPU_DECODER_H 44 | -------------------------------------------------------------------------------- /src/gpujpeg_huffman_cpu_encoder.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 
12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #include "gpujpeg_huffman_cpu_encoder.h" 31 | #include 32 | 33 | /** Huffman encoder state used while coding on the CPU */ 34 | struct gpujpeg_huffman_cpu_encoder 35 | { 36 | // Color components (array indexed by component / scan index) 37 | struct gpujpeg_component* component; 38 | 39 | // JPEG writer structure that receives the emitted bytes 40 | struct gpujpeg_writer* writer; 41 | 42 | // Huffman DC tables, indexed by component type 43 | struct gpujpeg_table_huffman_encoder* table_dc[GPUJPEG_COMPONENT_TYPE_COUNT]; 44 | // Huffman AC tables, indexed by component type 45 | struct gpujpeg_table_huffman_encoder* table_ac[GPUJPEG_COMPONENT_TYPE_COUNT]; 46 | 47 | // Bit accumulator holding bits that were not written out yet 48 | int put_value; 49 | // Number of valid bits currently held in put_value 50 | int put_bits; 51 | // Previous DC value of each component (DC is coded as a difference) 52 | int dc[GPUJPEG_MAX_COMPONENT_COUNT]; 53 | // Current scan index 54 | int scan_index; 55 | // Component count (1 means non-interleaving, > 1 means interleaving) 56 | int comp_count; 57 | }; 58 | 59 | /** 60 | * Output bits to the file. Only the right 24 bits of put_buffer are used; 61 | * the valid bits are left-justified in this part. At most 16 bits can be 62 | * passed to this function in one call, and we never retain more than 7 bits 63 | * in put_buffer between calls, so 24 bits are sufficient. 
64 | * 65 | * @param coder Huffman coder structure 66 | * @param code Huffman code 67 | * @param size Size in bits of the Huffman code 68 | * @return void 69 | */ 70 | static inline int 71 | gpujpeg_huffman_cpu_encoder_emit_bits(struct gpujpeg_huffman_cpu_encoder* coder, unsigned int code, int size) 72 | { 73 | // This routine is heavily used, so it's worth coding tightly 74 | int put_buffer = (int)code; 75 | int put_bits = coder->put_bits; 76 | // If size is 0, caller used an invalid Huffman table entry 77 | if ( size == 0 ) 78 | return -1; 79 | // Mask off any extra bits in code 80 | put_buffer &= (((int)1) << size) - 1; 81 | // New number of bits in buffer 82 | put_bits += size; 83 | // Align incoming bits 84 | put_buffer <<= 24 - put_bits; 85 | // And merge with old buffer contents 86 | put_buffer |= coder->put_value; 87 | // If there are more than 8 bits, write it out 88 | unsigned char uc; 89 | while ( put_bits >= 8 ) { 90 | // Write one byte out 91 | uc = (unsigned char) ((put_buffer >> 16) & 0xFF); 92 | gpujpeg_writer_emit_byte(coder->writer, uc); 93 | // If need to stuff a zero byte 94 | if ( uc == 0xFF ) { 95 | // Write zero byte out 96 | gpujpeg_writer_emit_byte(coder->writer, 0); 97 | } 98 | put_buffer <<= 8; 99 | put_bits -= 8; 100 | } 101 | // update state variables 102 | coder->put_value = put_buffer; 103 | coder->put_bits = put_bits; 104 | return 0; 105 | } 106 | 107 | /** 108 | * Emit left bits 109 | * 110 | * @param coder Huffman coder structure 111 | * @return void 112 | */ 113 | static inline void 114 | gpujpeg_huffman_cpu_encoder_emit_left_bits(struct gpujpeg_huffman_cpu_encoder* coder) 115 | { 116 | // Fill 7 bits with ones 117 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, 0x7F, 7) != 0 ) 118 | return; 119 | 120 | //unsigned char uc = (unsigned char) ((coder->put_value >> 16) & 0xFF); 121 | // Write one byte out 122 | //gpujpeg_writer_emit_byte(coder->writer, uc); 123 | 124 | coder->put_value = 0; 125 | coder->put_bits = 0; 126 | } 127 | 
128 | /** 129 | * Encode one 8x8 block 130 | * 131 | * @return 0 if succeeds, otherwise nonzero 132 | */ 133 | int 134 | gpujpeg_huffman_cpu_encoder_encode_block(struct gpujpeg_huffman_cpu_encoder* coder, int16_t* block, int* dc, struct gpujpeg_table_huffman_encoder* table_dc, struct gpujpeg_table_huffman_encoder* table_ac) 135 | { 136 | /*printf("Encode block\n"); 137 | for ( int y = 0; y < 8; y++) { 138 | for ( int x = 0; x < 8; x++ ) { 139 | printf("%4d ", block[y * 8 + x]); 140 | } 141 | printf("\n"); 142 | }*/ 143 | 144 | // Encode the DC coefficient difference per section F.1.2.1 145 | int temp = block[0] - *dc; 146 | *dc = block[0]; 147 | 148 | int temp2 = temp; 149 | if ( temp < 0 ) { 150 | // Temp is abs value of input 151 | temp = -temp; 152 | // For a negative input, want temp2 = bitwise complement of abs(input) 153 | // This code assumes we are on a two's complement machine 154 | temp2--; 155 | } 156 | 157 | // Find the number of bits needed for the magnitude of the coefficient 158 | int nbits = 0; 159 | while ( temp ) { 160 | nbits++; 161 | temp >>= 1; 162 | } 163 | 164 | // Write category number 165 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, table_dc->code[nbits], table_dc->size[nbits]) != 0 ) { 166 | fprintf(stderr, "[GPUJPEG] [Error] Fail emit bits %d [code: %d, size: %d]!\n", nbits, table_dc->code[nbits], table_dc->size[nbits]); 167 | return -1; 168 | } 169 | 170 | // Write category offset (EmitBits rejects calls with size 0) 171 | if ( nbits ) { 172 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, (unsigned int) temp2, nbits) != 0 ) 173 | return -1; 174 | } 175 | 176 | // Encode the AC coefficients per section F.1.2.2 (r = run length of zeros) 177 | int r = 0; 178 | for ( int k = 1; k < 64; k++ ) 179 | { 180 | if ( (temp = block[gpujpeg_order_natural[k]]) == 0 ) { 181 | r++; 182 | } 183 | else { 184 | // If run length > 15, must emit special run-length-16 codes (0xF0) 185 | while ( r > 15 ) { 186 | if ( 
gpujpeg_huffman_cpu_encoder_emit_bits(coder, table_ac->code[0xF0], table_ac->size[0xF0]) != 0 ) 187 | return -1; 188 | r -= 16; 189 | } 190 | 191 | temp2 = temp; 192 | if ( temp < 0 ) { 193 | // temp is abs value of input 194 | temp = -temp; 195 | // This code assumes we are on a two's complement machine 196 | temp2--; 197 | } 198 | 199 | // Find the number of bits needed for the magnitude of the coefficient 200 | // there must be at least one 1 bit 201 | nbits = 1; 202 | while ( (temp >>= 1) ) 203 | nbits++; 204 | 205 | // Emit Huffman symbol for run length / number of bits 206 | int i = (r << 4) + nbits; 207 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, table_ac->code[i], table_ac->size[i]) != 0 ) 208 | return -1; 209 | 210 | // Write Category offset 211 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, (unsigned int) temp2, nbits) != 0 ) 212 | return -1; 213 | 214 | r = 0; 215 | } 216 | } 217 | 218 | // If all the left coefs were zero, emit an end-of-block code 219 | if ( r > 0 ) { 220 | if ( gpujpeg_huffman_cpu_encoder_emit_bits(coder, table_ac->code[0], table_ac->size[0]) != 0 ) 221 | return -1; 222 | } 223 | 224 | return 0; 225 | } 226 | 227 | /** 228 | * Encode one MCU 229 | * 230 | * @return 0 if succeeds, otherwise nonzero 231 | */ 232 | int 233 | gpujpeg_huffman_cpu_encoder_encode_mcu(struct gpujpeg_huffman_cpu_encoder* coder, int segment_index, int mcu_index) 234 | { 235 | // Non-interleaving mode 236 | if ( coder->comp_count == 1 ) { 237 | // Get component for current scan 238 | struct gpujpeg_component* component = &coder->component[coder->scan_index]; 239 | 240 | // Get component data for MCU 241 | int16_t* block = &component->data_quantized[(segment_index * component->segment_mcu_count + mcu_index) * component->mcu_size]; 242 | 243 | // Get coder parameters 244 | int* dc = &coder->dc[coder->scan_index]; 245 | struct gpujpeg_table_huffman_encoder* table_dc = coder->table_dc[component->type]; 246 | struct gpujpeg_table_huffman_encoder* table_ac 
= coder->table_ac[component->type]; 247 | 248 | // Encode 8x8 block 249 | if ( gpujpeg_huffman_cpu_encoder_encode_block(coder, block, dc, table_dc, table_ac) != 0 ) 250 | return -1; 251 | } 252 | // Interleaving mode 253 | else { 254 | assert(coder->scan_index == 0); 255 | for ( int comp = 0; comp < coder->comp_count; comp++ ) { 256 | struct gpujpeg_component* component = &coder->component[comp]; 257 | 258 | // Prepare mcu indexes 259 | int mcu_index_x = (segment_index * component->segment_mcu_count + mcu_index) % component->mcu_count_x; 260 | int mcu_index_y = (segment_index * component->segment_mcu_count + mcu_index) / component->mcu_count_x; 261 | // Compute base data index 262 | int data_index_base = mcu_index_y * (component->mcu_size * component->mcu_count_x) + mcu_index_x * (component->mcu_size_x * GPUJPEG_BLOCK_SIZE); 263 | 264 | // For all vertical 8x8 blocks 265 | for ( int y = 0; y < component->sampling_factor.vertical; y++ ) { 266 | // Compute base row data index 267 | assert((component->mcu_count_x * component->mcu_size_x) == component->data_width); 268 | int data_index_row = data_index_base + y * (component->mcu_count_x * component->mcu_size_x * GPUJPEG_BLOCK_SIZE); 269 | // For all horizontal 8x8 blocks 270 | for ( int x = 0; x < component->sampling_factor.horizontal; x++ ) { 271 | // Compute 8x8 block data index 272 | int data_index = data_index_row + x * GPUJPEG_BLOCK_SIZE * GPUJPEG_BLOCK_SIZE; 273 | 274 | // Get component data for MCU 275 | int16_t* block = &component->data_quantized[data_index]; 276 | 277 | // Get coder parameters 278 | int* dc = &coder->dc[comp]; 279 | struct gpujpeg_table_huffman_encoder* table_dc = coder->table_dc[component->type]; 280 | struct gpujpeg_table_huffman_encoder* table_ac = coder->table_ac[component->type]; 281 | 282 | // Encode 8x8 block 283 | if ( gpujpeg_huffman_cpu_encoder_encode_block(coder, block, dc, table_dc, table_ac) != 0 ) 284 | return -1; 285 | } 286 | } 287 | } 288 | } 289 | 290 | return 0; 291 | } 292 
| 293 | /** Documented at declaration */ 294 | int 295 | gpujpeg_huffman_cpu_encoder_encode(struct gpujpeg_encoder* encoder) 296 | { 297 | // Init huffman ecoder 298 | struct gpujpeg_huffman_cpu_encoder coder; 299 | coder.writer = encoder->writer; 300 | coder.component = encoder->coder.component; 301 | 302 | // Set huffman tables 303 | for ( int type = 0; type < GPUJPEG_COMPONENT_TYPE_COUNT; type++ ) { 304 | coder.table_dc[type] = &encoder->table_huffman[type][GPUJPEG_HUFFMAN_DC]; 305 | coder.table_ac[type] = &encoder->table_huffman[type][GPUJPEG_HUFFMAN_AC]; 306 | } 307 | 308 | // Set mcu component count 309 | if ( encoder->coder.param.interleaved == 1 ) 310 | coder.comp_count = encoder->coder.param_image.comp_count; 311 | else 312 | coder.comp_count = 1; 313 | assert(coder.comp_count >= 1 && coder.comp_count <= GPUJPEG_MAX_COMPONENT_COUNT); 314 | 315 | // Ensure that before first scan the emit_left_bits will not be invoked 316 | coder.put_bits = 0; 317 | // Perform scan init also for first scan 318 | coder.scan_index = -1; 319 | 320 | // Encode all segments 321 | for ( int segment_index = 0; segment_index < encoder->coder.segment_count; segment_index++ ) { 322 | struct gpujpeg_segment* segment = &encoder->coder.segment[segment_index]; 323 | 324 | //printf("segment %d, %d\n", segment_index, segment->scan_index); 325 | 326 | // Init scan if changed 327 | if ( coder.scan_index != segment->scan_index ) { 328 | // Emit left from previous scan 329 | if ( coder.put_bits > 0 ) 330 | gpujpeg_huffman_cpu_encoder_emit_left_bits(&coder); 331 | 332 | // Write scan header 333 | gpujpeg_writer_write_scan_header(encoder, segment->scan_index); 334 | 335 | // Initialize huffman coder 336 | coder.put_value = 0; 337 | coder.put_bits = 0; 338 | for ( int comp = 0; comp < GPUJPEG_MAX_COMPONENT_COUNT; comp++ ) 339 | coder.dc[comp] = 0; 340 | 341 | // Set current scan index 342 | coder.scan_index = segment->scan_index; 343 | } 344 | 345 | // Encode segment MCUs 346 | for ( int mcu_index 
= 0; mcu_index < segment->mcu_count; mcu_index++ ) { 347 | if ( gpujpeg_huffman_cpu_encoder_encode_mcu(&coder, segment->scan_segment_index, mcu_index) != 0 ) { 348 | fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder failed at block [%d, %d]!\n", segment_index, mcu_index); 349 | return -1; 350 | } 351 | } 352 | 353 | // Output restart marker, if segment is not last in current scan 354 | if ( (segment_index + 1) < encoder->coder.segment_count && encoder->coder.segment[segment_index + 1].scan_index == coder.scan_index ) { 355 | // Emit left bits 356 | if ( coder.put_bits > 0 ) 357 | gpujpeg_huffman_cpu_encoder_emit_left_bits(&coder); 358 | // Restart huffman coder 359 | coder.put_value = 0; 360 | coder.put_bits = 0; 361 | for ( int comp = 0; comp < GPUJPEG_MAX_COMPONENT_COUNT; comp++ ) 362 | coder.dc[comp] = 0; 363 | // Output restart marker 364 | int restart_marker = GPUJPEG_MARKER_RST0 + (segment->scan_segment_index & 0x7); 365 | gpujpeg_writer_emit_marker(encoder->writer, restart_marker); 366 | } 367 | } 368 | 369 | // Emit left 370 | if ( coder.put_bits > 0 ) 371 | gpujpeg_huffman_cpu_encoder_emit_left_bits(&coder); 372 | 373 | return 0; 374 | } 375 | -------------------------------------------------------------------------------- /src/gpujpeg_huffman_cpu_encoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 
12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_HUFFMAN_CPU_ENCODER_H 31 | #define GPUJPEG_HUFFMAN_CPU_ENCODER_H 32 | 33 | #include 34 | 35 | /** 36 | * Perform huffman encoding on the CPU 37 | * 38 | * @param encoder Encoder structure; its quantized component data is 39 | * Huffman-coded and emitted through the encoder's writer 40 | * 41 | * @return 0 if succeeds, otherwise nonzero 42 | */ 43 | int 44 | gpujpeg_huffman_cpu_encoder_encode(struct gpujpeg_encoder* encoder); 45 | 46 | #endif // GPUJPEG_HUFFMAN_CPU_ENCODER_H 47 | -------------------------------------------------------------------------------- /src/gpujpeg_huffman_gpu_decoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #ifndef GPUJPEG_HUFFMAN_GPU_DECODER_H 31 | #define GPUJPEG_HUFFMAN_GPU_DECODER_H 32 | 33 | #include 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | /** 40 | * Init huffman decoder 41 | * 42 | * @return 0 if succeeds, otherwise nonzero 43 | */ 44 | int 45 | gpujpeg_huffman_gpu_decoder_init(); 46 | 47 | /** 48 | * Perform huffman decoding 49 | * 50 | * @param decoder Decoder structure 51 | * @return 0 if succeeds, otherwise nonzero 52 | */ 53 | int 54 | gpujpeg_huffman_gpu_decoder_decode(struct gpujpeg_decoder* decoder); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif // GPUJPEG_HUFFMAN_GPU_DECODER_H 61 | -------------------------------------------------------------------------------- /src/gpujpeg_huffman_gpu_encoder.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef GPUJPEG_HUFFMAN_GPU_ENCODER_H 31 | #define GPUJPEG_HUFFMAN_GPU_ENCODER_H 32 | 33 | #include 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | /** 40 | * Init huffman encoder 41 | * 42 | * @param encoder Encoder structure 43 | * @return 0 if succeeds, otherwise nonzero 44 | */ 45 | int 46 | gpujpeg_huffman_gpu_encoder_init(const struct gpujpeg_encoder * encoder); 47 | 48 | /** 49 | * Perform huffman encoding 50 | * 51 | * @param encoder Encoder structure 52 | * @param output_byte_count Pointer to a variable in host memory that receives the number of bytes of compressed output data 53 | * @return 0 if succeeds, otherwise nonzero 54 | */ 55 | int 56 | gpujpeg_huffman_gpu_encoder_encode(struct gpujpeg_encoder* encoder, unsigned int * output_byte_count); 57 | 58 | #ifdef __cplusplus 59 | } 60 | #endif 61 | 62 | #endif // GPUJPEG_HUFFMAN_GPU_ENCODER_H 63 | -------------------------------------------------------------------------------- /src/gpujpeg_preprocessor.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * Copyright (c) 2011, Silicon Genome, LLC. 4 | * 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 
/**
 * Init preprocessor encoder
 *
 * @param coder  Coder structure
 * @return 0 if succeeds, otherwise nonzero
 */
int
gpujpeg_preprocessor_encoder_init(struct gpujpeg_coder* coder);

/**
 * Preprocessor encode
 *
 * NOTE(review): earlier documentation listed a separate `image` parameter;
 * presumably the image data is now reached through `coder` - confirm.
 *
 * @param coder  Coder structure
 * @return 0 if succeeds, otherwise nonzero
 */
int
gpujpeg_preprocessor_encode(struct gpujpeg_coder* coder);

/**
 * Init preprocessor decoder
 *
 * @param coder  Coder structure
 * @return 0 if succeeds, otherwise nonzero
 */
int
gpujpeg_preprocessor_decoder_init(struct gpujpeg_coder* coder);

/**
 * Preprocessor decode
 *
 * NOTE(review): earlier documentation listed a separate `image` parameter;
 * presumably the image data is now reached through `coder` - confirm.
 *
 * @param coder  Coder structure
 * @return 0 if succeeds, otherwise nonzero
 */
int
gpujpeg_preprocessor_decode(struct gpujpeg_coder* coder);
memcheck_local.sh 15 | @echo "echo '| Running without cuda memory checking (should pass)... |'" >> memcheck_local.sh 16 | @echo "echo '+-------------------------------------------------------+'" >> memcheck_local.sh 17 | @echo './memcheck' >> memcheck_local.sh 18 | @chmod +x memcheck_local.sh 19 | 20 | memcheck_cuda.sh: 21 | @echo "echo '+-----------------------------------------------------+'" > memcheck_cuda.sh 22 | @echo "echo '| Running with cuda memory checking (may not pass)... |'" >> memcheck_cuda.sh 23 | @echo "echo '+-----------------------------------------------------+'" >> memcheck_cuda.sh 24 | @echo "${CUDA_MEMCHECK} ./memcheck || exit 0" >> memcheck_cuda.sh 25 | @chmod +x memcheck_cuda.sh 26 | -------------------------------------------------------------------------------- /test/memcheck/Makefile.bkp: -------------------------------------------------------------------------------- 1 | build: memcheck 2 | @echo "+-------------------------------------------------------+" 3 | @echo "| Running without cuda memory checking (should pass)... |" 4 | @echo "+-------------------------------------------------------+" 5 | @./memcheck 6 | @echo "" 7 | @echo "+-----------------------------------------------------+" 8 | @echo "| Running with cuda memory checking (may not pass)... 
// CUDA check error: reads the last CUDA runtime error and, if it is not
// cudaSuccess, prints file, line, the supplied message and the CUDA error
// string to stderr and terminates the process with exit(-1).
#define cuda_check_error(msg) \
    { \
        cudaError_t err = cudaGetLastError(); \
        if( cudaSuccess != err) { \
            fprintf(stderr, "[GPUJPEG] [Error] %s (line %i): %s: %s.\n", \
                __FILE__, __LINE__, msg, cudaGetErrorString( err) ); \
            exit(-1); \
        } \
    } \

/**
 * Kernel that fills a 3-element local array with the value 55 and stores the
 * element selected by *index into *value.
 *
 * The array index is read from device memory at run time, so the access
 * x[*index] is dynamically indexed.
 *
 * @param index  Device pointer to the array index to read
 * @param value  Device pointer that receives x[*index]
 */
__global__
void get_value(int* index, int* value)
{
    int x[3];
    for ( int i = 0; i < 3; i++ )
        x[i] = 55;

    *value = x[*index];
}

/**
 * Allocates two ints in device memory, initializes both to 0, runs get_value
 * with a single thread, copies both ints back and prints them.
 *
 * Every stage is followed by cuda_check_error, which exits with -1 on failure.
 * The device buffers are not released before returning.
 *
 * @return 0 when all stages complete
 */
int main()
{
    int* d_index;
    int* d_value;
    cudaMalloc((void**)&d_index, sizeof(int));
    cudaMalloc((void**)&d_value, sizeof(int));
    cuda_check_error("Alloc failed");

    int index = 0;
    int value = 0;
    cudaMemcpy(d_index, &index, sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_value, &value, sizeof(int), cudaMemcpyHostToDevice);
    cuda_check_error("Init failed");

    // One block containing one thread
    get_value<<<1, 1>>>(d_index, d_value);
    // Wait for the kernel so that execution errors are visible to the check below
    cudaThreadSynchronize();
    cuda_check_error("Kernel failed");

    cudaMemcpy(&index, d_index, sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(&value, d_value, sizeof(int), cudaMemcpyDeviceToHost);
    cuda_check_error("Copy failed");
    printf("index = %d\n", index);
    printf("value = %d\n", value);

    return 0;
}
NVCC=@CUDA_INSTALL_PATH@/bin/nvcc 2 | CUDA_MEMCHECK=@CUDA_INSTALL_PATH@/bin/cuda-memcheck 3 | 4 | TESTS=opengl_interop 5 | check_PROGRAMS = opengl_interop 6 | 7 | NVCC_CFLAGS = @COMMON_CFLAGS@ $(DEFAULT_INCLUDES) $(INCLUDES) \ 8 | -gencode arch=compute_20,code=sm_20 \ 9 | -gencode arch=compute_11,code=sm_11 10 | 11 | opengl_interop_SOURCES = view.c main.c 12 | opengl_interop_DEPENDENCIES = image.cu.o 13 | opengl_interop_LDADD = image.cu.o ../../libgpujpeg.la 14 | opengl_interop_CFLAGS = @COMMON_CFLAGS@ $(DEFAULT_INCLUDES) $(INCLUDES) 15 | opengl_interop_LDFLAGS = -L@CUDA_INSTALL_LIB@ -lcuda -lcudart -lglut -lGLU -lGLEW -lGL -lpthread 16 | 17 | all-local: tests 18 | tests: check-TESTS 19 | 20 | # Build gpujpeg library 21 | ../../libgpujpeg.la: 22 | @cd ../../; ./config.status --recheck --enable-opengl && make 23 | 24 | # Set suffix for CUDA files 25 | SUFFIXES=.cu 26 | 27 | # Pattern rule for compiling CUDA files 28 | %.cu.o: %.cu 29 | $(NVCC) $(NVCC_CFLAGS) -c $< -o $@; 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/opengl_interop/Makefile.bkp: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2011, CESNET z.s.p.o 3 | # 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | # POSSIBILITY OF SUCH DAMAGE. 26 | # 27 | 28 | # Use shared/static libgpujpeg library? 29 | SHARED_LIBRARY ?= 1 30 | # CUDA install path 31 | CUDA_INSTALL_PATH ?= /usr/local/cuda 32 | 33 | # Target executable 34 | TARGET := opengl_interop 35 | # C files 36 | CFILES := main.c view.c 37 | # CUDA files 38 | CUFILES := image.cu 39 | 40 | # Compilers 41 | CC := gcc 42 | NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc 43 | 44 | # Common flags 45 | COMMONFLAGS += -I. -I$(CUDA_INSTALL_PATH)/include -O2 46 | # C flags 47 | CFLAGS += $(COMMONFLAGS) 48 | # CUDA flags 49 | NVCCFLAGS += $(COMMONFLAGS) \ 50 | -gencode arch=compute_20,code=sm_20 \ 51 | -gencode arch=compute_11,code=sm_11 52 | 53 | # Do 32bit vs. 
64bit setup 54 | LBITS := $(shell getconf LONG_BIT) 55 | ifeq ($(LBITS),64) 56 | # 64bit 57 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib64 58 | else 59 | # 32bit 60 | LDFLAGS += -L$(CUDA_INSTALL_PATH)/lib 61 | endif 62 | 63 | # Link libgpujpeg library 64 | ifeq ($(SHARED_LIBRARY),1) 65 | LDFLAGS += -L../../libgpujpeg -lgpujpeg 66 | else 67 | LDFLAGS += -lcudart ../../libgpujpeg/libgpujpeg.a 68 | endif 69 | LDFLAGS += -lcuda -lcudart -lglut -lGLU -lGLEW -lGL -lpthread 70 | 71 | # Build 72 | build: $(TARGET) $(TARGET).sh 73 | 74 | # Clean 75 | clean: 76 | rm -f *.o $(TARGET) 77 | @cd ../../libgpujpeg; make clean 78 | 79 | # Lists of object files 80 | COBJS=$(CFILES:.c=.c.o) 81 | CUOBJS=$(CUFILES:.cu=.cu.o) 82 | 83 | # Build target 84 | $(TARGET): $(COBJS) $(CUOBJS) ../../libgpujpeg/libgpujpeg.build 85 | $(CC) $(CFLAGS) $(LDFLAGS) $(COBJS) $(CUOBJS) -o $@ 86 | 87 | # Build target run script 88 | ifeq ($(SHARED_LIBRARY),1) 89 | $(TARGET).sh: 90 | @printf "PATH=$$" > $(TARGET).sh 91 | @printf "(dirname $$" >> $(TARGET).sh 92 | @printf "0)\n" >> $(TARGET).sh 93 | @printf "LD_LIBRARY_PATH=\"$$" >> $(TARGET).sh 94 | @printf "LD_LIBRARY_PATH;$$" >> $(TARGET).sh 95 | @printf "PATH/../../libgpujpeg\" $$" >> $(TARGET).sh 96 | @printf "PATH/$(TARGET) $$" >> $(TARGET).sh 97 | @printf "@\n" >> $(TARGET).sh 98 | @chmod a+x $(TARGET).sh 99 | else 100 | $(TARGET).sh: 101 | @printf "PATH=$$" > $(TARGET).sh 102 | @printf "(dirname $$" >> $(TARGET).sh 103 | @printf "0)\n" >> $(TARGET).sh 104 | @printf "$$" >> $(TARGET).sh 105 | @printf "PATH/$(TARGET) $$" >> $(TARGET).sh 106 | @printf "@\n" >> $(TARGET).sh 107 | @chmod a+x $(TARGET).sh 108 | endif 109 | 110 | # Build gpujpeg library 111 | ../../libgpujpeg/libgpujpeg.build: 112 | @cd ../../libgpujpeg; make SHARED_LIBRARY=$(SHARED_LIBRARY) USE_OPENGL=1 113 | 114 | # Set suffix for CUDA files 115 | .SUFFIXES: .cu 116 | 117 | # Pattern rule for compiling C files 118 | %.c.o: %.c 119 | $(CC) $(CFLAGS) -c $< -o $@ 120 | 121 | # Pattern rule for 
Demonstration of libgpujpeg interoperability with OpenGL. The application
runs two threads: the first renders in an infinite loop, and the second
generates, encodes and decodes image data. Image data are transferred from
CUDA memory directly into an OpenGL texture.
/**
 * Create image with a pinned host buffer and a device buffer, each holding
 * width * height pixels of 3 bytes.
 *
 * CUDA allocation failures are reported by cudaCheckError(), which terminates
 * the process.
 *
 * @param width   Image width in pixels (must be positive)
 * @param height  Image height in pixels (must be positive)
 * @return image if succeeds, otherwise NULL
 */
struct image*
image_create(int width, int height)
{
    if ( width <= 0 || height <= 0 )
        return NULL;

    struct image* image = (struct image*)malloc(sizeof(struct image));
    if ( image == NULL )
        return NULL;
    image->width = width;
    image->height = height;

    // Compute the size in size_t so that large dimensions do not overflow int
    size_t data_size = (size_t)width * (size_t)height * 3 * sizeof(uint8_t);

    cudaMallocHost((void**)&image->data, data_size);
    cudaCheckError();

    cudaMalloc((void**)&image->d_data, data_size);
    cudaCheckError();

    return image;
}

/**
 * Destroy image and release its host and device buffers.
 *
 * @param image  Image structure, NULL is accepted and ignored
 */
void
image_destroy(struct image* image)
{
    // The application may destroy an image that was never created
    if ( image == NULL )
        return;
    cudaFreeHost(image->data);
    cudaFree(image->d_data);
    free(image);
}

/**
 * CUDA kernel that fills image data by gradient
 *
 * Each thread writes one pixel: first byte 0, second byte max * y / height,
 * third byte max * x / width. Threads outside the image do nothing.
 *
 * @param data    Device buffer of width * height * 3 bytes
 * @param width   Image width in pixels
 * @param height  Image height in pixels
 * @param max     Maximum level of the gradient
 */
__global__ void
image_render_kernel(uint8_t* data, int width, int height, int max)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if ( x >= width || y >= height )
        return;

    int index = (y * width + x) * 3;

    data[index + 0] = 0;
    data[index + 1] = max * y / height;
    data[index + 2] = max * x / width;
}

/**
 * Render new gradient image into the device buffer of the image.
 *
 * @param image  Image structure
 * @param max    Maximum coefficient level in image
 */
void
image_render(struct image* image, int max)
{
    // 8x8 threads per block; one extra block per dimension covers the remainder
    // (surplus threads are rejected by the bounds check in the kernel)
    dim3 block(8, 8);
    dim3 grid(image->width / block.x + 1, image->height / block.y + 1);
    image_render_kernel<<<grid, block>>>(image->d_data, image->width, image->height, max);
    cudaError cuerr = cudaThreadSynchronize();
    if ( cuerr != cudaSuccess ) {
        fprintf(stderr, "Kernel failed: %s!\n", cudaGetErrorString(cuerr));
        return;
    }
}
/**
 * Image structure
 */
struct image
{
    // Image size in pixels
    int width;
    int height;

    // Pixel buffer in host memory (allocated with cudaMallocHost in image_create)
    uint8_t* data;
    // Pixel buffer in device memory (allocated with cudaMalloc in image_create)
    uint8_t* d_data;
};

/**
 * Create image
 *
 * Allocates the structure and both pixel buffers, each sized
 * width * height * 3 bytes.
 *
 * @param width   Image width in pixels
 * @param height  Image height in pixels
 * @return image if succeeds, otherwise NULL
 */
struct image*
image_create(int width, int height);

/**
 * Destroy image
 *
 * Releases both pixel buffers and the structure itself.
 *
 * @param image Image structure
 * @return void
 */
void
image_destroy(struct image* image);

/**
 * Render new image
 *
 * Fills the device buffer of the image with a gradient.
 *
 * @param image Image structure
 * @param max Maximum coefficient level in image
 */
void
image_render(struct image* image, int max);
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
// When defined, images are generated on a separate image thread; otherwise
// the view thread generates them from its render callback.
#define TEST_OPENGL_INTEROP_MULTI_THREAD

/**
 * Transfer type
 *
 * Selects how the decoded image reaches the OpenGL texture: through host
 * memory or directly as a registered texture.
 */
enum transfer_type {
    TRANSFER_HOST = 0,
    TRANSFER_DEVICE = 1
};

/**
 * OpenGL context state in view thread (bit flags stored in
 * application::opengl_context)
 */
// The view thread currently holds the OpenGL context
#define OPENGL_CONTEXT_ATTACHED 1
// The view thread has released the OpenGL context
#define OPENGL_CONTEXT_DETACHED 2
// The image thread asks the view thread to change the state
#define OPENGL_CONTEXT_REQUEST  4

/**
 * Application structure that holds all common variables
 */
struct application {
    // Size
    int width;
    int height;
    // View structure
    struct view* view;
    // Image structure
    struct image* image;
    // Mutex (guards updates of opengl_context)
    pthread_mutex_t mutex;
    // Transfer type
    enum transfer_type transfer_type;
    // Flag if image thread should quit (set by the view thread when the view ends)
    int quit;
    // Flag if view has detached OpenGL context (combination of OPENGL_CONTEXT_*)
    volatile int opengl_context;
    // New image (set after an image is decoded, cleared when the view loads it)
    volatile int new_image;

    // OpenGL parameters
    unsigned int texture_id;
    struct gpujpeg_opengl_texture* texture;

    // JPEG
    struct gpujpeg_encoder* encoder;
    struct gpujpeg_decoder* decoder;
    struct gpujpeg_decoder_output decoder_output;
};
86 | */ 87 | void* 88 | thread_view_run(void* arg) 89 | { 90 | struct application* app = (struct application*)arg; 91 | 92 | #ifndef TEST_OPENGL_INTEROP_MULTI_THREAD 93 | gpujpeg_init_device(0, GPUJPEG_OPENGL_INTEROPERABILITY); 94 | #endif 95 | 96 | // Run through GLX 97 | view_glx(app->view); 98 | 99 | // Quit image thread 100 | app->quit = 1; 101 | 102 | return 0; 103 | } 104 | 105 | void 106 | thread_image_attach_opengl(void * param) 107 | { 108 | struct application* app = (struct application*)param; 109 | 110 | pthread_mutex_lock(&app->mutex); 111 | app->opengl_context |= OPENGL_CONTEXT_REQUEST; 112 | pthread_mutex_unlock(&app->mutex); 113 | 114 | while ( 1 ) { 115 | if ( app->opengl_context & OPENGL_CONTEXT_DETACHED ) 116 | break; 117 | usleep(1000); 118 | if ( app->quit == 1 ) 119 | pthread_exit(0); 120 | } 121 | view_opengl_attach(app->view); 122 | }; 123 | 124 | void 125 | thread_image_detach_opengl(void * param) 126 | { 127 | struct application* app = (struct application*)param; 128 | 129 | view_opengl_detach(app->view); 130 | 131 | pthread_mutex_lock(&app->mutex); 132 | app->opengl_context |= OPENGL_CONTEXT_REQUEST; 133 | pthread_mutex_unlock(&app->mutex); 134 | 135 | while ( 1 ) { 136 | if ( app->opengl_context & OPENGL_CONTEXT_ATTACHED ) 137 | break; 138 | usleep(1000); 139 | if ( app->quit == 1 ) 140 | pthread_exit(0); 141 | } 142 | }; 143 | 144 | /** 145 | * On init callback for view. 
146 | */ 147 | void 148 | view_on_init(void* param) 149 | { 150 | struct application* app = (struct application*)param; 151 | 152 | // Create image (image should be created after OpenGL is initialized) 153 | app->image = image_create(app->width, app->height); 154 | assert(app->image != NULL); 155 | 156 | // Create texture 157 | glGenTextures(1, &app->texture_id); 158 | glBindTexture(GL_TEXTURE_2D, app->texture_id); 159 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 160 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 161 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 162 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 163 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, app->width, app->height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); 164 | glBindTexture(GL_TEXTURE_2D, 0); 165 | 166 | // Init JPEG params 167 | struct gpujpeg_parameters param_coder; 168 | gpujpeg_set_default_parameters(¶m_coder); 169 | struct gpujpeg_image_parameters param_image; 170 | gpujpeg_image_set_default_parameters(¶m_image); 171 | param_image.width = app->width; 172 | param_image.height = app->height; 173 | 174 | // Init JPEG encoder and decoder 175 | app->encoder = gpujpeg_encoder_create(¶m_coder, ¶m_image); 176 | assert(app->encoder != NULL); 177 | app->decoder = gpujpeg_decoder_create(); 178 | assert(app->decoder != NULL); 179 | assert(gpujpeg_decoder_init(app->decoder, ¶m_coder, ¶m_image) == 0); 180 | 181 | // Init JPEG texture 182 | app->texture = gpujpeg_opengl_texture_register(app->texture_id, GPUJPEG_OPENGL_TEXTURE_WRITE); 183 | assert(app->texture != NULL); 184 | 185 | // Init JPEG decoder output 186 | if ( app->transfer_type == TRANSFER_DEVICE ) { 187 | gpujpeg_decoder_output_set_texture(&app->decoder_output, app->texture); 188 | } else { 189 | gpujpeg_decoder_output_set_default(&app->decoder_output); 190 | } 191 | #ifdef TEST_OPENGL_INTEROP_MULTI_THREAD 192 | // Set texture callbacks 193 | 
app->texture->texture_callback_param = (void*)app; 194 | app->texture->texture_callback_attach_opengl = &thread_image_attach_opengl; 195 | app->texture->texture_callback_detach_opengl = &thread_image_detach_opengl; 196 | #endif 197 | } 198 | 199 | /** 200 | * Generate new image, encode it with JPEG and decode it into OpenGL texture 201 | * 202 | * @param app 203 | * @return void 204 | */ 205 | void 206 | image_generate(struct application* app) 207 | { 208 | static int max = 100; 209 | static int change = 10; 210 | 211 | printf("Image: ImageRender Started\n"); 212 | 213 | TIMER_INIT(); 214 | TIMER_START(); 215 | 216 | // Render new image 217 | max += change; 218 | if ( max < 0 || max > 255 ) { 219 | change = -change; 220 | max += change; 221 | } 222 | image_render(app->image, max); 223 | 224 | TIMER_STOP_PRINT("Image: ImageRendered"); 225 | TIMER_START(); 226 | 227 | // Encode image 228 | uint8_t* image_compressed = NULL; 229 | int image_compressed_size = 0; 230 | // Copy data to host memory 231 | cudaMemcpy(app->image->data, app->image->d_data, app->width * app->height * 3 * sizeof(uint8_t), cudaMemcpyDeviceToHost); 232 | struct gpujpeg_encoder_input input; 233 | gpujpeg_encoder_input_set_image(&input, app->image->data); 234 | assert(gpujpeg_encoder_encode(app->encoder, &input, &image_compressed, &image_compressed_size) == 0); 235 | 236 | TIMER_STOP_PRINT("Image: ImageEncode"); 237 | TIMER_START(); 238 | 239 | // Decode image 240 | gpujpeg_decoder_decode(app->decoder, image_compressed, image_compressed_size, &app->decoder_output); 241 | 242 | app->new_image = 1; 243 | 244 | TIMER_STOP_PRINT("Image: ImageDecode"); 245 | } 246 | 247 | /** 248 | * On render callback for view. Check if cuda_context is available (means new 249 | * image is available) and if it is, load new image to view. 
/**
 * On render callback for view.
 *
 * In multi-thread mode it first serves pending OpenGL context requests from
 * the image thread: it detaches the context when asked, and then waits until
 * the image thread asks it to attach the context again. Afterwards, if a new
 * image is flagged, it loads that image into the view.
 *
 * @param param  Pointer to struct application
 */
void
view_on_render(void* param)
{
    struct application* app = (struct application*)param;

#ifdef TEST_OPENGL_INTEROP_MULTI_THREAD
    // Image thread requested the context while we hold it: hand it over
    if ( (app->opengl_context & OPENGL_CONTEXT_ATTACHED) && (app->opengl_context & OPENGL_CONTEXT_REQUEST) ) {
        pthread_mutex_lock(&app->mutex);
        view_opengl_detach(app->view);
        // Plain assignment also clears the request flag
        app->opengl_context = OPENGL_CONTEXT_DETACHED;
        pthread_mutex_unlock(&app->mutex);
    }
    // If OpenGL context is detached we can't render
    while ( app->opengl_context & OPENGL_CONTEXT_DETACHED ) {
        // Image thread is done with the context: take it back
        if ( app->opengl_context & OPENGL_CONTEXT_REQUEST ) {
            pthread_mutex_lock(&app->mutex);
            view_opengl_attach(app->view);
            // Plain assignment also clears the request flag and ends the loop
            app->opengl_context = OPENGL_CONTEXT_ATTACHED;
            pthread_mutex_unlock(&app->mutex);
        }
        usleep(1000);
    }

#endif

#ifndef TEST_OPENGL_INTEROP_MULTI_THREAD
    // Single-thread mode: generate the image here
    image_generate(app);
    app->new_image = 1;
#endif

    // NOTE(review): TIMER_INIT creates two CUDA events on every call, also on
    // the early return below, and nothing in this function destroys them.
    TIMER_INIT();
    TIMER_START();

    // Load image only when it is new
    if ( app->new_image == 0 )
        return;
    app->new_image = 0;

    if ( app->decoder_output.type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) {
        // Do nothing texture is already updated
    } else {
        // Set texture data from host memory
        glBindTexture(GL_TEXTURE_2D, app->texture_id);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, app->width, app->height, 0, GL_RGB, GL_UNSIGNED_BYTE, app->decoder_output.data);
    }
    glFinish();

    view_set_texture(app->view, app->texture_id);

    TIMER_STOP_PRINT("View: ImageLoad");
}
For every generated image it detaches 305 | * CUDA context and wait until view thread load that image by detached CUDA context 306 | */ 307 | void* 308 | thread_image_run(void* arg) 309 | { 310 | struct application* app = (struct application*)arg; 311 | 312 | gpujpeg_init_device(0, GPUJPEG_OPENGL_INTEROPERABILITY); 313 | 314 | // Wait until work thread is ready to render image 315 | while ( app->image == NULL ) { 316 | usleep(1000); 317 | continue; 318 | } 319 | 320 | // Generated image in loop until view thread quit 321 | while ( app->quit == 0 ) { 322 | usleep(30000); 323 | 324 | image_generate(app); 325 | } 326 | 327 | return 0; 328 | } 329 | 330 | int 331 | main(int argc, char **argv) 332 | { 333 | // Create application 334 | struct application app; 335 | app.width = 1920; 336 | app.height = 1080; 337 | app.view = view_create(app.width, app.height, 1280, 720); 338 | assert(app.view != NULL); 339 | app.image = NULL; 340 | assert(pthread_mutex_init(&app.mutex, NULL) == 0); 341 | app.transfer_type = TRANSFER_DEVICE; 342 | app.quit = 0; 343 | app.opengl_context = OPENGL_CONTEXT_ATTACHED; 344 | app.new_image = 0; 345 | 346 | // Set view callbacks 347 | view_set_on_init(app.view, &view_on_init, (void*)&app); 348 | view_set_on_render(app.view, &view_on_render, (void*)&app); 349 | 350 | // Create threads 351 | pthread_t thread_view; 352 | pthread_create(&thread_view, NULL, thread_view_run, (void*)&app); 353 | #ifdef TEST_OPENGL_INTEROP_MULTI_THREAD 354 | pthread_t thread_image; 355 | pthread_create(&thread_image, NULL, thread_image_run, (void*)&app); 356 | #endif 357 | 358 | // Wait for threads to exit and check result status 359 | void* result; 360 | pthread_join(thread_view, &result); 361 | assert(result == 0); 362 | #ifdef TEST_OPENGL_INTEROP_MULTI_THREAD 363 | pthread_join(thread_image, &result); 364 | assert(result == 0); 365 | #endif 366 | 367 | // Destroy application 368 | image_destroy(app.image); 369 | view_destroy(app.view); 370 | 
pthread_mutex_destroy(&app.mutex); 371 | gpujpeg_encoder_destroy(app.encoder); 372 | gpujpeg_decoder_destroy(app.decoder); 373 | 374 | return 0; 375 | } 376 | -------------------------------------------------------------------------------- /test/opengl_interop/util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | * POSSIBILITY OF SUCH DAMAGE. 
/**
 * Check CUDA error
 *
 * Reads the last CUDA runtime error; when it is not cudaSuccess, prints the
 * file, line and CUDA error string to stderr and terminates with exit(-1).
 * Expands to a braced block.
 */
#define cudaCheckError() { \
    enum cudaError err = cudaGetLastError(); \
    if ( cudaSuccess != err) { \
        fprintf(stderr, "CUDA error in file '%s' in line %i : %s.\n", \
            __FILE__, __LINE__, cudaGetErrorString(err) ); \
        exit(-1); \
    } \
} \

/**
 * Check OpenGL error
 *
 * Reads the OpenGL error flag; when it is not GL_NO_ERROR, prints the file,
 * line and GLU error string to stderr and terminates with exit(-1).
 * Expands to a braced block.
 */
#define glCheckError() { \
    GLenum error = glGetError(); \
    if ( error != GL_NO_ERROR ) { \
        fprintf(stderr, "OpenGL error in file '%s' in line %i: %s.\n", \
            __FILE__, __LINE__, gluErrorString(error)); \
        exit(-1); \
    } \
} \

/**
 * Timer
 *
 * TIMER_INIT declares two CUDA events and a float in the current scope, so it
 * can be used once per scope. TIMER_START records the start event;
 * TIMER_STOP records the stop event, waits for it and stores the elapsed time
 * in milliseconds; TIMER_DURATION yields that value; TIMER_STOP_PRINT stops
 * the timer and prints the text followed by the duration.
 *
 * NOTE(review): no macro here destroys the events created by TIMER_INIT.
 */
#define TIMER_INIT() \
    cudaEvent_t __start, __stop; \
    cudaEventCreate(&__start); \
    cudaEventCreate(&__stop); \
    float __elapsedTime;
#define TIMER_START() \
    cudaEventRecord(__start,0)
#define TIMER_STOP() \
    cudaEventRecord(__stop,0); \
    cudaEventSynchronize(__stop); \
    cudaEventElapsedTime(&__elapsedTime, __start, __stop)
#define TIMER_DURATION() __elapsedTime
#define TIMER_STOP_PRINT(text) \
    TIMER_STOP(); \
    printf("%s %f ms\n", text, __elapsedTime)
5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | * POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "view.h" 30 | #include "image.h" 31 | #include "util.h" 32 | 33 | /** Documented at declaration */ 34 | struct view 35 | { 36 | int width; 37 | int height; 38 | int window_width; 39 | int window_height; 40 | view_callback_on_init_t on_init; 41 | void* on_init_param; 42 | view_callback_on_render_t on_render; 43 | void* on_render_param; 44 | unsigned int texture_id; 45 | 46 | // GLX 47 | Display* glx_display; 48 | GLXContext glx_context; 49 | Window glx_window; 50 | }; 51 | 52 | /** Documented at declaration */ 53 | struct view* 54 | view_create(int width, int height, int window_width, int window_height) 55 | { 56 | if ( window_width == 0 ) 57 | window_width = width; 58 | if ( window_width == 0 ) 59 | window_height = height; 60 | 61 | struct view* view = (struct view*)malloc(sizeof(struct view)); 62 | if ( view == NULL ) 63 | return NULL; 64 | view->width = width; 65 | view->height = height; 66 | view->window_width = window_width; 67 | view->window_height = window_height; 68 | view->on_init = NULL; 69 | view->on_init_param = NULL; 70 | view->on_render = NULL; 71 | view->on_render_param = NULL; 72 | view->texture_id = 0; 73 | 74 | return view; 75 | } 76 | 77 | /** Documented at declaration */ 78 | void 79 | view_set_on_init(struct view* view, view_callback_on_init_t on_init, void* param) 80 | { 81 | view->on_init = on_init; 82 | view->on_init_param = param; 83 | } 84 | 85 | /** Documented at declaration */ 86 | void 87 | view_set_on_render(struct view* view, view_callback_on_render_t on_render, void* param) 88 | { 89 | view->on_render = on_render; 90 | view->on_render_param = param; 91 | } 92 | 93 | /** Documented at declaration */ 94 | void 95 | view_set_texture(struct view* view, int texture_id) 96 | { 97 | view->texture_id = texture_id; 98 | } 99 | 100 | /** Documented at declaration */ 101 | void 102 | view_destroy(struct view* view) 103 | { 104 | free(view); 105 | } 106 | 107 | void 108 | view_init(struct view* view) 109 | { 110 | 
view->texture_id = 0; 111 | 112 | glEnable(GL_TEXTURE_2D); 113 | 114 | if ( view->on_init != NULL ) 115 | view->on_init(view->on_init_param); 116 | } 117 | 118 | void 119 | view_render(struct view* view) 120 | { 121 | glViewport(0, 0, view->window_width, view->window_height); 122 | glClear(GL_COLOR_BUFFER_BIT); 123 | 124 | if ( view->on_render != NULL ) 125 | view->on_render(view->on_render_param); 126 | 127 | if ( view->texture_id != 0 ) { 128 | glBindTexture(GL_TEXTURE_2D, view->texture_id); 129 | glBegin(GL_QUADS); 130 | glTexCoord2f(0.0, 0.0); glVertex2f(-1.0, 1.0); 131 | glTexCoord2f(1.0, 0.0); glVertex2f(1.0, 1.0); 132 | glTexCoord2f(1.0, 1.0); glVertex2f(1.0, -1.0); 133 | glTexCoord2f(0.0, 1.0); glVertex2f(-1.0, -1.0); 134 | glEnd(); 135 | glBindTexture(GL_TEXTURE_2D, 0); 136 | } 137 | 138 | glFlush(); 139 | glFinish(); 140 | } 141 | 142 | /** View for glut */ 143 | struct view* g_glut_view = NULL; 144 | 145 | void 146 | view_glut_render() 147 | { 148 | struct view* view = g_glut_view; 149 | if ( view == NULL ) 150 | return; 151 | 152 | TIMER_INIT(); 153 | TIMER_START(); 154 | 155 | view_render(view); 156 | 157 | glutSwapBuffers(); 158 | 159 | TIMER_STOP_PRINT("View: GlutRender"); 160 | 161 | usleep(1000); 162 | 163 | glutPostRedisplay(); 164 | } 165 | 166 | void 167 | view_glut_keyboard(unsigned char key, int x, int y) 168 | { 169 | switch ( key ) { 170 | case 27: 171 | g_glut_view = NULL; 172 | break; 173 | default: 174 | printf("Key pressed: %c (%d)\n", key, (int)key); 175 | break; 176 | } 177 | } 178 | 179 | /** Documented at declaration */ 180 | int 181 | view_glut(struct view* view) 182 | { 183 | int argc = 0; 184 | glutInit(&argc, NULL); 185 | glutInitDisplayMode(GLUT_DOUBLE | GLUT_DEPTH | GLUT_RGBA); 186 | glutCreateWindow("OpenGL and CUDA"); 187 | glutReshapeWindow(view->window_width, view->window_height); 188 | glutDisplayFunc(view_glut_render); 189 | glutKeyboardFunc(view_glut_keyboard); 190 | glutIdleFunc(view_glut_render); 191 | 192 | 
view_init(view); 193 | 194 | g_glut_view = view; 195 | 196 | glutPostRedisplay(); 197 | while( g_glut_view != NULL ) { 198 | glutMainLoopEvent(); 199 | } 200 | glutHideWindow(); 201 | 202 | return 0; 203 | } 204 | 205 | void 206 | view_glx_render(struct view* view) 207 | { 208 | TIMER_INIT(); 209 | TIMER_START(); 210 | 211 | view_render(view); 212 | 213 | glXSwapBuffers(view->glx_display, view->glx_window); 214 | 215 | TIMER_STOP_PRINT("View: GlxRender"); 216 | 217 | usleep(1000); 218 | } 219 | 220 | int 221 | view_glx_keyboard(int keycode, const char* key) 222 | { 223 | switch ( keycode ) { 224 | case 9: 225 | return 1; 226 | default: 227 | printf("Key pressed: %s (%d)\n", key, keycode); 228 | break; 229 | } 230 | return 0; 231 | } 232 | 233 | /** Documented at declaration */ 234 | int 235 | view_glx(struct view* view) 236 | { 237 | // Open display 238 | view->glx_display = XOpenDisplay(0); 239 | if ( view->glx_display == NULL ) { 240 | fprintf(stderr, "Failed to open X display!\n"); 241 | pthread_exit(0); 242 | } 243 | 244 | // Choose visual 245 | static int attributes[] = { 246 | GLX_RGBA, 247 | GLX_DOUBLEBUFFER, 248 | GLX_RED_SIZE, 1, 249 | GLX_GREEN_SIZE, 1, 250 | GLX_BLUE_SIZE, 1, 251 | None 252 | }; 253 | XVisualInfo* visual = glXChooseVisual(view->glx_display, DefaultScreen(view->glx_display), attributes); 254 | if ( visual == NULL ) { 255 | fprintf(stderr, "Failed to choose visual!\n"); 256 | pthread_exit(0); 257 | } 258 | 259 | // Create OpenGL context 260 | view->glx_context = glXCreateContext(view->glx_display, visual, 0, GL_TRUE); 261 | if ( view->glx_context == NULL ) { 262 | fprintf(stderr, "Failed to create OpenGL context!\n"); 263 | pthread_exit(0); 264 | } 265 | 266 | // Create window 267 | Colormap colormap = XCreateColormap(view->glx_display, RootWindow(view->glx_display, visual->screen), visual->visual, AllocNone); 268 | XSetWindowAttributes swa; 269 | swa.colormap = colormap; 270 | swa.border_pixel = 0; 271 | swa.event_mask = KeyPressMask; 272 
| view->glx_window = XCreateWindow( 273 | view->glx_display, 274 | RootWindow(view->glx_display, visual->screen), 275 | 0, 0, view->window_width, view->window_height, 276 | 0, visual->depth, InputOutput, visual->visual, 277 | CWBorderPixel | CWColormap | CWEventMask, 278 | &swa 279 | ); 280 | XStoreName(view->glx_display, view->glx_window, "OpenGL and CUDA interoperability"); 281 | XMapWindow(view->glx_display, view->glx_window); 282 | 283 | view_opengl_attach(view); 284 | view_init(view); 285 | 286 | while ( 1 ) { 287 | view_glx_render(view); 288 | 289 | XEvent event; 290 | if ( XCheckWindowEvent(view->glx_display, view->glx_window, KeyPressMask, &event)) { 291 | int keycode = (int)event.xkey.keycode; 292 | char* key = XKeysymToString(XKeycodeToKeysym(view->glx_display, keycode, 0)); 293 | if ( view_glx_keyboard(keycode, key) != 0 ) 294 | break; 295 | } 296 | } 297 | 298 | view_opengl_detach(view); 299 | 300 | // Cleanup 301 | glXDestroyContext(view->glx_display, view->glx_context); 302 | XDestroyWindow(view->glx_display, view->glx_window); 303 | XCloseDisplay(view->glx_display); 304 | 305 | return 0; 306 | } 307 | 308 | /** Documented at declaration */ 309 | void 310 | view_opengl_attach(struct view* view) 311 | { 312 | glXMakeCurrent(view->glx_display, view->glx_window, view->glx_context); 313 | } 314 | 315 | /** Documented at declaration */ 316 | void 317 | view_opengl_detach(struct view* view) 318 | { 319 | glXMakeCurrent(view->glx_display, None, NULL); 320 | } 321 | -------------------------------------------------------------------------------- /test/opengl_interop/view.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2011, CESNET z.s.p.o 3 | * 4 | * All rights reserved. 
5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | * POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #ifndef TEST_OPENGL_INTEROP_VIEW_H 30 | #define TEST_OPENGL_INTEROP_VIEW_H 31 | 32 | /** 33 | * Opaque view structure (defined in view.c) 34 | */ 35 | struct view; 36 | 37 | /** 38 | * Callback invoked when the view is initialized; param is the value registered with view_set_on_init() 39 | */ 40 | typedef void (*view_callback_on_init_t)(void* param); 41 | 42 | /** 43 | * Callback invoked on every rendered frame before the texture is drawn; param is the value registered with view_set_on_render() 44 | */ 45 | typedef void (*view_callback_on_render_t)(void* param); 46 | 47 | /** 48 | * Create view 49 | * 50 | * @param width Data width 51 | * @param height Data height 52 | * @param window_width Window width 53 | * @param window_height Window height 54 | * @return view structure (release it with view_destroy) if succeeds, otherwise NULL 55 | */ 56 | struct view* 57 | view_create(int width, int height, int window_width, int window_height); 58 | 59 | /** 60 | * Set on init callback to view 61 | * 62 | * @param view View to modify 63 | * @param on_init Callback to call, or NULL for none 64 | * @param param Value passed to the callback 65 | */ 66 | void 67 | view_set_on_init(struct view* view, view_callback_on_init_t on_init, void* param); 68 | 69 | /** 70 | * Set on render callback to view 71 | * 72 | * @param view View to modify 73 | * @param on_render Callback to call, or NULL for none 74 | * @param param Value passed to the callback 75 | */ 76 | void 77 | view_set_on_render(struct view* view, view_callback_on_render_t on_render, void* param); 78 | 79 | /** 80 | * Set texture to show (view initialization resets it to 0) 81 | * 82 | * @param view View to modify 83 | * @param texture_id Texture drawn on every frame; 0 draws nothing 84 | * @return void 85 | */ 86 | void 87 | view_set_texture(struct view* view, int texture_id); 88 | 89 | /** 90 | * Destroy view 91 | * 92 | * @param view View returned by view_create 93 | * @return void 94 | */ 95 | void 96 | view_destroy(struct view* view); 97 | 98 | /** 99 | * Run view by GLX: opens the X display and window and renders frames until the quit key is pressed 100 | * 101 | * @param view View to run 102 | * @return 0 after the render loop ends (setup failures end the calling thread with pthread_exit instead of returning) 103 | */ 104 | int 105 | view_glx(struct view* view); 106 | 107 | /** 108 | * Attach OpenGL context (makes the view's GLX context current for its window) 109 | * 110 | * @param view View whose display, window and context are used 111 | * @return void 112 | */ 113 | void 114 | view_opengl_attach(struct view* view); 115 | 116 | /** 117 | * Detach OpenGL context (releases the current GLX context) 118 | * 119 | * @param view View whose display is used 120 | * @return void 121 | */ 122 | void 123 | view_opengl_detach(struct view* view); 124 | 125 | 
#endif // TEST_OPENGL_INTEROP_VIEW_H 126 | --------------------------------------------------------------------------------