├── experiments ├── __init__.py ├── compression │ ├── compresso │ │ ├── __init__.py │ │ ├── cpp-compresso.h │ │ ├── setup.py │ │ ├── compresso.pyx │ │ └── cpp-compresso.cpp │ ├── neuroglancer │ │ ├── __init__.py │ │ ├── cpp-neuroglancer.h │ │ ├── setup.py │ │ ├── pyneuroglancer.py │ │ ├── neuroglancer.pyx │ │ └── cpp-neuroglancer.cpp │ ├── __init__.py │ ├── lz78.py │ ├── x264.py │ ├── jpeg.py │ ├── _png.py │ ├── methods.py │ └── util.py ├── figures │ └── compression-performance.png ├── requirements.txt ├── plot.py └── run.py ├── banner.png ├── paper └── paper.pdf ├── .editorconfig ├── CITATION.bib ├── src ├── python │ ├── setup.py │ └── compresso.pyx └── c++ │ └── compresso.hxx ├── LICENSE ├── .gitignore ├── requirements.txt └── README.md /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/compression/compresso/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCG/compresso/HEAD/banner.png -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCG/compresso/HEAD/paper/paper.pdf -------------------------------------------------------------------------------- /experiments/figures/compression-performance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/VCG/compresso/HEAD/experiments/figures/compression-performance.png -------------------------------------------------------------------------------- /experiments/compression/compresso/cpp-compresso.h: -------------------------------------------------------------------------------- 1 | namespace compresso { 2 | unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int zstep, int ystep, int xstep); 3 | 4 | unsigned long *Decompress(unsigned long *compressed_data); 5 | } 6 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/cpp-neuroglancer.h: -------------------------------------------------------------------------------- 1 | namespace neuroglancer { 2 | unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx); 3 | 4 | unsigned long *Decompress(unsigned long *compressed_data, int bz, int by, int bx); 5 | } -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | 11 | 12 | [*.py] 13 | indent_size = 4 14 | 15 | 16 | [*.md] 17 | indent_size = 4 18 | trim_trailing_whitespace = false 19 | -------------------------------------------------------------------------------- /experiments/compression/__init__.py: -------------------------------------------------------------------------------- 1 | from util import Util 2 | from compresso import compresso 3 | from neuroglancer import neuroglancer 4 | from methods import ( 5 | # general purpose 6 | BZ2, 7 | LZ78, 8 | LZF, 9 | LZMA, 10 | LZO, 11 | LZW, 12 | ZLIB, 13 | ZSTD, 14 | # image specific 15 | JPEG2000, 16 | PNG, 17 | # 
segmentation specific 18 | COMPRESSO, 19 | NEUROGLANCER, 20 | # video specific 21 | X264, 22 | # default 23 | NONE 24 | ) 25 | -------------------------------------------------------------------------------- /experiments/compression/compresso/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | extensions = [ 6 | Extension( 7 | 'compresso', 8 | include_dirs=[np.get_include()], 9 | sources=['compresso.pyx', 'cpp-compresso.cpp'], 10 | extra_compile_args=['-O4', '-std=c++0x'], 11 | language='c++' 12 | ) 13 | ] 14 | 15 | setup( 16 | ext_modules = cythonize(extensions) 17 | ) 18 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | extensions = [ 6 | Extension( 7 | 'neuroglancer', 8 | include_dirs=[np.get_include()], 9 | sources=['neuroglancer.pyx', 'cpp-neuroglancer.cpp'], 10 | extra_compile_args=['-O4', '-std=c++0x'], 11 | language='c++' 12 | ) 13 | ] 14 | 15 | setup( 16 | ext_modules = cythonize(extensions) 17 | ) 18 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @inbook{matejek2017compresso, 2 | author = {Brian Matejek and Daniel Haehn and Fritz Lekschas and Michael Mitzenmacher and Hanspeter Pfister}, 3 | title = {Compresso: Efficient Compression of Segmentation Data For Connectomics}, 4 | booktitle = {Medical Image Computing and Computer Assisted Intervention}, 5 | series = {MICCAI ’17}, 6 | publisher = {Springer International Publishing}, 7 | year = {2017}, 8 | month = {9}, 9 | day = {4}, 10 | pages = 
{781--788}, 11 | doi = {10.1007/978-3-319-66182-7_89}, 12 | } 13 | -------------------------------------------------------------------------------- /src/python/setup.py: -------------------------------------------------------------------------------- 1 | ## example: 2 | ## http://stackoverflow.com/questions/16792792/project-organization-with-cython-and-c 3 | 4 | from distutils.core import setup, Extension 5 | from Cython.Build import cythonize 6 | import numpy as np 7 | 8 | extensions = [ 9 | Extension( 10 | 'compresso', 11 | include_dirs=[np.get_include(), '../c++/'], 12 | sources=['compresso.pyx'], 13 | extra_compile_args=['-O4', '-std=c++11', '-C'], 14 | language='c++' 15 | ) 16 | ] 17 | 18 | setup( 19 | ext_modules=cythonize(extensions) 20 | ) 21 | -------------------------------------------------------------------------------- /experiments/compression/lz78.py: -------------------------------------------------------------------------------- 1 | class lz78(object): 2 | 3 | @staticmethod 4 | def name(): 5 | return 'LZ78' 6 | 7 | @staticmethod 8 | def compress(data, *args, **kwargs): 9 | '''LZ78 compression 10 | ''' 11 | 12 | d, word = {0: ''}, 0 13 | dyn_d = ( 14 | lambda d, key: d.get(key) or d.__setitem__(key, len(d)) or 0 15 | ) 16 | 17 | return [ 18 | token for 19 | char in 20 | data for 21 | token in 22 | [(word, char)] for 23 | word in [dyn_d(d, token)] if not word 24 | ] + [(word, '')] 25 | 26 | @staticmethod 27 | def decompress(data, *args, **kwargs): 28 | '''LZ78 decompression 29 | ''' 30 | 31 | d, j = {0: ''}, ''.join 32 | dyn_d = ( 33 | lambda d, value: d.__setitem__(len(d), value) or value 34 | ) 35 | 36 | return j([dyn_d(d, d[codeword] + char) for (codeword, char) in data]) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Rhoana 4 | 5 | Permission is hereby granted, free of charge, 
to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | PAPER/main.pdf 6 | PAPER/main.synctex.gz 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | results/ 28 | neuroglancer.cpp 29 | compresso.cpp 30 | 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # Texpad Stuff 97 | .texpadtmp 98 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports.lzma=0.0.3=py27_0 5 | cairo=1.14.8=0 6 | certifi=2016.2.28=py27_0 7 | contextlib2=0.5.5=py27_0 8 | cycler=0.10.0=py27_0 9 | cython=0.26=py27_0 10 | dbus=1.10.20=0 11 | enum34=1.1.6=py27_0 12 | expat=2.1.0=0 13 | fontconfig=2.12.1=3 14 | freetype=2.5.5=2 15 | funcsigs=1.0.2=py27_0 16 | functools32=3.2.3.2=py27_0 17 | glib=2.50.2=1 18 | glymur=0.8.11=py27_1 19 | gst-plugins-base=1.8.0=0 20 | gstreamer=1.8.0=0 21 | h5py=2.7.0=np113py27_0 22 | hdf5=1.8.17=2 23 | icu=54.1=0 24 | jbig=2.1=0 25 | jpeg=9b=0 26 | libffi=3.2.1=1 27 | libgcc=5.2.0=0 28 | libiconv=1.14=0 29 | libpng=1.6.30=1 30 | libtiff=4.0.6=3 31 | libxcb=1.12=1 32 | libxml2=2.9.4=0 33 | libxslt=1.1.29=0 34 | llvmlite=0.19.0=py27_0 35 | lxml=3.8.0=py27_0 36 
| lzo=2.10=0 37 | matplotlib=2.0.2=np113py27_0 38 | mkl=2017.0.3=0 39 | numba=0.34.0=np113py27_0 40 | numpy=1.13.1=py27_0 41 | olefile=0.44=py27_0 42 | openjpeg=2.1.2=3 43 | openssl=1.0.2l=0 44 | pcre=8.39=1 45 | pillow=4.2.1=py27_0 46 | pip=9.0.1=py27_1 47 | pixman=0.34.0=0 48 | pycairo=1.10.0=py27_0 49 | pyparsing=2.2.0=py27_0 50 | pypng=0.0.16=py27_0 51 | pyqt=5.6.0=py27_2 52 | python=2.7.13=0 53 | python-dateutil=2.6.1=py27_0 54 | python-lzf=0.2.1=py27_0 55 | python-lzo=1.11=py27_0 56 | pytz=2017.2=py27_0 57 | qt=5.6.2=5 58 | readline=6.2=2 59 | setuptools=36.4.0=py27_0 60 | singledispatch=3.4.0.3=py27_0 61 | sip=4.18=py27_0 62 | six=1.10.0=py27_0 63 | sqlite=3.13.0=0 64 | subprocess32=3.2.7=py27_0 65 | tk=8.5.18=0 66 | wheel=0.29.0=py27_0 67 | xz=5.2.3=0 68 | zlib=1.2.11=0 69 | zstandard=0.4.0=py27_0 70 | -------------------------------------------------------------------------------- /experiments/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports.lzma=0.0.3=py27_0 5 | cairo=1.14.8=0 6 | certifi=2016.2.28=py27_0 7 | contextlib2=0.5.5=py27_0 8 | cycler=0.10.0=py27_0 9 | cython=0.26=py27_0 10 | dbus=1.10.20=0 11 | enum34=1.1.6=py27_0 12 | expat=2.1.0=0 13 | fontconfig=2.12.1=3 14 | freetype=2.5.5=2 15 | funcsigs=1.0.2=py27_0 16 | functools32=3.2.3.2=py27_0 17 | glib=2.50.2=1 18 | glymur=0.8.11=py27_1 19 | gst-plugins-base=1.8.0=0 20 | gstreamer=1.8.0=0 21 | h5py=2.7.0=np113py27_0 22 | hdf5=1.8.17=2 23 | icu=54.1=0 24 | jbig=2.1=0 25 | jpeg=9b=0 26 | libffi=3.2.1=1 27 | libgcc=5.2.0=0 28 | libiconv=1.14=0 29 | libpng=1.6.30=1 30 | libtiff=4.0.6=3 31 | libxcb=1.12=1 32 | libxml2=2.9.4=0 33 | libxslt=1.1.29=0 34 | llvmlite=0.19.0=py27_0 35 | lxml=3.8.0=py27_0 36 | lzo=2.10=0 37 | matplotlib=2.0.2=np113py27_0 38 | mkl=2017.0.3=0 39 | numba=0.34.0=np113py27_0 40 | numpy=1.13.1=py27_0 41 | 
olefile=0.44=py27_0 42 | openjpeg=2.1.2=3 43 | openssl=1.0.2l=0 44 | pcre=8.39=1 45 | pillow=4.2.1=py27_0 46 | pip=9.0.1=py27_1 47 | pixman=0.34.0=0 48 | pycairo=1.10.0=py27_0 49 | pyparsing=2.2.0=py27_0 50 | pypng=0.0.16=py27_0 51 | pyqt=5.6.0=py27_2 52 | python=2.7.13=0 53 | python-dateutil=2.6.1=py27_0 54 | python-lzf=0.2.1=py27_0 55 | python-lzo=1.11=py27_0 56 | pytz=2017.2=py27_0 57 | qt=5.6.2=5 58 | readline=6.2=2 59 | setuptools=36.4.0=py27_0 60 | singledispatch=3.4.0.3=py27_0 61 | sip=4.18=py27_0 62 | six=1.10.0=py27_0 63 | sqlite=3.13.0=0 64 | subprocess32=3.2.7=py27_0 65 | tk=8.5.18=0 66 | wheel=0.29.0=py27_0 67 | xz=5.2.3=0 68 | zlib=1.2.11=0 69 | zstandard=0.4.0=py27_0 70 | -------------------------------------------------------------------------------- /experiments/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import sys 6 | import cPickle as pickle 7 | 8 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 9 | 10 | import compression as C 11 | 12 | 13 | def render_plots(pickle_file, output): 14 | with open(pickle_file, 'rb') as f: 15 | results = pickle.load(f) 16 | 17 | C.Util.plot( 18 | method_labels=results['methods'], 19 | data_bytes=results['comp_bytes'], 20 | ratios=results['ratios'], 21 | com_speed=results['total_comp_speed'], 22 | com_speed_stderr=results['total_comp_speed_std'], 23 | dcom_speed=results['total_decomp_speed'], 24 | dcom_speed_stderr=results['total_decomp_speed_std'], 25 | save=output, 26 | dpi=300, 27 | bw=False 28 | ) 29 | 30 | 31 | if __name__ == '__main__': 32 | parser = argparse.ArgumentParser() 33 | 34 | parser.add_argument( 35 | 'results', 36 | metavar='PATH', 37 | type=str, 38 | help='path to pickled results' 39 | ) 40 | 41 | parser.add_argument( 42 | '--output', 43 | '-o', 44 | metavar='PATH', 45 | dest='output', 46 | action='store', 47 | type=str, 48 | default='figures', 49 | help='output (default: 
figures/.eps)' 50 | ) 51 | 52 | args = parser.parse_args() 53 | 54 | if not os.path.isfile(args.results): 55 | print('Results file not found') 56 | sys.exit() 57 | 58 | output = os.path.basename(args.results) 59 | 60 | if args.output: 61 | output = os.path.join(args.output, output) 62 | 63 | render_plots(args.results, output) 64 | -------------------------------------------------------------------------------- /src/python/compresso.pyx: -------------------------------------------------------------------------------- 1 | # cimports 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # python imports 6 | from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t 7 | import numpy as np 8 | 9 | ctypedef fused Type: 10 | uint8_t 11 | uint16_t 12 | uint32_t 13 | uint64_t 14 | 15 | # import c++ functions 16 | cdef extern from "compresso.hxx" namespace "Compresso": 17 | unsigned char *Compress(...) 18 | Type *Decompress[Type](...) 19 | 20 | 21 | class Compresso(object): 22 | @staticmethod 23 | def name(): 24 | return 'Compresso' 25 | 26 | @staticmethod 27 | def compress(Type[:,:,:] data, res, steps): 28 | # call the c++ compression function 29 | cdef long *cpp_res = [res[0], res[1], res[2]] 30 | cdef long *cpp_steps = [steps[0], steps[1], steps[2]] 31 | cdef long *nentries = [0] 32 | cdef unsigned char *compressed_data = Compress(&(data[0,0,0]), cpp_res, cpp_steps, nentries) 33 | 34 | # convert to numpy array 35 | cdef unsigned char[:] tmp_compressed_data = compressed_data 36 | 37 | return np.asarray(tmp_compressed_data) 38 | 39 | @staticmethod 40 | def decompress(data): 41 | # get the number of bytes per uint (1, 2, 4, or 8) 42 | # the 76 comes from the offset in the header 43 | BYTE_OFFSET = 76 44 | nbytes = data[BYTE_OFFSET] 45 | 46 | # call the c++ decompression function 47 | cdef long *res = [0, 0, 0] 48 | cdef np.ndarray[unsigned char, ndim=1, mode='c'] cpp_data = np.ascontiguousarray(data) 49 | 50 | # just call this as unsigned long and convert later 51 | # TODO 
this is a bad hack 52 | cdef unsigned long *cpp_decompressed_data = Decompress['unsigned long'](&(cpp_data[0]), res) 53 | 54 | # convert the c++ pointer to a numpy array 55 | nentries = res[0] * res[1] * res[2] 56 | cdef unsigned long[:] tmp_decompressed_data = cpp_decompressed_data 57 | decompressed_data = np.asarray(tmp_decompressed_data).reshape((res[0], res[1], res[2])) 58 | 59 | # convert to a different data type if needed 60 | if nbytes == 1: decompressed_data = decompressed_data.astype(np.uint8) 61 | elif nbytes == 2: decompressed_data = decompressed_data.astype(np.uint16) 62 | elif nbytes == 4: decompressed_data = decompressed_data.astype(np.uint32) 63 | 64 | return np.asarray(decompressed_data) -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/pyneuroglancer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import time 4 | from numba import jit 5 | 6 | @jit(nopython=True) 7 | def DecodeValues(block, values, encoded_values, bz, by, bx, nbits): 8 | # get the number of values per 8 byte uint64 9 | if (nbits > 0): 10 | values_per_uint64 = 64 / nbits 11 | 12 | ie = 0 13 | for value in encoded_values: 14 | for i in range(0, values_per_uint64): 15 | lower_bits_to_remove = ( 16 | (values_per_uint64 - i - 1) * nbits 17 | ) 18 | values[ie] = ( 19 | (value >> lower_bits_to_remove) % 2**nbits 20 | ) 21 | ie += 1 22 | 23 | ii = 0 24 | # get the lookup table 25 | for iw in range(0, bz): 26 | for iv in range(0, by): 27 | for iu in range(0, bx): 28 | block[iw, iv, iu] = values[ii] 29 | ii += 1 30 | 31 | return block, values 32 | 33 | @jit(nopython=True) 34 | def LookupTable(decompressed_data, lookup_table, block, iz, iy, ix, bz, by, bx): 35 | # read the lookup label 36 | for iw in range(0, bz): 37 | for iv in range(0, by): 38 | for iu in range(0, bx): 39 | decompressed_data[iz * bz + iw, iy * by + iv,ix * bx + iu] = 
lookup_table[block[iw, iv, iu]] 40 | 41 | 42 | return decompressed_data 43 | 44 | 45 | def DecodeNeuroglancer(data, table_offsets, nbits, values_offsets, data_entries, bz, by, bx): 46 | # get the size of the data 47 | az, ay, ax = data[1], data[2], data[3] 48 | gz, gy, gx = ( 49 | int(az / bz), 50 | int(ay / by), 51 | int(ax / bx) 52 | ) 53 | 54 | decompressed_data = np.zeros((az, ay, ax), dtype=np.uint64) 55 | 56 | block_size = bz * by * bx 57 | 58 | index = 0 59 | for iz in range(0, gz): 60 | for iy in range(0, gy): 61 | for ix in range(0, gx): 62 | # get the total number of bits needed 63 | uint64s_needed = ( 64 | nbits[index] * block_size 65 | ) / 64 66 | 67 | uint64s_needed = int(uint64s_needed + 0.5) 68 | 69 | # get the encoded values 70 | encoded_values = data[values_offsets[index]:values_offsets[index] + uint64s_needed] 71 | 72 | # reconstruct the block with their ids 73 | block = np.zeros((bz, by, bx), dtype=np.uint32) 74 | 75 | # decode the values based on the number of bytes needed 76 | values = np.zeros(block_size, dtype=np.uint32) 77 | 78 | block, values = DecodeValues(block, values, encoded_values, bz, by, bx, nbits[index]) 79 | 80 | # find the number of unique elements 81 | nunique = len(np.unique(block)) 82 | lookup_table = data[ 83 | table_offsets[index]:table_offsets[index] + nunique 84 | ] 85 | decompressed_data = LookupTable(decompressed_data, lookup_table, block, iz, iy, ix, bz, by, bx) 86 | 87 | index += 1 88 | 89 | return decompressed_data -------------------------------------------------------------------------------- /experiments/compression/x264.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | from subprocess import Popen, PIPE 5 | import tempfile 6 | 7 | class x264(object): 8 | 9 | @staticmethod 10 | def name(): 11 | '''X.264 compression 12 | ''' 13 | 14 | return 'X.264' 15 | 16 | @staticmethod 17 | def compress(data): 18 | '''X.264 
compression 19 | ''' 20 | 21 | from util import Util 22 | outlog = tempfile.mktemp() 23 | outvideo = tempfile.mktemp(suffix='.mp4') 24 | 25 | process_output = open(outlog,'w') 26 | p = Popen(['ffmpeg', 27 | '-f', 'rawvideo', 28 | '-vcodec', 'rawvideo', 29 | '-y', 30 | '-r', str(data.shape[0]), 31 | '-video_size', str(data.shape[1])+'x'+str(data.shape[2]), 32 | '-pixel_format', 'yuv444p', 33 | '-i', '-', 34 | '-c:v', 'libx264', 35 | '-pix_fmt', 'yuv444p', 36 | '-profile:v', 'high444', 37 | '-crf', '0', 38 | '-preset:v', 'slow', 39 | outvideo], stdin=PIPE, stdout=process_output, stderr=process_output) 40 | 41 | for z in range(data.shape[0]): 42 | Util.convert_to_rgb(data[z]).tofile(p.stdin) 43 | 44 | process_output.close() 45 | p.stdin.close() 46 | p.wait() 47 | 48 | 49 | outdata = None 50 | 51 | with open(outvideo, 'rb') as f: 52 | outdata = f.read() 53 | 54 | # we also need to pass the X,Y,Z dimensions 55 | dims = np.zeros((3), dtype=np.uint64) 56 | dims[0] = data.shape[0]# Z 57 | dims[1] = data.shape[1]# Y 58 | dims[2] = data.shape[2]# X 59 | 60 | return dims.tobytes() + outdata 61 | 62 | 63 | @staticmethod 64 | def decompress(data): 65 | '''X.264 decompression 66 | ''' 67 | from util import Util 68 | errlog = tempfile.mktemp() 69 | outvideo = tempfile.mktemp(suffix='.mp4') 70 | 71 | dims = data[0:3*8] # 3 * 64bit 72 | dims = np.fromstring(dims, dtype=np.uint64) 73 | 74 | videodata = data[3*8:] 75 | 76 | with open(outvideo, 'wb') as f: 77 | f.write(videodata) 78 | 79 | process_output = open(errlog,'w') 80 | 81 | p = Popen(['ffmpeg', 82 | '-i', outvideo, 83 | '-vcodec', 'rawvideo', 84 | '-f', 'image2pipe', 85 | '-video_size', str(dims[1])+'x'+str(dims[2]), 86 | '-pix_fmt', 'yuv444p', 87 | '-' 88 | ], stdout=PIPE, stderr=process_output) 89 | 90 | framesize = dims[1]*dims[2]*3 91 | 92 | frames = p.stdout.read(int(framesize*dims[0])) 93 | 94 | output_data = np.fromstring(frames, dtype=np.uint8) 95 | output_data_rgb = output_data.reshape((dims[0], dims[1], dims[2], 3)) 
96 | output_data_64 = np.zeros((dims[0], dims[1], dims[2]), dtype=np.uint64) 97 | for z in range(output_data_64.shape[0]): 98 | 99 | slice64 = Util.convert_from_rgb(output_data_rgb[z]) 100 | 101 | output_data_64[z] = slice64 102 | 103 | p.stdout.close() 104 | p.wait() 105 | process_output.close() 106 | 107 | return output_data_64 108 | -------------------------------------------------------------------------------- /experiments/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import sys 6 | import cPickle as pickle 7 | 8 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 9 | 10 | import compression as C 11 | 12 | 13 | def run_experiments( 14 | enc_name, com_name, dataset, N, data_loc=None, slices=-1, verbose=False 15 | ): 16 | try: 17 | enc_alg = getattr(C, enc_name) 18 | 19 | # This is a stupid test to ensure that the correct encoder has been 20 | # found, which is needed since we have uppercase module imports and 21 | # lowercase filenames 22 | enc_alg.name() 23 | except Exception: 24 | print 'Encoding scheme not found!' 25 | sys.exit() 26 | 27 | try: 28 | com_alg = getattr(C, com_name) 29 | 30 | # This is a stupid test to ensure that the correct compressor has been 31 | # found, which is needed since we have uppercase module imports and 32 | # lowercase filenames 33 | com_alg.name() 34 | except Exception: 35 | print 'Encoding scheme not found!' 
36 | sys.exit() 37 | 38 | data = C.Util.load_data(dataset, slices, data_loc) 39 | 40 | results = C.Util.run_experiment( 41 | com=com_alg, 42 | enc=enc_alg, 43 | data=data, 44 | N=N, 45 | verbose=verbose 46 | ) 47 | 48 | filename = '_'.join([enc_name, com_name, dataset, str(N), str(slices)]) 49 | keepcharacters = ('-', '.', '_') 50 | 51 | filename = ''.join( 52 | [c for c in filename if c.isalnum() or c in keepcharacters] 53 | ).rstrip() 54 | 55 | res = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results') 56 | 57 | if not os.path.exists(res): 58 | os.makedirs(res) 59 | 60 | print(results) 61 | 62 | with open(os.path.join(res, filename), 'w') as f: 63 | for result in results: 64 | f.write('{}: {}\n'.format(result, results[result])) 65 | 66 | 67 | if __name__ == '__main__': 68 | parser = argparse.ArgumentParser() 69 | 70 | parser.add_argument( 71 | 'encoding', 72 | type=str, 73 | help='name of encoding scheme' 74 | ) 75 | 76 | parser.add_argument( 77 | 'compression', 78 | type=str, 79 | help='name of compression scheme' 80 | ) 81 | 82 | parser.add_argument( 83 | 'dataset', 84 | type=str, 85 | help='name of data set' 86 | ) 87 | 88 | parser.add_argument( 89 | '--directory', 90 | '-d', 91 | dest='dir', 92 | metavar='PATH', 93 | action='store', 94 | type=str, 95 | default=None, 96 | help='path to data directory' 97 | ) 98 | 99 | parser.add_argument( 100 | '--runs', 101 | '-r', 102 | dest='runs', 103 | metavar='NUM', 104 | action='store', 105 | type=int, 106 | default=1, 107 | help='number of runs (default: 1)' 108 | ) 109 | 110 | parser.add_argument( 111 | '--slices', 112 | '-s', 113 | dest='slices', 114 | metavar='NUM', 115 | action='store', 116 | type=int, 117 | default=-1, 118 | help='number of slices per dataset (default: -1 (all))' 119 | ) 120 | 121 | parser.add_argument( 122 | '--verbose', 123 | '-v', 124 | dest='verbose', 125 | action='store_true', 126 | help='print progress (default: False)' 127 | ) 128 | 129 | args = parser.parse_args() 130 | 131 
| run_experiments( 132 | args.encoding, 133 | args.compression, 134 | args.dataset, 135 | args.runs, 136 | args.dir, 137 | args.slices, 138 | args.verbose 139 | ) 140 | -------------------------------------------------------------------------------- /experiments/compression/jpeg.py: -------------------------------------------------------------------------------- 1 | import glymur 2 | import os 3 | import numpy as np 4 | import tempfile 5 | 6 | 7 | class jpeg(object): 8 | 9 | @staticmethod 10 | def name(): 11 | '''No Encoding 12 | ''' 13 | 14 | return 'JPEG2000' 15 | 16 | @staticmethod 17 | def compress(data, *args, **kwargs): 18 | '''JPEG2000 compression 19 | ''' 20 | 21 | TMPFOLDER = tempfile.mkdtemp() 22 | 23 | compressed_data = '' 24 | 25 | sizes = [] 26 | 27 | for iz in range(0, data.shape[0]): 28 | img = data[iz, :, :] 29 | 30 | colorized = np.zeros( 31 | (3, img.shape[0], img.shape[1]), dtype=np.uint16 32 | ) 33 | 34 | # for every value split into three 16 bit samples 35 | colorized[0, :, :] = img % (2**16) 36 | img = img >> 16 37 | colorized[1, :, :] = img % (2**16) 38 | img = img >> 32 39 | colorized[2, :, :] = img % (2**16) 40 | 41 | #print colorized.shape 42 | 43 | glymur.Jp2k(TMPFOLDER+'/tmp_' + str(iz) + '.jp2', colorized) 44 | #glymur.Jp2k('JPEG_TMP/tmp_' + str(iz) + '.jp2', img.astype(np.uint16)) 45 | with open(TMPFOLDER+'/tmp_' + str(iz) + '.jp2', 'rb') as fd: 46 | c_data = fd.read() 47 | compressed_data += c_data 48 | sizes.append(len(c_data)) 49 | 50 | 51 | frames = np.zeros((len(sizes)), dtype=np.uint64) 52 | 53 | for i,s in enumerate(sizes): 54 | 55 | frames[i] = s 56 | 57 | # 58 | # 59 | # no of frames 60 | output = np.uint64(len(sizes)).tobytes() 61 | 62 | # frame sizes 63 | output += frames.tobytes() 64 | 65 | output += compressed_data 66 | 67 | # print sizes 68 | 69 | return output 70 | 71 | @staticmethod 72 | def decompress(data, *args, **kwargs): 73 | '''JPEG2000 decompression 74 | ''' 75 | 76 | TMPFOLDER = tempfile.mkdtemp() 77 | 78 | # 
import itertools
import numpy as np
import os
import png
import tempfile


class _png(object):
    '''Slice-wise lossless PNG codec for segmentation volumes.

    compress() splits every label into three 16-bit planes (low, middle,
    high bits) and stores each z-slice as one 16-bit RGB PNG.  The PNG
    streams are concatenated behind a small header recording the number
    of frames and each frame's byte size, so decompress() can split the
    stream again without parsing PNG internals.

    NOTE(review): only the lowest 48 bits of every label survive the
    three 16-bit planes -- labels >= 2**48 would be truncated silently.
    '''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''

        return 'PNG'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''PNG compression.

        data: 3-D integer label volume indexed (z, y, x).
        Returns bytes: [uint64 frame count][uint64 sizes...][frames...].
        '''

        TMPFOLDER = tempfile.mkdtemp()

        # bytes accumulator (b'' so the code also runs under Python 3)
        compressed_data = b''

        sizes = []

        for iz in range(0, data.shape[0]):
            img = data[iz, :, :]

            colorized = np.zeros(
                (3, img.shape[0], img.shape[1]), dtype=np.uint16
            )

            # for every value split into three 16 bit samples
            colorized[0, :, :] = img % (2**16)
            img = img >> 16
            colorized[1, :, :] = img % (2**16)
            img = img >> 16
            colorized[2, :, :] = img % (2**16)

            # (plane, row, col) -> (row, col, plane) for the PNG writer
            colorized = colorized.swapaxes(0, 1).swapaxes(1, 2)

            row_count, column_count, plane_count = colorized.shape

            # write the slice as one 16-bit RGB PNG
            with open(TMPFOLDER + '/tmp_' + str(iz) + '.png', 'wb') as pngfile:
                pngWriter = png.Writer(
                    column_count,
                    row_count,
                    greyscale=False,
                    alpha=False,
                    bitdepth=16
                )
                pngWriter.write(
                    pngfile,
                    np.reshape(colorized, (-1, column_count * plane_count))
                )

            with open(TMPFOLDER + '/tmp_' + str(iz) + '.png', 'rb') as fd:
                c_data = fd.read()
            compressed_data += c_data
            sizes.append(len(c_data))

        # per-frame byte sizes as uint64
        frames = np.array(sizes, dtype=np.uint64)

        # header: number of frames, then one size per frame
        output = np.uint64(len(sizes)).tobytes()
        output += frames.tobytes()
        output += compressed_data

        return output

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''PNG decompression (inverse of compress).

        Returns a uint64 volume shaped (frames, rows, cols).
        '''

        TMPFOLDER = tempfile.mkdtemp()

        # grab no of frames (frombuffer: fromstring is deprecated)
        no_frames = int(np.frombuffer(data[0:8], dtype=np.uint64)[0])

        # grab the per-frame byte sizes
        sizes = np.frombuffer(data[8:8 + 8 * no_frames], dtype=np.uint64)

        # split the stream and store each frame in the tmp folder
        current_byte_pointer = 8 + 8 * no_frames
        for i in range(sizes.shape[0]):
            current_bytes = data[
                int(current_byte_pointer):int(current_byte_pointer + sizes[i])
            ]
            # BUGFIX: frames were previously written with a stray '.jp2'
            # suffix copied from the JPEG2000 codec; they are PNG streams
            with open(TMPFOLDER + '/tmp_' + str(i) + '.png', 'wb') as f:
                f.write(current_bytes)

            current_byte_pointer = current_byte_pointer + sizes[i]

        nfiles = len(os.listdir(TMPFOLDER))
        for ie, filename in enumerate(os.listdir(TMPFOLDER)):
            input_filename = TMPFOLDER + '/' + filename

            # slice index is encoded in the file name (tmp_<index>.png)
            index = int(filename.split('_')[1].split('.')[0])

            pngReader = png.Reader(filename=input_filename)
            row_count, column_count, png_data, meta = pngReader.asDirect()
            plane_count = meta['planes']

            # make sure rgb files
            assert plane_count == 3

            # builtin map instead of py2-only itertools.imap
            img = np.vstack(map(np.uint16, png_data))
            colorized = np.reshape(img, (row_count, column_count, plane_count))

            colorized = colorized.swapaxes(1, 2).swapaxes(0, 1)

            if (ie == 0):
                decompressed_data = np.zeros(
                    (nfiles, colorized.shape[1], colorized.shape[2]),
                    dtype=np.uint64
                )

            # BUGFIX: the third plane holds bits 32..47 (compress shifts the
            # image right by 16 twice), so it must be weighted by 2**32, not
            # 2**16; the old code corrupted every label >= 2**32
            decompressed_data[index, :, :] = (
                colorized[0, :, :].astype(np.uint64) +
                colorized[1, :, :].astype(np.uint64) * (2 ** 16) +
                colorized[2, :, :].astype(np.uint64) * (2 ** 32)
            )

        return decompressed_data
class compresso(object):
    '''Compresso boundary encoding (MICCAI 2017 scheme).

    Thin Cython wrapper around the C++ Compress/Decompress routines plus a
    pure-Python run-length pass over the per-block window values.
    '''

    @staticmethod
    def name():
        return 'Compresso'


    @staticmethod
    def compress(data):
        '''Boundary Encoding compression

        data: 3-D label volume shaped (z, y, x).
        Returns a byte string: C++ stream (header/ids/values/locations)
        followed by run-length-condensed uint32 window words.
        '''
        # reshape the data into one dimension
        zres, yres, xres = data.shape
        # sliding-window size used by the encoder: one z-slice, 8x8 in-plane
        (zstep, ystep, xstep) = (1, 8, 8)
        # the C++ stream starts with 9 uint64 header words
        header_size = 9

        nzblocks = int(ceil(float(zres) / zstep))
        nyblocks = int(ceil(float(yres) / ystep))
        nxblocks = int(ceil(float(xres) / xstep))
        nblocks = nzblocks * nyblocks * nxblocks

        # call the Cython function
        cdef np.ndarray[unsigned long, ndim=3, mode='c'] cpp_data
        cpp_data = np.ascontiguousarray(data, dtype=ctypes.c_uint64)
        cdef unsigned long *cpp_compressed_data = Compress(&(cpp_data[0,0,0]), zres, yres, xres, zstep, ystep, xstep)
        # total uint64 words in the C++ result: header + ids + values +
        # locations + one window word per block (sizes read from the header)
        length = header_size + cpp_compressed_data[3] + cpp_compressed_data[4] + cpp_compressed_data[5] + nblocks
        # NOTE(review): a '<unsigned long[:length]>' cast appears to have been
        # stripped from the next line (angle brackets lost in export); a bare
        # pointer cannot initialise a memoryview and 'length' is otherwise
        # unused -- confirm against the original repository source.
        cdef unsigned long[:] tmp_compressed_data = cpp_compressed_data
        compressed_data = np.asarray(tmp_compressed_data)

        # compress all the zeros in the window values

        nblocks = int(ceil(float(zres) / zstep)) * int(ceil(float(yres) / ystep)) * int(ceil(float(xres) / xstep))

        # split the stream: everything before the per-block window words ...
        intro_data = compressed_data[:-nblocks]
        # ... and the window words themselves, which are mostly zero
        block_data = compressed_data[-nblocks:]

        if (np.max(block_data) < 2**32):
            block_data = block_data.astype(np.uint32)

        # run-length encode the zeros: a run of N zeros is stored as the odd
        # value 2*N+1; every non-zero window value v is stored as the even
        # value 2*v, so parity tells the decoder which case it is reading.
        # A zero run that reaches the end of the stream is never emitted;
        # that is safe because the decoder pre-fills its array with zeros.
        condensed_blocks = list()
        inzero = False
        prev_zero = 0
        for ie, block in enumerate(block_data):
            if block == 0:
                # start counting zeros
                if not inzero:
                    inzero = True
                    prev_zero = ie
            else:
                if inzero:
                    # add information for the previous zero segment
                    condensed_blocks.append((ie - prev_zero) * 2 + 1)
                    inzero = False
                condensed_blocks.append(block * 2)

        condensed_blocks = np.array(condensed_blocks).astype(np.uint32)

        return intro_data.tobytes() + condensed_blocks.tobytes()


    @staticmethod
    def decompress(data):
        '''Boundary Decoding decompression

        data: byte string produced by compress().
        Returns a uint64 volume shaped (zres, yres, xres).
        '''

        # read the first nine uint64 header words (72 bytes)
        header = np.fromstring(data[0:72], dtype=np.uint64)

        zres = header[0]
        yres = header[1]
        xres = header[2]
        ids_size = int(header[3])
        values_size = int(header[4])
        locations_size = int(header[5])
        zstep = header[6]
        ystep = header[7]
        xstep = header[8]

        # get the intro data (header + ids + values + locations, in words)
        intro_size = 9 + ids_size + values_size + locations_size
        intro_data = np.fromstring(data[0:intro_size*8], dtype=np.uint64)

        # get the compressed blocks
        nblocks = int(ceil(float(zres) / zstep)) * int(ceil(float(yres) / ystep)) * int(ceil(float(xres) / xstep))
        compressed_blocks = np.fromstring(data[intro_size*8:], dtype=np.uint32)
        block_data = np.zeros(nblocks, dtype=np.uint64)

        index = 0
        for block in compressed_blocks:
            # odd values encode runs of zeros; even values encode window/2
            if block % 2:
                # NOTE(review): Python-2 integer division; under Python 3
                # this would produce a float and break the slice below
                # ('//' would be needed) -- confirm the target interpreter
                nzeros = (block - 1) / 2
                block_data[index:index+nzeros] = 0
                index += nzeros
            else:
                block_data[index] = block / 2
                index += 1

        data = np.concatenate((intro_data, block_data))

        cdef np.ndarray[unsigned long, ndim=1, mode='c'] cpp_data
        cpp_data = np.ascontiguousarray(data, dtype=ctypes.c_uint64)
        n = zres * yres * xres

        # NOTE(review): a '<unsigned long[:n]>' cast appears to have been
        # stripped here too ('n' is otherwise unused) -- confirm against
        # the original repository source.
        cdef unsigned long[:] cpp_decompressed_data = Decompress(&(cpp_data[0]))
        decompressed_data = np.reshape(np.asarray(cpp_decompressed_data), (zres, yres, xres))

        return decompressed_data
Compresso: Efficient Compression of Segmentation Data For Connectomics 2 | 3 | [![Paper](https://img.shields.io/badge/paper-accepted-red.svg?colorB=f52ef0)](https://vcg.seas.harvard.edu/publications/compresso-efficient-compression-of-segmentation-data-for-connectomics) 4 | [![MICCAI](https://img.shields.io/badge/presentation-MICCAI%202017-red.svg?colorB=135f89)](http://www.miccai2017.org/schedule) 5 | [![doi](https://img.shields.io/badge/used%20by-rhoana-red.svg?colorB=2bf55b)](http://www.rhoana.org) 6 | 7 | ![Segmentations](/banner.png?raw=true) 8 | 9 | > Recent advances in segmentation methods for connectomics and biomedical imaging produce very large datasets with labels that assign object classes to image pixels. The resulting label volumes are bigger than the raw image data and need compression for efficient storage and transfer. General-purpose compression methods are less effective because the label data consists of large low-frequency regions with structured boundaries unlike natural image data. We present Compresso, a new compression scheme for label data that outperforms existing approaches by using a sliding window to exploit redundancy across border regions in 2D and 3D. We compare our method to existing compression schemes and provide a detailed evaluation on eleven biomedical and image segmentation datasets. Our method provides a factor of 600-2200x compression for label volumes, with running times suitable for practice. 10 | 11 | **Paper**: Matejek _et al._, "Compresso: Efficient Compression of Segmentation Data For Connectomics", Proceedings of the International Conference on Medical Image Computing and Computer-Assisted Intervention (MICCAI), 2017, 10-14. 
\[[CITE](https://scholar.google.com/scholar?q=Compresso%3A+Efficient+Compression+of+Segmentation+Data+For+Connectomics) | [PDF](https://vcg.seas.harvard.edu/publications/compresso-efficient-compression-of-segmentation-data-for-connectomics/paper)\] 12 | 13 | ## Requirements 14 | 15 | - Python 2.7 16 | - conda 17 | 18 | ## Pip Installation 19 | 20 | Thanks to Will Silversmith, you can now install compresso with pip! 21 | 22 | ``` 23 | pip install compresso 24 | ``` 25 | 26 | ## Setup 27 | 28 | ```bash 29 | git clone https://github.com/vcg/compresso && cd compresso 30 | conda create -n compresso_env --file requirements.txt -c chen -c sunpy -c conda-forge -c auto -c indygreg 31 | source activate compresso_env 32 | # for Compresso scheme as presented in MICCAI 33 | cd experiments/compression/compresso; python setup.py build_ext --inplace 34 | # to run the neuroglancer compression scheme 35 | cd ../neuroglancer; python setup.py build_ext --inplace 36 | # for Compresso v2 that is under development 37 | cd ../../../src/python; python setup.py build_ext --inplace 38 | ``` 39 | 40 | ## Compress Segmentation Stacks 41 | 42 | There are two versions of Compresso in this repository. Under the src folder there is an updated c++ and python version that extends on the Compresso scheme presented in MICCAI. This algorithm, among other things, implements bit-packing to further improve compression results. 43 | 44 | The compression scheme in `experiments/compression/compresso` follows the MICCAI paper exactly. 45 | 46 | ## Compress Your Segmentation Stack 47 | 48 | In order to test Compresso on your own data simply use: 49 | 50 | ``` 51 | import compression as C 52 | # With LZMA 53 | C.LZMA.compress(C.COMPRESSO.compress(data)) 54 | ``` 55 | 56 | ## Experiments 57 | 58 | ``` 59 | # the dataset must be in hdf5 format.
60 | experiments/run.py COMPRESSO LZMA ac3 -r 1 -s 1 -d '///' 61 | ``` 62 | 63 | Usage: 64 | 65 | ``` 66 | usage: run.py [-h] [--directory PATH] [--runs NUM] [--slices NUM] 67 | [--verbose] 68 | encoding compression dataset 69 | 70 | positional arguments: 71 | encoding name of encoding scheme 72 | compression name of compression scheme 73 | dataset name of data set 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | --directory PATH, -d PATH 78 | path to data directory 79 | --runs NUM, -r NUM number of runs (default: 1) 80 | --slices NUM, -s NUM number of slices per dataset (default: -1 (all)) 81 | --verbose, -v print progress (default: False) 82 | ``` 83 | 84 | 85 | Make sure the data sets are located in `~/compresso/data/` or specify the location. The data from the paper can be found here: 86 | 87 | - AC3: _(Kasthuri et al. Saturated reconstruction of a volume of neocortex. Cell 2015.)_ 88 | - CREMI: 89 | - CYL: _(Kasthuri et al. Saturated reconstruction of a volume of neocortex. Cell 2015.)_ 90 | - SPL Brain Atlas: _(Halle M., Talos I-F., Jakab M., Makris N., Meier D., Wald L., Fischl B., Kikinis R. Multi-modality MRI-based Atlas of the Brain. SPL 2017 Jan)_ 91 | - SPL Knee Atlas: _(Richolt J.A., Jakab M., Kikinis R. SPL Knee Atlas. SPL 2015 Sep)_ 92 | - SPL Abdominal Atlas: _(Talos I-F., Jakab M., Kikinis R. SPL Abdominal Atlas. SPL 2015 Sep)_ 93 | - BSD500: _(Contour Detection and Hierarchical Image Segmentation P. Arbelaez, M. Maire, C. Fowlkes and J. Malik. IEEE TPAMI, Vol. 33, No. 5, pp. 898-916, May 2011.)_ 94 | - VOC2012: _(Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. 
cimport cython
cimport numpy as np
import numpy as np
import ctypes
import math
import sys

# bit-packing block size and the chunk size the volume is tiled into
(bz, by, bx) = (8, 8, 8)
(chunkz, chunky, chunkx) = (64, 64, 64)

cdef extern from 'cpp-neuroglancer.h' namespace 'neuroglancer':
    unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx)
    unsigned long *Decompress(unsigned long *compressed_data, int bz, int by, int bx)

from pyneuroglancer import DecodeNeuroglancer

# use cython for decompress (slower)
# NOTE(review): True selects the C++ Decompress path below and False the
# pure-Python DecodeNeuroglancer path; the "(slower)" remark is ambiguous --
# confirm which path the comment means.
cython_decompress = True


########################
### DECODE FUNCTIONS ###
########################

class neuroglancer(object):
    '''Neuroglancer compressed-segmentation codec (64^3 chunks, 8^3 blocks).'''

    @staticmethod
    def name():
        return 'Neuroglancer'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Neuroglancer compression

        data: 3-D label volume shaped (z, y, x).
        Returns a uint64 array: [zres, yres, xres] followed by each
        chunk's self-describing compressed stream.
        '''
        origz, origy, origx = data.shape

        # determine the number of chunks of data
        zres, yres, xres = data.shape

        # '+ 0.5' guards the float->int truncation against round-off
        nzchunks, nychunks, nxchunks = (int(math.ceil(float(zres) / chunkz) + 0.5), int(math.ceil(float(yres) / chunky) + 0.5), int(math.ceil(float(xres) / chunkx) + 0.5))

        # the stream starts with the global volume size
        compressed_data = np.zeros(3, dtype=np.uint64)
        compressed_data[0] = zres
        compressed_data[1] = yres
        compressed_data[2] = xres

        cdef np.ndarray[unsigned long, ndim=3, mode='c'] cpp_data
        cdef unsigned long *cpp_compressed_data
        cdef unsigned long[:] tmp_compressed_data

        # compress every chunk
        for iz in range(0, nzchunks):
            for iy in range(0, nychunks):
                for ix in range(0, nxchunks):
                    chunk = data[
                        iz * chunkz:(iz + 1) * chunkz,
                        iy * chunky:(iy + 1) * chunky,
                        ix * chunkx:(ix + 1) * chunkx
                    ]

                    # create header variables
                    zres, yres, xres = chunk.shape
                    origz, origy, origx = zres, yres, xres

                    # pad each dimension up to a multiple of the block size
                    if zres % bz: zpad = (bz - zres % bz)
                    else: zpad = 0
                    if yres % by: ypad = (by - yres % by)
                    else: ypad = 0
                    if xres % bx: xpad = (bx - xres % bx)
                    else: xpad = 0

                    zres += zpad
                    yres += ypad
                    xres += xpad

                    padded_data = np.pad(chunk, ((0, zpad), (0, ypad), (0, xpad)), 'reflect').astype(np.uint64)

                    cpp_data = np.ascontiguousarray(padded_data, dtype=ctypes.c_uint64)
                    cpp_compressed_data = Compress(&(cpp_data[0,0,0]), zres, yres, xres, bz, by, bx, origz, origy, origx)
                    # first word of the chunk stream is its total length
                    length = cpp_compressed_data[0]
                    # NOTE(review): a '<unsigned long[:length]>' cast appears
                    # to have been stripped here (angle brackets lost in
                    # export); a bare pointer cannot initialise a memoryview
                    # and 'length' is otherwise unused -- confirm.
                    tmp_compressed_data = cpp_compressed_data
                    compressed_data = np.concatenate((compressed_data, np.asarray(tmp_compressed_data)))

        return compressed_data

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Neuroglancer decompression

        data: uint64 stream produced by compress().
        Returns a uint64 volume shaped (zres, yres, xres).
        '''
        # get the uncompressed data size
        zres = data[0]
        yres = data[1]
        xres = data[2]
        data = data[3:]

        nzchunks, nychunks, nxchunks = (int(math.ceil(float(zres) / chunkz) + 0.5), int(math.ceil(float(yres) / chunky) + 0.5), int(math.ceil(float(xres) / chunkx) + 0.5))

        # create an empty decompressed array
        decompressed_data = np.zeros((zres, yres, xres), dtype=np.uint64)

        cdef np.ndarray[unsigned long, ndim=1, mode='c'] cpp_data
        cdef unsigned long[:] cpp_decompressed_chunk

        if not cython_decompress:
            # pure-Python path: unpack each chunk with DecodeNeuroglancer
            for iz in range(0, nzchunks):
                for iy in range(0, nychunks):
                    for ix in range(0, nxchunks):
                        # get the size of the data
                        length = int(data[0])

                        # padded chunk extent
                        az, ay, ax = data[1], data[2], data[3]

                        gz, gy, gx = (
                            int(math.ceil(float(az) / bz)),
                            int(math.ceil(float(ay) / by)),
                            int(math.ceil(float(ax) / bx))
                        )

                        # get the total size of the header
                        nelements = gz * gy * gx

                        # later will become 3 bytes from 4
                        table_offsets = np.zeros(nelements, dtype=np.uint32)
                        nbits = np.zeros(nelements, dtype=np.uint8)
                        values_offsets = np.zeros(nelements, dtype=np.uint32)

                        ################################
                        # DECOMPRESS HEADER VALUES
                        ################################

                        # get the original (unpadded) data size
                        origz, origy, origx = data[4], data[5], data[6]

                        # per-block header word layout: table offset in the
                        # top 24 bits, bit width in bits 32..39, values
                        # offset in the low 32 bits
                        data_entries = 7
                        for ie in range(0, nelements):
                            # NOTE(review): 'long' is Python-2 only; under
                            # Python 3 this would need int() -- confirm the
                            # target interpreter
                            header = long(data[data_entries])
                            table_offsets[ie] = header >> 40
                            nbits[ie] = (header << 24) >> 56
                            values_offsets[ie] = (header << 32) >> 32

                            data_entries += 1

                        ###############################
                        # DECOMPRESS ENTIRE IMAGE
                        ###############################

                        # remove the first element (length not needed)
                        decompressed_chunk = DecodeNeuroglancer(data[:length], table_offsets, nbits, values_offsets, data_entries, bz, by, bx)
                        decompressed_chunk = np.reshape(decompressed_chunk, (az, ay, ax))

                        # crop the padding away and place the chunk
                        decompressed_data[
                            iz * chunkz:(iz + 1) * chunkz,
                            iy * chunky:(iy + 1) * chunky,
                            ix * chunkx:(ix + 1) * chunkx
                        ] = decompressed_chunk[0:origz,0:origy,0:origx]

                        # advance to the next chunk stream
                        data = data[length:]

            return decompressed_data
        else:
            # C++ path: hand each chunk stream to Decompress()
            for iz in range(0, nzchunks):
                for iy in range(0, nychunks):
                    for ix in range(0, nxchunks):
                        # get the size of the data
                        length = int(data[0])
                        az, ay, ax = data[1], data[2], data[3]
                        origz, origy, origx = data[4], data[5], data[6]

                        ###############################
                        # DECOMPRESS ENTIRE IMAGE
                        ###############################
                        cpp_data = np.ascontiguousarray(data[:length], dtype=ctypes.c_uint64)
                        n = az * ay * ax

                        # NOTE(review): a '<unsigned long[:n]>' cast appears
                        # to have been stripped here as well ('n' is
                        # otherwise unused) -- confirm.
                        cpp_decompressed_chunk = Decompress(&(cpp_data[0]), bz, by, bx)
                        decompressed_chunk = np.reshape(np.asarray(cpp_decompressed_chunk), (az, ay, ax))

                        # crop the padding away and place the chunk
                        decompressed_data[
                            iz * chunkz:(iz + 1) * chunkz,
                            iy * chunky:(iy + 1) * chunky,
                            ix * chunkx:(ix + 1) * chunkx
                        ] = decompressed_chunk[0:origz,0:origy,0:origx]

                        # advance to the next chunk stream
                        data = data[length:]

            return decompressed_data
class BZ2(object):
    '''Thin adapter exposing bzip2 through the common codec interface.'''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'BZip2'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Compress a byte string with bzip2; extra args pass through.'''
        packed = bz2.compress(data, *args, **kwargs)
        return packed

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Invert compress(); extra args pass through to bz2.'''
        restored = bz2.decompress(data, *args, **kwargs)
        return restored
class LZW(object):
    '''Textbook LZW codec over byte strings.

    compress() maps the input to a sequence of dictionary codes (uint32
    numpy array); decompress() inverts it.  Rewritten to run on both
    Python 2 and 3: `xrange` -> `range`, `cStringIO` -> ``''.join``, and
    the quadratic ``list.pop(0)`` loop replaced by straight iteration.
    The emitted code sequence is unchanged.
    '''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'LZW'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''LZW-encode `data` (ndarray, str, or bytes) to uint32 codes.'''
        if type(data) is np.ndarray:
            str_data = data.tobytes()
        elif type(data) is str or type(data) is bytes:
            str_data = data
        else:
            raise ValueError('Data type not supported')

        # work on text where one symbol == one byte (latin-1 is lossless);
        # on Python 2 a str already satisfies this and is left untouched
        if not isinstance(str_data, str):
            str_data = str_data.decode('latin-1')

        # dictionary seeded with every single-byte string
        dict_size = 2**8
        dictionary = dict((chr(i), i) for i in range(dict_size))

        w = ''
        result = []
        for c in str_data:
            wc = w + c
            if wc in dictionary:
                # keep extending the current match
                w = wc
            else:
                # emit the longest known prefix, learn the new string
                result.append(dictionary[w])
                dictionary[wc] = dict_size
                dict_size += 1
                w = c

        # flush the final match (empty only for empty input)
        if w:
            result.append(dictionary[w])

        return np.array(result, dtype=np.uint32)

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Invert compress(): turn a code sequence back into a string.

        Raises ValueError on a code that cannot have been produced by
        compress().  Empty input decodes to the empty string.
        '''
        codes = [int(k) for k in data]
        if not codes:
            # robustness: compress('') yields no codes
            return ''

        dict_size = 256
        dictionary = dict((i, chr(i)) for i in range(dict_size))

        # collect output pieces and join once (avoids quadratic behavior)
        parts = []
        w = chr(codes[0])
        parts.append(w)

        for k in codes[1:]:
            if k in dictionary:
                entry = dictionary[k]
            elif k == dict_size:
                # the cSc corner case: code refers to the entry being built
                entry = w + w[0]
            else:
                raise ValueError('Bad compressed k: %s' % k)
            parts.append(entry)

            # add w+entry[0] to the dictionary
            dictionary[dict_size] = w + entry[0]
            dict_size += 1

            w = entry

        return ''.join(parts)
class PNG(object):
    '''Adapter routing the common codec interface to the _png module.'''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'PNG'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Delegate to _png.compress; extra args are accepted but unused.'''
        return _png.compress(data)

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Delegate to _png.decompress; extra args are accepted but unused.'''
        return _png.decompress(data)
// Compress one padded chunk into the neuroglancer-style stream:
// a 7-word global header, one packed header word per 8^3 block
// (table offset | bit width | values offset), then per block the
// bit-packed value indices followed by its sorted value lookup table.
// Caller owns the returned buffer (allocated with new[]).
// NOTE(review): zres/yres/xres are expected to be already padded to
// multiples of bz/by/bx by the Cython caller -- nyblocks/nxblocks below
// use exact integer division; confirm for any other caller.
unsigned long *
neuroglancer::Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx)
{
    // set global variables used by IndicesToIndex
    row_size = xres;
    sheet_size = yres * xres;
    grid_size = zres * yres * xres;

    // the number of blocks ('+ 0.5' guards the double->int truncation)
    unsigned long gz = (unsigned long)(ceil((double)zres / bz) + 0.5);
    unsigned long gy = (unsigned long)(ceil((double)yres / by) + 0.5);
    unsigned long gx = (unsigned long)(ceil((double)xres / bx) + 0.5);

    // the number of elements and the block size
    unsigned long nelements = gz * gy * gx;
    unsigned int block_size = bz * by * bx;

    // get the end of the header (7 global words + one word per block)
    unsigned int header_offset = nelements + header_size;

    // create arrays that store the table offset, number of bits, and the value offsets
    unsigned int *table_offsets = new unsigned int[nelements];
    unsigned char *nbits = new unsigned char[nelements];
    unsigned int *values_offsets = new unsigned int[nelements];
    for (unsigned int iv = 0; iv < nelements; ++iv) {
        table_offsets[iv] = 0;
        nbits[iv] = 0;
        values_offsets[iv] = 0;
    }

    // create the arrays for the encoded values and the look up table
    // NOTE(review): template arguments were stripped when this file was
    // exported; presumably std::vector<unsigned long> here and matching
    // element types below -- confirm against the repository.
    unsigned int **encoded_values = new unsigned int *[nelements];
    std::vector *lookup_table = new std::vector[nelements];
    for (unsigned int iv = 0; iv < nelements; ++iv) {
        lookup_table[iv] = std::vector();
        encoded_values[iv] = new unsigned int[block_size];
        for (unsigned int ie = 0; ie < block_size; ++ie)
            encoded_values[iv][ie] = 0;
    }

    // get the number of blocks for each dimension (exact when padded)
    int nyblocks = yres / by;
    int nxblocks = xres / bx;

    // running offset (in uint64 words) where each block's payload lands
    unsigned int offset = header_offset;
    // iterate over every block
    for (unsigned int index = 0; index < nelements; ++index) {
        // get the block in terms of x, y, z
        int iz = index / (nyblocks * nxblocks);
        int iy = (index - iz * nyblocks * nxblocks) / nxblocks;
        int ix = index % nxblocks;

        // get the block
        unsigned long *block = new unsigned long[block_size];

        // populate the temporary block array
        int iv = 0;
        for (int ik = iz * bz; ik < (iz + 1) * bz; ++ik) {
            for (int ij = iy * by; ij < (iy + 1) * by; ++ij) {
                for (int ii = ix * bx; ii < (ix + 1) * bx; ++ii, ++iv) {
                    block[iv] = data[IndicesToIndex(ii, ij, ik)];
                }
            }
        }

        // get an ordered list of unique elements
        std::vector unique_elements = std::vector();
        std::unordered_set hash_set = std::unordered_set();

        for (unsigned int iv = 0; iv < block_size; ++iv) {
            if (!hash_set.count(block[iv])) {
                unique_elements.push_back(block[iv]);
                hash_set.insert(block[iv]);
            }
        }

        // sorted order so equal blocks produce identical tables
        std::sort(unique_elements.begin(), unique_elements.end());

        // create a mapping for the look up table and populate the lookup table
        unsigned int nunique = unique_elements.size();
        std::unordered_map mapping = std::unordered_map();
        for (unsigned int iv = 0; iv < nunique; ++iv) {
            mapping[unique_elements[iv]] = iv;
            lookup_table[index].push_back(unique_elements[iv]);
        }

        // populate the encoded values array (index into the lookup table)
        for (unsigned int iv = 0; iv < block_size; ++iv) {
            encoded_values[index][iv] = mapping[block[iv]];
        }

        // determine the number of bits: power-of-two widths only, so a
        // 64-bit word always holds a whole number of values
        if (nunique <= 1) nbits[index] = 0;
        else if (nunique <= 1<<1) nbits[index] = 1;
        else if (nunique <= 1<<2) nbits[index] = 2;
        else if (nunique <= 1<<4) nbits[index] = 4;
        else if (nunique <= 1<<8) nbits[index] = 8;
        else if (nunique <= 1<<16) nbits[index] = 16;
        else nbits[index] = 32;

        // packed values first, then the lookup table, per block
        values_offsets[index] = offset;
        offset += nbits[index] * block_size / 64;
        table_offsets[index] = offset;
        offset += nunique;

        // free memory
        delete[] block;
    }

    // +1: word 0 carries the total stream length
    unsigned long *compressed_data = new unsigned long[offset + 1];
    for (unsigned int iv = 0; iv < offset + 1; ++iv) {
        compressed_data[iv] = 0;
    }

    // add the header information (length, padded size, original size)
    compressed_data[0] = offset + 1;
    compressed_data[1] = zres;
    compressed_data[2] = yres;
    compressed_data[3] = xres;
    compressed_data[4] = origz;
    compressed_data[5] = origy;
    compressed_data[6] = origx;

    // one packed word per block: table offset in the top 24 bits, bit
    // width in bits 32..39, values offset in the low 32 bits
    int data_entry = header_size;
    for (unsigned int iv = 0; iv < nelements; ++iv, ++data_entry) {
        compressed_data[data_entry] = ((unsigned long)table_offsets[iv] << 40) + ((unsigned long)nbits[iv] << 32) + values_offsets[iv];
    }

    // add the encoded values
    for (unsigned int index = 0; index < nelements; ++index) {
        // encode all of the values
        if (nbits[index] > 0) {
            // get the number of values per 8 bytes
            unsigned int nvalues_per_entry = 64 / nbits[index];
            // get the number of entries for this block
            unsigned int nentries = block_size * nbits[index] / 64;

            // for every entry, for every value: first value lands in the
            // highest bits of the word
            int ii = 0;
            for (unsigned int ie = 0; ie < nentries; ++ie, ++data_entry) {
                unsigned long value = 0;
                for (unsigned int iv = 0; iv < nvalues_per_entry; ++iv, ++ii) {
                    // get the encoded value for this location
                    unsigned long encoded_value = (unsigned long)encoded_values[index][ii];

                    // the amount to shift the encoded value
                    unsigned int shift = (nvalues_per_entry - 1 - iv) * nbits[index];
                    value += (encoded_value << shift);
                }
                compressed_data[data_entry] = value;
            }
        }

        // add the lookup table
        for (unsigned int iv = 0; iv < lookup_table[index].size(); ++iv, ++data_entry) {
            compressed_data[data_entry] = lookup_table[index][iv];
        }
    }

    // free memory
    delete[] table_offsets;
    delete[] nbits;
    delete[] values_offsets;
    for (unsigned int iv = 0; iv < nelements; ++iv)
        delete[] encoded_values[iv];
    delete[] encoded_values;
    delete[] lookup_table;

    return compressed_data;
}
* xres]; 256 | for (int iv = 0; iv < zres * yres * xres; ++iv) 257 | decompressed_data[iv] = 0; 258 | 259 | // get the number of blocks for each dimension 260 | int nyblocks = yres / by; 261 | int nxblocks = xres / bx; 262 | 263 | // decode each block 264 | for (unsigned int index = 0; index < nelements; ++index) { 265 | // get the number of encoded blocks 266 | int nblocks = nbits[index] * block_size / 64; 267 | 268 | // get the encoded values 269 | unsigned long *encoded_values = new unsigned long[nblocks]; 270 | for (int iv = 0; iv < nblocks; ++iv) 271 | encoded_values[iv] = compressed_data[values_offsets[index] + iv]; 272 | 273 | // create empty block array 274 | unsigned long *block = new unsigned long[block_size]; 275 | for (unsigned int iv = 0; iv < block_size; ++iv) 276 | block[iv] = 0; 277 | 278 | // get the number of values per 8 bytes 279 | if (nbits[index]) { 280 | unsigned long nvalues_per_long = 64 / nbits[index]; 281 | 282 | // for every long value 283 | int ib = 0; 284 | for (int iv = 0; iv < nblocks; ++iv) { 285 | unsigned long value = encoded_values[iv]; 286 | // for every entry per 8 bytes 287 | for (unsigned int ie = 0; ie < nvalues_per_long; ++ie, ++ib) { 288 | unsigned int lower_bits_to_remove = (nvalues_per_long - ie - 1) * nbits[index]; 289 | 290 | block[ib] = (value >> lower_bits_to_remove) % (int)(pow(2, nbits[index]) + 0.5); 291 | } 292 | } 293 | } 294 | 295 | // get an ordered list of unique elements (template argument restored: elements are the unsigned long values in block[]) 296 | std::unordered_set<unsigned long> hash_set = std::unordered_set<unsigned long>(); 297 | 298 | for (unsigned int iv = 0; iv < block_size; ++iv) { 299 | if (!hash_set.count(block[iv])) { 300 | hash_set.insert(block[iv]); 301 | } 302 | } 303 | 304 | // get the lookup table 305 | unsigned int nunique = hash_set.size(); 306 | unsigned long *lookup_table = new unsigned long[nunique]; 307 | for (unsigned int iv = 0; iv < nunique; ++iv) { 308 | lookup_table[iv] = compressed_data[table_offsets[index] + iv]; 309 | } 310 | 311 | // update the block values 312 | for (unsigned
int iv = 0; iv < block_size; ++iv) { 313 | block[iv] = lookup_table[block[iv]]; 314 | } 315 | 316 | // get the block in terms if x, y, z 317 | int iz = index / (nyblocks * nxblocks); 318 | int iy = (index - iz * nyblocks * nxblocks) / nxblocks; 319 | int ix = index % nxblocks; 320 | 321 | int iv = 0; 322 | for (int ik = iz * bz; ik < (iz + 1) * bz; ++ik) { 323 | for (int ij = iy * by; ij < (iy + 1) * by; ++ij) { 324 | for (int ii = ix * bx; ii < (ix + 1) * bx; ++ii, ++iv) { 325 | decompressed_data[IndicesToIndex(ii, ij, ik)] = block[iv]; 326 | } 327 | } 328 | } 329 | 330 | // free memory 331 | delete[] encoded_values; 332 | delete[] block; 333 | delete[] lookup_table; 334 | } 335 | 336 | 337 | // free memory 338 | delete[] table_offsets; 339 | delete[] nbits; 340 | delete[] values_offsets; 341 | 342 | return decompressed_data; 343 | } 344 | -------------------------------------------------------------------------------- /experiments/compression/compresso/cpp-compresso.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "cpp-compresso.h" 8 | 9 | 10 | 11 | // size of various dimensions 12 | 13 | static int row_size = -1; 14 | static int sheet_size = -1; 15 | static int grid_size = -1; 16 | 17 | 18 | 19 | /////////////////////////////////// 20 | //// INTERNAL HELPER FUNCTIONS //// 21 | /////////////////////////////////// 22 | 23 | static int 24 | IndicesToIndex(int ix, int iy, int iz) 25 | { 26 | return iz * sheet_size + iy * row_size + ix; 27 | } 28 | 29 | 30 | 31 | ///////////////////////////////////////// 32 | //// UNION-FIND CLASS FOR COMPONENTS //// 33 | ///////////////////////////////////////// 34 | 35 | class UnionFindElement { 36 | public: 37 | UnionFindElement(unsigned long label) : 38 | label(label), 39 | parent(this), 40 | rank(0) 41 | {} 42 | 43 | public: 44 | unsigned long label; 45 | UnionFindElement *parent; 46 | int rank; 47 | }; 
48 | 49 | UnionFindElement * 50 | Find(UnionFindElement *x) 51 | { 52 | if (x->parent != x) { 53 | x->parent = Find(x->parent); 54 | } 55 | return x->parent; 56 | } 57 | 58 | void 59 | Union(UnionFindElement *x, UnionFindElement *y) 60 | { 61 | UnionFindElement *xroot = Find(x); 62 | UnionFindElement *yroot = Find(y); 63 | 64 | if (xroot == yroot) { return; } 65 | 66 | // merge teh two roots 67 | if (xroot->rank < yroot->rank) { 68 | xroot->parent = yroot; 69 | } 70 | else if (xroot->rank > yroot->rank) { 71 | yroot->parent = xroot; 72 | } 73 | else { 74 | yroot->parent = xroot; 75 | xroot->rank = xroot->rank + 1; 76 | } 77 | } 78 | 79 | 80 | 81 | ///////////////////////////////////////// 82 | //// COMPRESSO COMPRESSION ALGORITHM //// 83 | ///////////////////////////////////////// 84 | 85 | static bool * 86 | ExtractBoundaries(unsigned long *data, int zres, int yres, int xres) 87 | { 88 | // create the boundaries array 89 | bool *boundaries = new bool[grid_size]; 90 | if (!boundaries) { fprintf(stderr, "Failed to allocate memory for boundaries...\n"); exit(-1); } 91 | 92 | // determine which pixels differ from east or south neighbors 93 | for (int iz = 0; iz < zres; ++iz) { 94 | for (int iy = 0; iy < yres; ++iy) { 95 | for (int ix = 0; ix < xres; ++ix) { 96 | int iv = IndicesToIndex(ix, iy, iz); 97 | 98 | boundaries[iv] = false; 99 | 100 | // check the east neighbor 101 | if (ix < xres - 1) { 102 | if (data[iv] != data[IndicesToIndex(ix + 1, iy, iz)]) boundaries[iv] = true; 103 | } 104 | // check the south neighbor 105 | if (iy < yres - 1) { 106 | if (data[iv] != data[IndicesToIndex(ix, iy + 1, iz)]) boundaries[iv] = true; 107 | } 108 | } 109 | } 110 | } 111 | 112 | // return the boundary array 113 | return boundaries; 114 | } 115 | 116 | static unsigned long * 117 | ConnectedComponents(bool *boundaries, int zres, int yres, int xres) 118 | { 119 | // create the connected components 120 | unsigned long *components = new unsigned long[grid_size]; 121 | if 
(!components) { fprintf(stderr, "Failed to allocate memory for connected components...\n"); exit(-1); } 122 | for (int iv = 0; iv < grid_size; ++iv) 123 | components[iv] = 0; 124 | 125 | // run connected components for each slice 126 | for (int iz = 0; iz < zres; ++iz) { 127 | 128 | // create vector for union find elements (template argument restored: elements are UnionFindElement* per push_back below) 129 | std::vector<UnionFindElement *> union_find = std::vector<UnionFindElement *>(); 130 | 131 | // current label in connected component 132 | int curlab = 1; 133 | for (int iy = 0; iy < yres; ++iy) { 134 | for (int ix = 0; ix < xres; ++ix) { 135 | int iv = IndicesToIndex(ix, iy, iz); 136 | 137 | // continue if boundary 138 | if (boundaries[iv]) continue; 139 | 140 | // only consider the pixel directly to the north and west 141 | int north = IndicesToIndex(ix - 1, iy, iz); 142 | int west = IndicesToIndex(ix, iy - 1, iz); 143 | 144 | int neighbor_labels[2] = { 0, 0 }; 145 | 146 | // get the labels for the relevant neighbor 147 | if (ix > 0) neighbor_labels[0] = components[north]; 148 | if (iy > 0) neighbor_labels[1] = components[west]; 149 | 150 | // if the neighbors are boundary, create new label 151 | if (!neighbor_labels[0] && !neighbor_labels[1]) { 152 | components[iv] = curlab; 153 | 154 | // add to union find structure 155 | union_find.push_back(new UnionFindElement(0)); 156 | 157 | // update the next label 158 | curlab++; 159 | } 160 | // the two pixels have equal non-trivial values 161 | else if (neighbor_labels[0] == neighbor_labels[1]) 162 | components[iv] = neighbor_labels[0]; 163 | // neighbors have differing values 164 | else { 165 | if (!neighbor_labels[0]) components[iv] = neighbor_labels[1]; 166 | else if (!neighbor_labels[1]) components[iv] = neighbor_labels[0]; 167 | // neighbors have differing non-trivial values 168 | else { 169 | // take minimum value 170 | components[iv] = std::min(neighbor_labels[0], neighbor_labels[1]); 171 | 172 | // set the equivalence relationship 173 | Union(union_find[neighbor_labels[0] - 1], union_find[neighbor_labels[1] - 1]); 174 | 175
| } 176 | } 177 | } 178 | 179 | // reset the current label to 1 180 | curlab = 1; 181 | 182 | // create connected components (ordered) 183 | for (int iy = 0; iy < yres; ++iy) { 184 | for (int ix = 0; ix < xres; ++ix) { 185 | int iv = IndicesToIndex(ix, iy, iz); 186 | 187 | if (boundaries[iv]) continue; 188 | 189 | // get the parent for this component 190 | UnionFindElement *comp = Find(union_find[components[iv] - 1]); 191 | if (!comp->label) { 192 | comp->label = curlab; 193 | curlab++; 194 | } 195 | 196 | components[iv] = comp->label; 197 | } 198 | } 199 | 200 | for (unsigned int iv = 0; iv < union_find.size(); ++iv) 201 | delete union_find[iv]; 202 | } 203 | 204 | 205 | // return the connected components array 206 | return components; 207 | } 208 | 209 | static std::vector * 210 | IDMapping(unsigned long *components, unsigned long *data, int zres, int yres, int xres) 211 | { 212 | // create a vector of the ids 213 | std::vector *ids = new std::vector(); 214 | 215 | for (int iz = 0; iz < zres; ++iz) { 216 | // create a set for this individual slice 217 | std::set hash_map = std::set(); 218 | 219 | // iterate over the entire slice 220 | for (int iy = 0; iy < yres; ++iy) { 221 | for (int ix = 0; ix < xres; ++ix) { 222 | int iv = IndicesToIndex(ix, iy, iz); 223 | 224 | // get the segment id 225 | unsigned long component_id = components[iv]; 226 | 227 | // if this component does not belong yet, add it 228 | if (!hash_map.count(component_id)) { 229 | hash_map.insert(component_id); 230 | 231 | // add the segment id 232 | unsigned long segment_id = data[iv] + 1; 233 | ids->push_back(segment_id); 234 | } 235 | } 236 | } 237 | 238 | } 239 | 240 | // return the mapping 241 | return ids; 242 | } 243 | 244 | static unsigned long * 245 | EncodeBoundaries(bool *boundaries, int zres, int yres, int xres, int zstep, int ystep, int xstep) 246 | { 247 | // determine the number of blocks in the z, y, and x dimensions 248 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 249 
| int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 250 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 251 | 252 | // create an empty array for the encodings 253 | int nblocks = nzblocks * nyblocks * nxblocks; 254 | unsigned long *boundary_data = new unsigned long[nblocks]; 255 | for (int iv = 0; iv < nblocks; ++iv) 256 | boundary_data[iv] = 0; 257 | 258 | for (int iz = 0; iz < zres; ++iz) { 259 | for (int iy = 0; iy < yres; ++iy) { 260 | for (int ix = 0; ix < xres; ++ix) { 261 | int iv = IndicesToIndex(ix, iy, iz); 262 | 263 | // no encoding for non-boundaries 264 | if (!boundaries[iv]) continue; 265 | 266 | // find the block from the index 267 | int zblock = iz / zstep; 268 | int yblock = iy / ystep; 269 | int xblock = ix / xstep; 270 | 271 | // find the offset within the block 272 | int zoffset = iz % zstep; 273 | int yoffset = iy % ystep; 274 | int xoffset = ix % xstep; 275 | 276 | int block = zblock * (nyblocks * nxblocks) + yblock * nxblocks + xblock; 277 | int offset = zoffset * (ystep * xstep) + yoffset * xstep + xoffset; 278 | 279 | boundary_data[block] += (1LU << offset); 280 | } 281 | } 282 | } 283 | 284 | return boundary_data; 285 | } 286 | 287 | static std::vector<unsigned long> * 288 | ValueMapping(unsigned long *boundary_data, int nblocks) 289 | { 290 | // get a list of values (template arguments restored: containers hold the unsigned long boundary window values) 291 | std::vector<unsigned long> *values = new std::vector<unsigned long>(); 292 | std::set<unsigned long> hash_map = std::set<unsigned long>(); 293 | 294 | // go through all boundary data to create array of values 295 | for (int iv = 0; iv < nblocks; ++iv) { 296 | if (!hash_map.count(boundary_data[iv])) { 297 | hash_map.insert(boundary_data[iv]); 298 | values->push_back(boundary_data[iv]); 299 | } 300 | } 301 | 302 | // sort the values 303 | sort(values->begin(), values->end()); 304 | 305 | // create mapping from values to indices 306 | std::unordered_map<unsigned long, unsigned long> mapping = std::unordered_map<unsigned long, unsigned long>(); 307 | for (unsigned int iv = 0; iv < values->size(); ++iv) { 308 | mapping[(*values)[iv]] = iv; 309 | } 310 | 311 | // update boundary data 312 |
for (int iv = 0; iv < nblocks; ++iv) { 313 | boundary_data[iv] = mapping[boundary_data[iv]]; 314 | } 315 | 316 | // return values 317 | return values; 318 | } 319 | 320 | std::vector * 321 | EncodeIndeterminateLocations(bool *boundaries, unsigned long *data, int zres, int yres, int xres) 322 | { 323 | // update global size variables 324 | row_size = xres; 325 | sheet_size = yres * xres; 326 | grid_size = zres * yres * xres; 327 | 328 | std::vector *locations = new std::vector(); 329 | 330 | int iv = 0; 331 | for (int iz = 0; iz < zres; ++iz) { 332 | for (int iy = 0; iy < yres; ++iy) { 333 | for (int ix = 0; ix < xres; ++ix, ++iv) { 334 | 335 | if (!boundaries[iv]) continue; 336 | else if (iy > 0 && !boundaries[IndicesToIndex(ix, iy - 1, iz)]) continue; //boundaries[iv] = 0; 337 | else if (ix > 0 && !boundaries[IndicesToIndex(ix - 1, iy, iz)]) continue; //boundaries[iv] = 0; 338 | else { 339 | int north = IndicesToIndex(ix - 1, iy, iz); 340 | int south = IndicesToIndex(ix + 1, iy, iz); 341 | int east = IndicesToIndex(ix, iy - 1, iz); 342 | int west = IndicesToIndex(ix, iy + 1, iz); 343 | int up = IndicesToIndex(ix, iy, iz + 1); 344 | int down = IndicesToIndex(ix, iy, iz - 1); 345 | 346 | // see if any of the immediate neighbors are candidates 347 | if (ix > 0 && !boundaries[north] && data[north] == data[iv]) 348 | locations->push_back(0); 349 | else if (ix < xres - 1 && !boundaries[south] && data[south] == data[iv]) 350 | locations->push_back(1); 351 | else if (iy > 0 && !boundaries[east] && data[east] == data[iv]) 352 | locations->push_back(2); 353 | else if (iy < yres - 1 && !boundaries[west] && data[west] == data[iv]) 354 | locations->push_back(3); 355 | else if (iz > 0 && !boundaries[down] && data[down] == data[iv]) 356 | locations->push_back(4); 357 | else if (iz < zres - 1 && !boundaries[up] && data[up] == data[iv]) 358 | locations->push_back(5); 359 | else 360 | locations->push_back(data[IndicesToIndex(ix, iy, iz)] + 6); 361 | } 362 | } 363 | } 364 | } 365 | 
366 | return locations; 367 | } 368 | 369 | 370 | unsigned long * 371 | compresso::Compress(unsigned long *data, int zres, int yres, int xres, int zstep, int ystep, int xstep) 372 | { 373 | // set global variables 374 | row_size = xres; 375 | sheet_size = yres * xres; 376 | grid_size = zres * yres * xres; 377 | 378 | // determine the number of blocks in the z, y, and x dimensions 379 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 380 | int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 381 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 382 | 383 | // create an empty array for the encodings 384 | int nblocks = nzblocks * nyblocks * nxblocks; 385 | 386 | // get boundary voxels 387 | bool *boundaries = ExtractBoundaries(data, zres, yres, xres); 388 | 389 | // get the connected components 390 | unsigned long *components = ConnectedComponents(boundaries, zres, yres, xres); 391 | 392 | std::vector *ids = IDMapping(components, data, zres, yres, xres); 393 | 394 | unsigned long *boundary_data = EncodeBoundaries(boundaries, zres, yres, xres, zstep, ystep, xstep); 395 | 396 | std::vector *values = ValueMapping(boundary_data, nblocks); 397 | 398 | std::vector *locations = EncodeIndeterminateLocations(boundaries, data, zres, yres, xres); 399 | 400 | unsigned short header_size = 9; 401 | unsigned long *compressed_data = new unsigned long[header_size + ids->size() + values->size() + locations->size() + nblocks]; 402 | 403 | // add the resolution 404 | compressed_data[0] = zres; 405 | compressed_data[1] = yres; 406 | compressed_data[2] = xres; 407 | 408 | // add the sizes of the vectors 409 | compressed_data[3] = ids->size(); 410 | compressed_data[4] = values->size(); 411 | compressed_data[5] = locations->size(); 412 | 413 | compressed_data[6] = zstep; 414 | compressed_data[7] = ystep; 415 | compressed_data[8] = xstep; 416 | 417 | int iv = header_size; 418 | for (unsigned int ix = 0 ; ix < ids->size(); ++ix, ++iv) 419 | compressed_data[iv] = 
(*ids)[ix]; 420 | for (unsigned int ix = 0; ix < values->size(); ++ix, ++iv) 421 | compressed_data[iv] = (*values)[ix]; 422 | for (unsigned int ix = 0; ix < locations->size(); ++ix, ++iv) 423 | compressed_data[iv] = (*locations)[ix]; 424 | for (int ix = 0; ix < nblocks; ++ix, ++iv) 425 | compressed_data[iv] = boundary_data[ix]; 426 | 427 | // free memory 428 | delete[] boundaries; 429 | delete[] components; 430 | delete ids; 431 | delete[] boundary_data; 432 | delete values; 433 | delete locations; 434 | 435 | return compressed_data; 436 | } 437 | 438 | 439 | 440 | /////////////////////////////////////////// 441 | //// COMPRESSO DECOMPRESSION ALGORITHM //// 442 | /////////////////////////////////////////// 443 | 444 | static bool * 445 | DecodeBoundaries(unsigned long *boundary_data, std::vector *values, int zres, int yres, int xres, int zstep, int ystep, int xstep) 446 | { 447 | int nyblocks = (int)(ceil((double)yres / ystep) + 0.5); 448 | int nxblocks = (int)(ceil((double)xres / xstep) + 0.5); 449 | 450 | bool *boundaries = new bool[grid_size]; 451 | for (int iv = 0; iv < grid_size; ++iv) 452 | boundaries[iv] = false; 453 | 454 | for (int iz = 0; iz < zres; ++iz) { 455 | for (int iy = 0; iy < yres; ++iy) { 456 | for (int ix = 0; ix < xres; ++ix) { 457 | int iv = IndicesToIndex(ix, iy, iz); 458 | 459 | int zblock = iz / zstep; 460 | int yblock = iy / ystep; 461 | int xblock = ix / xstep; 462 | 463 | int zoffset = iz % zstep; 464 | int yoffset = iy % ystep; 465 | int xoffset = ix % xstep; 466 | 467 | int block = zblock * (nyblocks * nxblocks) + yblock * nxblocks + xblock; 468 | int offset = zoffset * (ystep * xstep) + yoffset * xstep + xoffset; 469 | 470 | unsigned long value = (*values)[boundary_data[block]]; 471 | if ((value >> offset) % 2) boundaries[iv] = true; 472 | } 473 | } 474 | } 475 | 476 | return boundaries; 477 | } 478 | 479 | static unsigned long * 480 | IDReverseMapping(unsigned long *components, std::vector *ids, int zres, int yres, int xres) 481 | { 
482 | unsigned long *decompressed_data = new unsigned long[grid_size]; 483 | for (int iv = 0; iv < grid_size; ++iv) 484 | decompressed_data[iv] = 0; 485 | 486 | int ids_index = 0; 487 | for (int iz = 0; iz < zres; ++iz) { 488 | 489 | // create mapping (not memory efficient but FAST!!) 490 | // number of components is guaranteed to be less than ids->size() 491 | unsigned long *mapping = new unsigned long[ids->size()]; 492 | for (unsigned int iv = 0; iv < ids->size(); ++iv) { 493 | mapping[iv] = 0; 494 | } 495 | 496 | for (int iy = 0; iy < yres; ++iy) { 497 | for (int ix = 0; ix < xres; ++ix) { 498 | int iv = IndicesToIndex(ix, iy, iz); 499 | 500 | if (!mapping[components[iv]]) { 501 | mapping[components[iv]] = (*ids)[ids_index]; 502 | ids_index++; 503 | } 504 | 505 | decompressed_data[iv] = mapping[components[iv]] - 1; 506 | } 507 | } 508 | } 509 | 510 | return decompressed_data; 511 | } 512 | 513 | static void 514 | DecodeIndeterminateLocations(bool *boundaries, unsigned long *decompressed_data, std::vector *locations, int zres, int yres, int xres) 515 | { 516 | int iv = 0; 517 | int index = 0; 518 | 519 | // go through all coordinates 520 | for (int iz = 0; iz < zres; ++iz) { 521 | for (int iy = 0; iy < yres; ++iy) { 522 | for (int ix = 0; ix < xres; ++ix, ++iv) { 523 | int north = IndicesToIndex(ix - 1, iy, iz); 524 | int west = IndicesToIndex(ix, iy - 1, iz); 525 | 526 | if (!boundaries[iv]) continue; 527 | else if (ix > 0 && !boundaries[north]) { 528 | decompressed_data[iv] = decompressed_data[north]; 529 | //boundaries[iv] = 0; 530 | } 531 | else if (iy > 0 && !boundaries[west]) { 532 | decompressed_data[iv] = decompressed_data[west]; 533 | //boundaries[iv] = 0; 534 | } 535 | else { 536 | int offset = (*locations)[index]; 537 | if (offset == 0) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix - 1, iy, iz)]; 538 | else if (offset == 1) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix + 1, iy, iz)]; 539 | else if (offset == 2) 
decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy - 1, iz)]; 540 | else if (offset == 3) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy + 1, iz)]; 541 | else if (offset == 4) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz - 1)]; 542 | else if (offset == 5) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz + 1)]; 543 | else { 544 | decompressed_data[iv] = offset - 6; 545 | } 546 | index += 1; 547 | } 548 | } 549 | } 550 | } 551 | } 552 | 553 | unsigned long* 554 | compresso::Decompress(unsigned long *compressed_data) 555 | { 556 | // constants 557 | int header_size = 9; 558 | 559 | // get the resolution 560 | int zres = compressed_data[0]; 561 | int yres = compressed_data[1]; 562 | int xres = compressed_data[2]; 563 | 564 | // set global variables 565 | row_size = xres; 566 | sheet_size = yres * xres; 567 | grid_size = zres * yres * xres; 568 | 569 | // get the size of the vectors 570 | int ids_size = compressed_data[3]; 571 | int values_size = compressed_data[4]; 572 | int locations_size = compressed_data[5]; 573 | 574 | // get the step size 575 | int zstep = compressed_data[6]; 576 | int ystep = compressed_data[7]; 577 | int xstep = compressed_data[8]; 578 | 579 | // determine the number of blocks in the z, y, and x dimensions 580 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 581 | int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 582 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 583 | 584 | // create an empty array for the encodings 585 | int nblocks = nzblocks * nyblocks * nxblocks; 586 | 587 | // allocate memory for all arrays 588 | std::vector *ids = new std::vector(); 589 | std::vector *values = new std::vector(); 590 | std::vector *locations = new std::vector(); 591 | unsigned long *boundary_data = new unsigned long[nblocks]; 592 | 593 | int iv = header_size; 594 | for (int ix = 0; ix < ids_size; ++ix, ++iv) 595 | ids->push_back(compressed_data[iv]); 596 | 
for (int ix = 0; ix < values_size; ++ix, ++iv) 597 | values->push_back(compressed_data[iv]); 598 | for (int ix = 0; ix < locations_size; ++ix, ++iv) 599 | locations->push_back(compressed_data[iv]); 600 | for (int ix = 0; ix < nblocks; ++ix, ++iv) 601 | boundary_data[ix] = compressed_data[iv]; 602 | 603 | bool *boundaries = DecodeBoundaries(boundary_data, values, zres, yres, xres, zstep, ystep, xstep); 604 | 605 | unsigned long *components = ConnectedComponents(boundaries, zres, yres, xres); 606 | 607 | unsigned long *decompressed_data = IDReverseMapping(components, ids, zres, yres, xres); 608 | 609 | DecodeIndeterminateLocations(boundaries, decompressed_data, locations, zres, yres, xres); 610 | 611 | // free memory 612 | delete[] boundaries; 613 | delete[] components; 614 | delete[] boundary_data; 615 | delete ids; 616 | delete values; 617 | delete locations; 618 | 619 | return decompressed_data; 620 | } 621 | -------------------------------------------------------------------------------- /experiments/compression/util.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import h5py 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import os 7 | import time 8 | import sys 9 | 10 | from matplotlib.font_manager import FontProperties 11 | from matplotlib.patches import Rectangle 12 | from matplotlib.colors import ListedColormap 13 | 14 | 15 | class Util(object): 16 | 17 | @staticmethod 18 | def adj_fig_size(width=10, height=10): 19 | '''Adjust figsize of plot 20 | ''' 21 | 22 | fig_size = plt.rcParams["figure.figsize"] 23 | fig_size[0] = width 24 | fig_size[1] = height 25 | plt.rcParams["figure.figsize"] = fig_size 26 | 27 | @staticmethod 28 | def colorize(slice): 29 | colorized = np.zeros(slice.shape + (3,), dtype=np.uint8) 30 | 31 | colorized[:, :, 0] = np.mod(107 * slice[:, :], 700).astype(np.uint8) 32 | colorized[:, :, 1] = np.mod(509 * slice[:, :], 900).astype(np.uint8) 33 | 
colorized[:, :, 2] = np.mod(200 * slice[:, :], 777).astype(np.uint8) 34 | 35 | return colors 36 | 37 | @staticmethod 38 | def convert_to_rgb(img): 39 | 40 | colorized = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) 41 | 42 | colorized[:, :, 0] = img % (2**8) 43 | img = img >> 8 44 | colorized[:, :, 1] = img % (2**8) 45 | img = img >> 8 46 | colorized[:, :, 2] = img % (2**8) 47 | 48 | return colorized 49 | 50 | @staticmethod 51 | def convert_to_rgba(img): 52 | colorized = np.zeros((img.shape[0], img.shape[1], 4), dtype=np.uint8) 53 | 54 | colorized[:, :, 0] = img % (2**8) 55 | img = img >> 8 56 | colorized[:, :, 1] = img % (2**8) 57 | img = img >> 8 58 | colorized[:, :, 2] = img % (2**8) 59 | img = img >> 8 60 | colorized[:, :, 3] = img % (2**8) 61 | 62 | return colorized 63 | 64 | @staticmethod 65 | def convert_from_rgb(frame): 66 | 67 | img = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint64) 68 | img[:] = (np.uint64(frame[:, :, 0]) + np.uint64(frame[:, :, 1]) * 256 + np.uint64(frame[:, :, 2]) * 256 * 256) 69 | 70 | return img 71 | 72 | @staticmethod 73 | def convert_from_rgba(frame): 74 | img = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint64) 75 | img[:] = (frame[:, :, 0] + frame[:, :, 1] * 256 + frame[:, :, 2] * 256 * 256 + frame[:, :, 3] * 256 * 256 * 256) 76 | 77 | return img 78 | 79 | @staticmethod 80 | def get_size(variable): 81 | '''Get bytes of variable 82 | ''' 83 | if type(variable).__module__ == np.__name__: 84 | variable = variable.tobytes() 85 | elif type(variable) is str: 86 | assert (all(ord(c) < 256) for c in variable) 87 | else: 88 | raise ValueError('Data type not supported') 89 | 90 | # checking the length of a bytestring is more accurate 91 | return len(variable) 92 | 93 | @staticmethod 94 | def to_best_type(array): 95 | '''Convert array to lowest possible bitrate. 
96 | ''' 97 | ui8 = np.iinfo(np.uint8) 98 | ui8 = ui8.max 99 | ui16 = np.iinfo(np.uint16) 100 | ui16 = ui16.max 101 | ui32 = np.iinfo(np.uint32) 102 | ui32 = ui32.max 103 | ui64 = np.iinfo(np.uint64) 104 | ui64 = ui64.max 105 | 106 | if array.max() <= ui64: 107 | new_type = np.uint64 108 | if array.max() <= ui32: 109 | new_type = np.uint32 110 | if array.max() <= ui16: 111 | new_type = np.uint16 112 | if array.max() <= ui8: 113 | new_type = np.uint8 114 | 115 | return array.astype(new_type) 116 | 117 | @staticmethod 118 | def load_data(name='ac3', N=-1, prefix=None, gold=False): 119 | '''Load data 120 | ''' 121 | 122 | if not 'mri' in name: 123 | if gold: filename = '~/compresso/data/' + name + '/gold/' + name + '_gold.h5' 124 | else: filename = '~/compresso/data/' + name + '/rhoana/' + name + '_rhoana.h5' 125 | 126 | with h5py.File(os.path.expanduser(filename), 'r') as hf: 127 | output = np.array(hf['main'], dtype=np.uint64) 128 | else: 129 | filename = '~/compresso/data/MRI/' + name + '.h5' 130 | 131 | with h5py.File(os.path.expanduser(filename), 'r') as hf: 132 | output = np.array(hf['main'], dtype=np.uint64) 133 | 134 | if (not N == -1): 135 | output = output[0:N,:,:] 136 | 137 | return output 138 | 139 | @staticmethod 140 | def compress(method, data, *args, **kwargs): 141 | '''Compress data 142 | ''' 143 | t0 = time.time() 144 | 145 | compressed_data = method.compress(data, *args, **kwargs) 146 | 147 | return compressed_data, time.time() - t0 148 | 149 | @staticmethod 150 | def decompress(method, compressed_data, *args, **kwargs): 151 | '''Decompress data 152 | ''' 153 | t0 = time.time() 154 | 155 | data = method.decompress(compressed_data, *args, **kwargs) 156 | 157 | return data, time.time() - t0 158 | 159 | @staticmethod 160 | def encode(method, data, *args, **kwargs): 161 | '''Encode data 162 | ''' 163 | t0 = time.time() 164 | 165 | enc_data = method.compress(data, *args, **kwargs) 166 | 167 | return enc_data, time.time() - t0 168 | 169 | @staticmethod 170 
| def decode(method, enc_data, *args, **kwargs): 171 | '''Decode data 172 | ''' 173 | t0 = time.time() 174 | 175 | data = method.decompress(enc_data, *args, **kwargs) 176 | 177 | return data, time.time() - t0 178 | 179 | @staticmethod 180 | def run_experiment(com, enc, data, N=100, verbose=True): 181 | '''Benchmark one compression method 182 | com = [BROTLI, BZ2, LZMA, LZO, ZLIB, ZSTD] 183 | data = data set 184 | N = number of runs 185 | *args / **kwargs = settings for the compression method, e.g. `9` 186 | ''' 187 | enc_speed = [] 188 | denc_speed = [] 189 | 190 | com_speed = [] 191 | dcom_speed = [] 192 | 193 | total_com_speed = [] 194 | total_dcom_speed = [] 195 | 196 | # run N iterations 197 | for n in range(N): 198 | start_time = time.time() 199 | 200 | # run encoding and compression 201 | encoded_data, t1 = Util.encode(enc, data) 202 | compressed_data, t2 = Util.compress(com, encoded_data) 203 | 204 | # decompress data 205 | if (com.name() == 'LZF'): 206 | # LZF requires the original output size 207 | decompressed_data, t3 = Util.decompress(com, compressed_data, 8 * long(data.size)) 208 | else: 209 | decompressed_data, t3 = Util.decompress(com, compressed_data) 210 | 211 | # make sure the data is returned as an array 212 | if not isinstance(decompressed_data, (np.ndarray, np.generic)) and isinstance(encoded_data, (np.ndarray, np.generic)): 213 | # convert back to numpy array 214 | decompressed_data = np.fromstring(decompressed_data, dtype=encoded_data.dtype) 215 | 216 | # decode the data 217 | decoded_data, t4 = Util.decode(enc, decompressed_data) 218 | 219 | # update the speed lists 220 | enc_speed.append(t1) 221 | denc_speed.append(t4) 222 | 223 | com_speed.append(t2) 224 | dcom_speed.append(t3) 225 | 226 | total_com_speed.append(t1 + t2) 227 | total_dcom_speed.append(t3 + t4) 228 | 229 | # guarantee lossless behavior 230 | assert np.array_equal(np.ndarray.flatten(data), np.ndarray.flatten(decoded_data)) 231 | 232 | print 'Ran iteration ' + str(n + 1) + ' 
of ' + str(N) + ' on ' + enc.name() + ' + ' + com.name() + ' in %0.2f seconds' % (time.time() - start_time) 233 | sys.stdout.flush() 234 | 235 | com_MB = Util.get_size(compressed_data) / float(1000**2) 236 | dec_MB = Util.get_size(data) / float(1000**2) 237 | 238 | # Higher is better 239 | ratio = dec_MB / com_MB 240 | 241 | # turn the speeds in MB / s 242 | for n in range(N): 243 | if enc_speed[n] == 0: 244 | enc_speed[n] = 0.01 245 | if denc_speed[n] == 0: 246 | denc_speed[n] = 0.01 247 | if com_speed[n] == 0: 248 | com_speed[n] = 0.01 249 | if dcom_speed[n] == 0: 250 | dcom_speed[n] = 0.01 251 | if total_com_speed[n] == 0: 252 | total_com_speed[n] = 0.01 253 | if total_dcom_speed[n] == 0: 254 | total_dcom_speed[n] = 0.01 255 | 256 | enc_speed[n] = dec_MB / enc_speed[n] 257 | denc_speed[n] = dec_MB / denc_speed[n] 258 | 259 | com_speed[n] = dec_MB / com_speed[n] 260 | dcom_speed[n] = dec_MB / dcom_speed[n] 261 | 262 | total_com_speed[n] = dec_MB / total_com_speed[n] 263 | total_dcom_speed[n] = dec_MB / total_dcom_speed[n] 264 | 265 | # get stddev for speeds 266 | com_speed_std = np.std(com_speed) 267 | dcom_speed_std = np.std(dcom_speed) 268 | 269 | enc_speed_std = np.std(enc_speed) 270 | denc_speed_std = np.std(denc_speed) 271 | 272 | total_com_speed_std = np.std(total_com_speed) 273 | total_dcom_speed_std = np.std(total_dcom_speed) 274 | 275 | # get means for speeds 276 | enc_speed = np.mean(enc_speed) 277 | denc_speed = np.mean(denc_speed) 278 | 279 | com_speed = np.mean(com_speed) 280 | dcom_speed = np.mean(dcom_speed) 281 | 282 | total_com_speed = np.mean(total_com_speed) 283 | total_dcom_speed = np.mean(total_dcom_speed) 284 | 285 | if verbose: 286 | print '>>>> %s + %s <<<<' % (enc.name(), com.name()) 287 | print 'Compression Method:', com.name() 288 | print 'Encoding Method:', enc.name() 289 | print 'Input Size:', dec_MB, 'MB' 290 | print 'Output Size:', com_MB, 'MB' 291 | print 'Ratio:', ratio 292 | print 'Total Compression Speed [MB/s]:', 
total_com_speed 293 | print 'Total Decompression Speed [MB/s]:', total_dcom_speed 294 | print 'Compression (Only) Speed [MB/s]:', com_speed 295 | print 'Decompression (Only) Speed [MB/s]:', dcom_speed 296 | print 'Encoding Speed [MB/s]:', enc_speed 297 | print 'Decoding Speed [MB/s]:', denc_speed 298 | print '' 299 | 300 | return { 301 | 'encoding': enc.name(), 302 | 'compression': com.name(), 303 | 'orig_bytes': dec_MB, 304 | 'comp_bytes': com_MB, 305 | 'ratio': ratio, 306 | 'comp_speed': com_speed, 307 | 'comp_speed_stddev': com_speed_std, 308 | 'decomp_speed': dcom_speed, 309 | 'decomp_speed_stddev': dcom_speed_std, 310 | 'enc_speed': enc_speed, 311 | 'enc_speed_stddev': enc_speed_std, 312 | 'denc_speed': denc_speed, 313 | 'denc_speed_stddev': denc_speed_std, 314 | 'total_comp_speed': total_com_speed, 315 | 'total_comp_speed_stddev': total_com_speed_std, 316 | 'total_decomp_speed': total_dcom_speed, 317 | 'total_decomp_speed_stddev': total_dcom_speed_std 318 | } 319 | 320 | @staticmethod 321 | def run_variable_experiment(com, enc, data, steps, N=100, verbose=True): 322 | '''Benchmark one compression method 323 | com = [BROTLI, BZ2, LZMA, LZO, ZLIB, ZSTD] 324 | data = data set 325 | N = number of runs 326 | *args / **kwargs = settings for the compression method, e.g. 
`9` 327 | ''' 328 | enc_speed = [] 329 | denc_speed = [] 330 | 331 | com_speed = [] 332 | dcom_speed = [] 333 | 334 | total_com_speed = [] 335 | total_dcom_speed = [] 336 | 337 | # run N iterations 338 | for n in range(N): 339 | start_time = time.time() 340 | 341 | # run encoding and compression 342 | encoded_data, t1 = Util.encode(enc, data, steps) 343 | compressed_data, t2 = Util.compress(com, encoded_data, *args) 344 | 345 | # decompress data 346 | if (com.name() == 'LZF'): 347 | # LZF requires the original output size 348 | decompressed_data, t3 = Util.decompress(com, compressed_data, 8 * data.size) 349 | else: 350 | decompressed_data, t3 = Util.decompress(com, compressed_data) 351 | 352 | # make sure the data is returned as an array 353 | if not isinstance(decompressed_data, (np.ndarray, np.generic)) and isinstance(encoded_data, (np.ndarray, np.generic)): 354 | # convert back to numpy array 355 | decompressed_data = np.fromstring(decompressed_data, dtype=encoded_data.dtype) 356 | 357 | # decode the data 358 | decoded_data, t4 = Util.decode(enc, decompressed_data, steps) 359 | 360 | # update the speed lists 361 | enc_speed.append(t1) 362 | denc_speed.append(t4) 363 | 364 | com_speed.append(t2) 365 | dcom_speed.append(t3) 366 | 367 | total_com_speed.append(t1 + t2) 368 | total_dcom_speed.append(t3 + t4) 369 | 370 | # guarantee lossless behavior 371 | assert np.array_equal(np.ndarray.flatten(data), np.ndarray.flatten(decoded_data)) 372 | 373 | print 'Ran iteration ' + str(n + 1) + ' of ' + str(N) + ' on ' + enc.name() + ' + ' + com.name() + ' in %0.2f seconds' % (time.time() - start_time) 374 | sys.stdout.flush() 375 | 376 | com_MB = Util.get_size(compressed_data) / float(1000**2) 377 | dec_MB = Util.get_size(data) / float(1000**2) 378 | 379 | # Higher is better 380 | ratio = dec_MB / com_MB 381 | 382 | # turn the speeds in MB / s 383 | for n in range(N): 384 | if enc_speed[n] == 0: 385 | enc_speed[n] = 0.01 386 | if denc_speed[n] == 0: 387 | denc_speed[n] = 0.01 
388 | if com_speed[n] == 0: 389 | com_speed[n] = 0.01 390 | if dcom_speed[n] == 0: 391 | dcom_speed[n] = 0.01 392 | if total_com_speed[n] == 0: 393 | total_com_speed[n] = 0.01 394 | if total_dcom_speed[n] == 0: 395 | total_dcom_speed[n] = 0.01 396 | 397 | enc_speed[n] = dec_MB / enc_speed[n] 398 | denc_speed[n] = dec_MB / denc_speed[n] 399 | 400 | com_speed[n] = dec_MB / com_speed[n] 401 | dcom_speed[n] = dec_MB / dcom_speed[n] 402 | 403 | total_com_speed[n] = dec_MB / total_com_speed[n] 404 | total_dcom_speed[n] = dec_MB / total_dcom_speed[n] 405 | 406 | # get stddev for speeds 407 | com_speed_std = np.std(com_speed) 408 | dcom_speed_std = np.std(dcom_speed) 409 | 410 | enc_speed_std = np.std(enc_speed) 411 | denc_speed_std = np.std(denc_speed) 412 | 413 | total_com_speed_std = np.std(total_com_speed) 414 | total_dcom_speed_std = np.std(total_dcom_speed) 415 | 416 | # get means for speeds 417 | enc_speed = np.mean(enc_speed) 418 | denc_speed = np.mean(denc_speed) 419 | 420 | com_speed = np.mean(com_speed) 421 | dcom_speed = np.mean(dcom_speed) 422 | 423 | total_com_speed = np.mean(total_com_speed) 424 | total_dcom_speed = np.mean(total_dcom_speed) 425 | 426 | if verbose: 427 | print '>>>> %s + %s <<<<' % (enc.name(), com.name()) 428 | print 'Compression Method:', com.name() 429 | print 'Encoding Method:', enc.name() 430 | print 'Input Size:', dec_MB, 'MB' 431 | print 'Output Size:', com_MB, 'MB' 432 | print 'Ratio:', ratio 433 | print 'Total Compression Speed [MB/s]:', total_com_speed 434 | print 'Total Decompression Speed [MB/s]:', total_dcom_speed 435 | print 'Compression (Only) Speed [MB/s]:', com_speed 436 | print 'Decompression (Only) Speed [MB/s]:', dcom_speed 437 | print 'Encoding Speed [MB/s]:', enc_speed 438 | print 'Decoding Speed [MB/s]:', denc_speed 439 | print '' 440 | 441 | return { 442 | 'encoding': enc.name(), 443 | 'compression': com.name(), 444 | 'orig_bytes': dec_MB, 445 | 'comp_bytes': com_MB, 446 | 'ratio': ratio, 447 | 'comp_speed': 
com_speed,
            'comp_speed_stddev': com_speed_std,
            'decomp_speed': dcom_speed,
            'decomp_speed_stddev': dcom_speed_std,
            'enc_speed': enc_speed,
            'enc_speed_stddev': enc_speed_std,
            'denc_speed': denc_speed,
            'denc_speed_stddev': denc_speed_std,
            'total_comp_speed': total_com_speed,
            'total_comp_speed_stddev': total_com_speed_std,
            'total_decomp_speed': total_dcom_speed,
            'total_decomp_speed_stddev': total_dcom_speed_std
        }

    @staticmethod
    def plot_all(
        results,
        what,
        x_range=None,
        y_range=None,
        name=None,
        leg=True,
        leg_loc='right',
        no_bw=True,
        input_bytes=-1,
        output='',
        emphasis=-1,
        bar_label=-1,
        no_leg_bars=False,
        log=False,
        digital=True,
        no_errorbars=False,
        title=None
    ):
        # Grouped bar chart of `results[what]` per compression method, with
        # one bar each for no encoding / Neuroglancer / Compresso, saved as
        # '<name>_compression_<what>.pdf' under `output`.
        #
        # results    = dict with 'methods' (list of '<Compressor> <Encoder>'
        #              strings), `what`, and optionally `what + '_std'` lists
        # what       = key into `results` selecting the metric to plot
        # y_range    = upper y-axis limit (lower is fixed at 0)
        # name       = required output file prefix; raises ValueError if None
        # leg        = draw a legend at 'upper <leg_loc>'
        # no_bw      = drop the first (alphabetical) method group from the plot
        # input_bytes= horizontal reference line when what == 'bytes_size'
        # emphasis   = index of the x tick label to render bold/larger
        # bar_label  = index of the bar group to annotate with '1'/'2'/'3'
        # digital    = screen (lighter) vs print (darker) color scheme
        # NOTE(review): `x_range` is accepted but never used in the body.

        if name is None:
            raise ValueError(
                'Holy Moly you haven\'t specified a `name`! Shame on you.'
            )

        if what not in results:
            raise ValueError('Wrong `what` parameter. Not found in `results`.')

        labels = []

        # first token of each 'Compressor Encoder' method string
        for i, method in enumerate(results['methods']):
            labels.append(method.split()[0])

        labels = sorted(list(set(labels)), key=lambda s: s.lower())

        font_base = FontProperties()

        font_bold = font_base.copy()
        font_bold.set_weight('bold')

        # one value (and stddev) per compressor label, for each encoder
        none = [0] * len(labels)
        none_std = [0] * len(labels)
        neuroglancer = [0] * len(labels)
        neuroglancer_std = [0] * len(labels)
        compresso = [0] * len(labels)
        compresso_std = [0] * len(labels)

        for i, label in enumerate(labels):
            none_index = label + ' None'

            if none_index in results['methods']:
                none_index = results['methods'].index(none_index)
            else:
                none_index = -1

            neuroglancer_index = label + ' Neuroglancer'

            if neuroglancer_index in results['methods']:
                neuroglancer_index = results['methods'].index(
                    neuroglancer_index
                )
            else:
                neuroglancer_index = -1

            compresso_index = label + ' Compresso'

            if compresso_index in results['methods']:
                compresso_index = results['methods'].index(compresso_index)
            else:
                compresso_index = -1

            if none_index != -1:
                none[i] = results[what][none_index]
                if what + '_std' in results:
                    none_std[i] = results[what + '_std'][none_index]
                else:
                    none_std[i] = 0

            if neuroglancer_index != -1:
                neuroglancer[i] = results[what][neuroglancer_index]
                if what + '_std' in results:
                    neuroglancer_std[i] = results[what + '_std'][neuroglancer_index]
                else:
                    neuroglancer_std[i] = 0

            if compresso_index != -1:
                compresso[i] = results[what][compresso_index]
                if what + '_std' in results:
                    compresso_std[i] = results[what + '_std'][compresso_index]
                else:
                    compresso_std[i] = 0

        plt.figure(figsize=(10, 10))
        N = len(labels)
        ind = np.arange(N)  # the x locations for the groups
        width = 0.25  # the width of the bars

        font = {
            'family': 'sans-serif',
            'size': 13.5
        }
        plt.rc('font', **font)

        if no_bw:
            # drop the first group (presumably a black/white baseline --
            # TODO confirm against the caller)
            ind = ind[1:]
            none = none[1:]
            none_std = none_std[1:]
            neuroglancer = neuroglancer[1:]
            neuroglancer_std = neuroglancer_std[1:]
            compresso = compresso[1:]
            compresso_std = compresso_std[1:]
            labels = labels[1:]

        lab_none = 'No first stage encoding'
        lab_neuroglancer = 'Neuroglancer'
        lab_compresso = 'Compresso'

        if what == 'bytes':
            lab_none = None
            lab_neuroglancer = None
            lab_compresso = None

        if what.endswith('speed') and log:
            # log10 of the speeds, clamped at 1 so log10 is never negative
            none = np.log10([max(x, 1) for x in none])
            neuroglancer = np.log10([max(x, 1) for x in neuroglancer])
            compresso = np.log10([max(x, 1) for x in compresso])

        capthick = 0 if no_errorbars else 2

        fig, ax = plt.subplots()
        ne = ax.bar(
            ind,
            none,
            width,
            color='#bbbbbb',
            label=lab_none,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=none_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )

        ng = ax.bar(
            ind + width,
            neuroglancer,
            width,
            color='#999999' if digital else '#808080',
            label=lab_neuroglancer,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=neuroglancer_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )
        cp = ax.bar(
            ind + width * 2,
            compresso,
            width,
            color='#dc133b',
            label=lab_compresso,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=compresso_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )

        if what == 'bytes_size':
            # dashed reference line showing the uncompressed input size
            ax.axhline(
                y=input_bytes,
                color='gray',
                label='Input',
                linewidth=2,
                linestyle='--'
            )

        ax.tick_params(
            axis='y',
            color='#cccccc' if digital else '#888888',
            labelcolor='#999999' if digital else '#333333'
        )

        ax.tick_params(
            axis='x',
            color='#cccccc'
        )

        xticks_colors = ['#666666' if digital else '#333333'] * len(compresso)
        # NOTE(review): this aliases font_base (copy() is not called), so
        # set_size(15) also mutates font_base.
        compresso_font = font_base
        compresso_font.set_size(15)
        xticks_fonts = [compresso_font] * len(compresso)

        if emphasis >= 0:
            xticks_colors[emphasis] = '#333333' if digital else '#000000'
            xticks_fonts[emphasis] = font_bold
            xticks_fonts[emphasis].set_size(16)

        for xtick, color, fp in zip(
            ax.get_xticklabels(), xticks_colors, xticks_fonts
        ):
            xtick.set_color(color)
            xtick.set_font_properties(fp)

        for spine in ax.spines.values():
            spine.set_edgecolor('#cccccc' if digital else '#888888')

        ylabel = 'Compression Ratio\n(Original / Compressed)'

        if what.endswith('comp_speed'):
            ylabel = 'Compression Speed\n(MB/s)'

        if what.endswith('dcom_speed'):
            ylabel = 'Decompression Speed\n(MB/s)'

        if what.endswith('bytes_size'):
            ylabel = 'Size\n(MB)'
            ax.set_yscale('log', nonposy='clip')

        plt.ylabel(
            ylabel,
            color='#333333',
            labelpad=10,
            fontsize=16
        )

        if leg:
            # rebinds `leg` from the boolean flag to the Legend object
            leg = plt.legend(
                loc='upper %s' % leg_loc,
                prop={
                    'size': 15
                }
            )
            if what != 'bytes':
                if no_leg_bars:
                    if type(leg.legendHandles[0]) == Rectangle:
                        leg.legendHandles[0].set_width(1)

                    if type(leg.legendHandles[1]) == Rectangle:
                        leg.legendHandles[1].set_width(1)

                    if type(leg.legendHandles[2]) == Rectangle:
                        leg.legendHandles[2].set_width(1)

                # leg.legendHandles[0].set_width(2)
                leg.legendHandles[0].set_color('#bbbbbb')
                # leg.legendHandles[1].set_color('#8c8c8c')
                leg.legendHandles[1].set_color(
                    '#999999' if digital else '#777777'
                )
                leg.legendHandles[2].set_color('#dc133b')
                frm = leg.get_frame()
                frm.set_edgecolor('#ffffff')
                frm.set_facecolor('#ffffff')
                leg_texts = leg.get_texts()
                leg_texts[0].set_color('#999999' if digital else '#808080')
                leg_texts[1].set_color('#666666')
                leg_texts[2].set_color('#dc133b')

        # center the group labels under the three bars
        ax.set_xticks(ind + width * 1.5)
        ax.set_xticklabels(labels, rotation='vertical')

        if y_range is not None:
            plt.ylim(0, y_range)

        if no_bw:
            plt.xlim(0.5, len(none) + 1)
        else:
            plt.xlim(-0.5, len(none) + 0.5)

        plt.tick_params(
            axis='x',
            which='both',
            bottom='off',
            top='off'
        )

        if bar_label >= 0:
            # Plot label above bar to clarify relationship
            height = ne[bar_label].get_height()
            plt.text(
                ne[bar_label].get_x() + ne[bar_label].get_width() / 2.0,
                height,
                '1',
                ha='center',
                va='bottom',
                color='#bbbbbb',
                fontproperties=font_bold
            )

            height = ng[bar_label].get_height()
            plt.text(
                ng[bar_label].get_x() + ng[bar_label].get_width() / 2.0,
                height,
                '2',
                ha='center',
                va='bottom',
                color='#999999',
                fontproperties=font_bold
            )

            height = cp[bar_label].get_height()
            plt.text(
                cp[bar_label].get_x() + cp[bar_label].get_width() / 2.0,
                height,
                '3',
                ha='center',
                va='bottom',
                color='#dc133b',
                fontproperties=font_bold
            )

        if title is not None:
            plt.title(title, fontsize=18)

        ttl = ax.title
        ttl.set_position([.5, 1.05])
        # NOTE(review): bare attribute access, not a call -- this line is a
        # no-op; probably intended to be ttl.set_font_properties(...).
        ttl.set_font_properties

        plt.savefig(
            os.path.join(output, '%s_compression_%s.pdf' % (name, what)),
bbox_inches='tight'
        )
--------------------------------------------------------------------------------
/src/c++/compresso.hxx:
--------------------------------------------------------------------------------
#ifndef __COMPRESSO_H__
#define __COMPRESSO_H__

// NOTE(review): the targets of these #include directives and all template
// parameter lists (e.g. "template <typename Type>") appear to have been
// stripped by angle-bracket-eating text extraction throughout this file --
// restore them from the original header before compiling.
#include
#include
#include
#include
#include
#include
#include




namespace Compresso {
    // function definitions
    template unsigned char *Compress(Type *data, long res[3], long steps[3], long *nentries = NULL);
    template Type *Decompress(unsigned char *compressed_data, long *res = NULL);

    // dimension constants (index into res[]/steps[]: z is slowest axis)
    static const short RN_X = 2;
    static const short RN_Y = 1;
    static const short RN_Z = 0;

    // global variables (set by Compress/Decompress before the helpers run;
    // -1 until then)
    static long row_size = -1;
    static long sheet_size = -1;
    static long grid_size = -1;

    // internal helper function: flatten (ix, iy, iz) into a linear index
    static inline long IndicesToIndex(long ix, long iy, long iz) {
        return iz * sheet_size + iy * row_size + ix;
    };



    ///////////////////////////////////////////////////
    //// UNION-FIND CLASS FOR CONNECTED COMPONENTS ////
    ///////////////////////////////////////////////////

    class UnionFindElement {
    public:
        // constructor: a fresh element is its own root with rank 0
        UnionFindElement(unsigned long label) :
            label(label),
            parent(this),
            rank(0)
        {}

    public:
        // instance variables
        unsigned long label;
        UnionFindElement *parent;
        int rank;
    };

    // find the root of x with path compression
    UnionFindElement *
    Find(UnionFindElement *x)
    {
        if (x->parent != x) x->parent = Find(x->parent);
        return x->parent;
    };

    // merge the sets containing x and y (union by rank)
    void
    Union(UnionFindElement *x, UnionFindElement *y)
    {
        UnionFindElement *xroot = Find(x);
        UnionFindElement *yroot = Find(y);

        // root already the same
        if (xroot == yroot) return;

        // merge the two roots
        if (xroot->rank < yroot->rank) xroot->parent = yroot;
        else if (xroot->rank > yroot->rank) yroot->parent = xroot;
        else {
            yroot->parent = xroot;
            xroot->rank = xroot->rank + 1;
        }
    };



    ///////////////////////////////
    //// COMPRESSION ALGORITHM ////
    ///////////////////////////////

    // mark every voxel whose +x or +y neighbor has a different label
    template bool *
    ExtractBoundaries(Type *data, long res[3])
    {
        // create the boundaries array
        bool *boundaries = new bool[grid_size];
        if (!boundaries) { fprintf(stderr, "Failed to allocate memory for boundaries...\n"); return NULL; }

        // determine which pixels differ from east or south neighbors
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    boundaries[iv] = false;

                    if (ix < res[RN_X] - 1) {
                        if (data[iv] != data[IndicesToIndex(ix + 1, iy, iz)]) boundaries[iv] = true;
                    }
                    if (iy < res[RN_Y] - 1) {
                        if (data[iv] != data[IndicesToIndex(ix, iy + 1, iz)]) boundaries[iv] = true;
                    }
                }
            }
        }

        return boundaries;
    };

    // per-slice two-pass connected components over non-boundary voxels;
    // labels are 1-based, boundary voxels stay 0
    static unsigned long *
    ConnectedComponents(bool *boundaries, long res[3])
    {
        // create the connected components grid
        unsigned long *components = new unsigned long[grid_size];
        if (!components) { fprintf(stderr, "Failed to allocate memory for connected components...\n"); return NULL; }

        // initialize to zero
        for (long iv = 0; iv < grid_size; ++iv)
            components[iv] = 0;

        // run connected components for every slice
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            // create a vector of union find elements
            // (element type stripped by extraction; likely UnionFindElement *)
            std::vector union_find = std::vector();

            // current label in connected component
            int curlab = 1;
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // continue if boundary
                    if (boundaries[iv]) continue;

                    // only consider the pixel to the north and west
                    long north = IndicesToIndex(ix - 1, iy, iz);
                    long west = IndicesToIndex(ix, iy - 1, iz);

                    unsigned long neighbor_labels[2] = { 0, 0 };

                    // get the labels for the relevant neighbor
                    if (ix > 0) neighbor_labels[0] = components[north];
                    if (iy > 0) neighbor_labels[1] = components[west];

                    // if the neighbors are boundary, create new label
                    if (!neighbor_labels[0] && !neighbor_labels[1]) {
                        components[iv] = curlab;

                        // add to union find structure
                        union_find.push_back(new UnionFindElement(0));

                        // update the next label
                        curlab++;
                    }
                    // the two pixels have equal non-trivial values
                    else if (neighbor_labels[0] == neighbor_labels[1])
                        components[iv] = neighbor_labels[0];
                    else {
                        if (!neighbor_labels[0]) components[iv] = neighbor_labels[1];
                        else if (!neighbor_labels[1]) components[iv] = neighbor_labels[0];
                        else {
                            // take the minimum value
                            components[iv] = std::min(neighbor_labels[0], neighbor_labels[1]);

                            // set the equivalence relationship
                            Union(union_find[neighbor_labels[0] - 1], union_find[neighbor_labels[1] - 1]);
                        }
                    }
                }
            }

            // reset the current label to 1
            curlab = 1;

            // second pass: relabel every voxel with its root's final label,
            // assigned in scan order
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    if (boundaries[iv]) continue;

                    // get the parent for this component
                    UnionFindElement *comp = Find(union_find[components[iv] - 1]);
                    if (!comp->label) {
                        comp->label = curlab;
                        curlab++;
                    }

                    components[iv] = comp->label;
                }
            }

            // free memory
            for (unsigned long iv = 0; iv < union_find.size(); ++iv)
                delete union_find[iv];
        }

        // return the connected components array
        return components;
    }

    // record, per slice, the original segment id (+1) for each component
    // in first-encounter scan order
    template void
    IDMapping(unsigned long *components, Type *data, std::vector &ids, long res[3])
    {
        // iterate over every slice
        for (int iz = 0; iz < res[RN_Z]; ++iz) {
            // create a set of components for this slice
            // (value type stripped by extraction)
            std::unordered_set hash_map = std::unordered_set();

            // iterate over the entire slice
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // get the component label
                    unsigned long component_id = components[iv];

                    // if this component does not belong yet, add it
                    if (!hash_map.count(component_id)) {
                        hash_map.insert(component_id);

                        // add the segment id (+1 reserves 0 -- TODO confirm
                        // against the decoder)
                        unsigned long segment_id = (unsigned long)data[iv] + 1;
                        ids.push_back(segment_id);
                    }
                }
            }
        }
    }

    // pack the boundary bitmap into one machine word per steps-sized block
    unsigned long *
    EncodeBoundaries(bool *boundaries, long res[3], long steps[3])
    {
        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            // +0.5 guards against floating-point rounding in ceil
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }
        long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X];

        // create an empty array for the encodings
        unsigned long *boundary_data = new unsigned long[nwindows];
        if (!boundary_data) { fprintf(stderr, "Failed to allocate memory for boundary windows...\n"); return NULL; }
        for (long iv = 0; iv < nwindows; ++iv)
            boundary_data[iv] = 0;

        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // no encoding for non-boundaries
                    if (!boundaries[iv]) continue;

                    // find the block from the index
                    long zblock = iz / steps[RN_Z];
                    long yblock = iy / steps[RN_Y];
                    long xblock = ix / steps[RN_X];

                    // find the offset within the block
                    long zoffset = iz % steps[RN_Z];
                    long yoffset = iy % steps[RN_Y];
                    long xoffset = ix % steps[RN_X];

                    long block = zblock * (nblocks[RN_Y] * nblocks[RN_X]) + yblock * nblocks[RN_X] + xblock;
                    long offset = zoffset * (steps[RN_Y] * steps[RN_X]) + yoffset * steps[RN_X] + xoffset;

                    // set the bit for this voxel within the block's word
                    boundary_data[block] += (1LU << offset);
                }
            }
        }

        // return the encodings
        return boundary_data;
    }

    // replace each window word by its index in the sorted list of distinct
    // window values; `values` receives that sorted dictionary
    void
    ValueMapping(unsigned long *boundary_data, std::vector &values, long nwindows)
    {
        // keep a set of seen window values
        std::unordered_set hash_map = std::unordered_set();

        // go through all of the boundary data to create array of values
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!hash_map.count(boundary_data[iv])) {
                hash_map.insert(boundary_data[iv]);
                values.push_back(boundary_data[iv]);
            }
        }

        // sort the values
        sort(values.begin(), values.end());

        // create mapping from values to indices
        std::unordered_map mapping = std::unordered_map();
        for (unsigned long iv = 0; iv < values.size(); ++iv) {
            mapping[values[iv]] = iv;
        }

        // update boundary data
        for (long iv = 0; iv < nwindows; ++iv) {
            boundary_data[iv] = mapping[boundary_data[iv]];
        }
    }

    // for boundary voxels whose label the decoder cannot infer from its
    // north/west neighbors, emit a hint: 0-5 points at a same-label
    // neighbor, >= 6 stores the raw label + 6
    template void
    EncodeIndeterminateLocations(bool *boundaries, Type *data, std::vector &locations, long res[3])
    {
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    if (!boundaries[iv]) continue;
                    else if (iy > 0 && !boundaries[IndicesToIndex(ix, iy - 1, iz)]) continue;
                    else if (ix > 0 && !boundaries[IndicesToIndex(ix - 1, iy, iz)]) continue;
                    else {
                        // NOTE(review): the north/south/east/west naming here
                        // does not match the axes used elsewhere in the file;
                        // the index math is what matters.
                        long north = IndicesToIndex(ix - 1, iy, iz);
                        long south = IndicesToIndex(ix + 1, iy, iz);
                        long east = IndicesToIndex(ix, iy - 1, iz);
                        long west = IndicesToIndex(ix, iy + 1, iz);
                        long up = IndicesToIndex(ix, iy, iz + 1);
                        long down = IndicesToIndex(ix, iy, iz - 1);

                        // see if any of the immediate neighbors are candidates
                        if (ix > 0 && !boundaries[north] && data[north] == data[iv]) locations.push_back(0);
                        else if (ix < res[RN_X] - 1 && !boundaries[south] && data[south] == data[iv]) locations.push_back(1);
                        else if (iy > 0 && !boundaries[east] && data[east] == data[iv]) locations.push_back(2);
                        else if (iy < res[RN_Y] - 1 && !boundaries[west] && data[west] == data[iv]) locations.push_back(3);
                        else if (iz > 0 && !boundaries[down] && data[down] == data[iv]) locations.push_back(4);
                        else if (iz < res[RN_Z] - 1 && !boundaries[up] && data[up] == data[iv]) locations.push_back(5);
                        else locations.push_back(((unsigned long)data[IndicesToIndex(ix, iy, iz)]) + 6);
                    }
                }
            }
        }
    }

    // smallest number of bytes that can represent maximum_value
    // NOTE(review): the 1L << 32 and larger shifts require a 64-bit long;
    // undefined behavior on ILP32 targets.
    static unsigned char
    BytesNeeded(unsigned long maximum_value)
    {
        if (maximum_value < 1L << 8) return 1;
        else if (maximum_value < 1L << 16) return 2;
        else if (maximum_value < 1L << 24) return 3;
        else if (maximum_value < 1L << 32) return 4;
        else if (maximum_value < 1L << 40) return 5;
        else if (maximum_value < 1L << 48) return 6;
        else if (maximum_value < 1L << 56) return 7;
        else return 8;
    }

    // append `value` little-endian into the byte stream using nbytes bytes
    static void
    AppendValue(std::vector &data, unsigned long value, unsigned char nbytes)
    {
        for (unsigned char iv = 0; iv < nbytes; ++iv) {
            // get the 8 low order bits
            unsigned char low_order = value % 256;
            // add the one byte to the data
            data.push_back(low_order);
            // update the value by shifting one byte to the left
            value = value >> 8;
        }
    }

    // top-level Compresso encoder: boundary bitmap + per-component ids +
    // window dictionary + indeterminate-location hints, serialized as a
    // little-endian byte stream (header documented inline below).
    // Returns a heap-allocated buffer; *nentries receives its length.
    // NOTE(review): template parameter list stripped by extraction here too.
    template unsigned char *
    Compress(Type *data, long res[3], long steps[3], long *nentries)
    {
        // set the global variables used by IndicesToIndex and the helpers
        row_size = res[RN_X];
        sheet_size = res[RN_X] * res[RN_Y];
        grid_size = res[RN_X] * res[RN_Y] * res[RN_Z];

        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }
        long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X];

        // get the boundary voxels
        std::clock_t start_time = std::clock();
        bool *boundaries = ExtractBoundaries(data, res);
        if (!boundaries) return NULL;
        printf("Extract boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the connected components
        // use unsigned long since there could be more components than Type.MAX
        start_time = std::clock();
        unsigned long *components = ConnectedComponents(boundaries, res);
        if (!components) return NULL;
        printf("Connected components: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the ids
        start_time = std::clock();
        std::vector ids = std::vector();
        IDMapping(components, data, ids, res);
        printf("ID mapping: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // free memory
        delete[] components;

        // encode the boundary data
        start_time = std::clock();
        unsigned long *boundary_data = EncodeBoundaries(boundaries, res, steps);
        if (!boundary_data) return NULL;
        printf("Encode boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // map the window values
        start_time = std::clock();
        std::vector values = std::vector();
        ValueMapping(boundary_data, values, nwindows);
        printf("Map values: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the locations
        start_time = std::clock();
        std::vector locations = std::vector();
        EncodeIndeterminateLocations(boundaries, data, locations, res);
        printf("Encode locations: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the maximum id value
        unsigned long maximum_id = 0;
        for (unsigned long iv = 0; iv < ids.size(); ++iv)
            if (ids[iv] > maximum_id) maximum_id = ids[iv];

        // get the maximum value for the locations array
        unsigned long maximum_location = 0;
        for (unsigned long iv = 0; iv < locations.size(); ++iv)
            if (locations[iv] > maximum_location) maximum_location = locations[iv];

        // get the maximum boundary data window value
        // (2n+1 encoding for literals, 2n for zero runs -- see below)
        unsigned long maximum_boundary_data = 2 * values.size() + 1;
        // find the maximum run of zeros
        unsigned long maximum_zero_run = 0;
        unsigned long current_run = 0;
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!boundary_data[iv]) current_run++;
            else {
                if (current_run > maximum_zero_run) maximum_zero_run = current_run;
                current_run = 0;
            }
        }
        if (current_run > maximum_zero_run) maximum_zero_run = current_run;
        // multiply by two to pad for run length encoding
        maximum_zero_run *= 2;
        if (maximum_zero_run > maximum_boundary_data) maximum_boundary_data = maximum_zero_run;

        // get the number of bits per window as small as possible
        unsigned char bytes_per_window = steps[RN_X] * steps[RN_Y] * steps[RN_Z] / 8;
        unsigned char bytes_per_id = BytesNeeded(maximum_id);
        unsigned char bytes_per_location = BytesNeeded(maximum_location);
        unsigned char bytes_per_data = BytesNeeded(maximum_boundary_data);

        std::vector compressed_data = std::vector();

        // add the header to the decompressed data
        // (nine 8-byte fields, then five 1-byte fields)
        AppendValue(compressed_data, res[RN_Z], 8);
        AppendValue(compressed_data, res[RN_Y], 8);
        AppendValue(compressed_data, res[RN_X], 8);
        AppendValue(compressed_data, steps[RN_Z], 8);
        AppendValue(compressed_data, steps[RN_Y], 8);
        AppendValue(compressed_data, steps[RN_X], 8);
        AppendValue(compressed_data, values.size(), 8);
        AppendValue(compressed_data, ids.size(), 8);
        AppendValue(compressed_data, locations.size(), 8);
        // need one byte to say how large each chunk of data is
        AppendValue(compressed_data, bytes_per_window, 1);
        AppendValue(compressed_data, bytes_per_id, 1);
        AppendValue(compressed_data, bytes_per_location, 1);
        AppendValue(compressed_data, bytes_per_data, 1);
        // only final byte shows the original data Type
        AppendValue(compressed_data, sizeof(Type), 1);

        // add in all window values
        for (unsigned long iv = 0; iv < values.size(); ++iv)
            AppendValue(compressed_data, values[iv], bytes_per_window);
        // add in all ids
        for (unsigned long iv = 0; iv < ids.size(); ++iv)
            AppendValue(compressed_data, ids[iv], bytes_per_id);
        // add in all locations
        for (unsigned long iv = 0; iv < locations.size(); ++iv)
            AppendValue(compressed_data, locations[iv], bytes_per_location);

        // add in all boundary data - apply run length encoding
        // (even value = zero-run length * 2, odd value = literal * 2 + 1)
        unsigned long current_zero_run = 0;
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!boundary_data[iv]) current_zero_run++;
            else {
                if (current_zero_run) AppendValue(compressed_data, 2 * current_zero_run, bytes_per_data);
                AppendValue(compressed_data, 2 * boundary_data[iv] + 1, bytes_per_data);
                current_zero_run = 0;
            }
        }

        // have to add in the last zero run
        if (current_zero_run) AppendValue(compressed_data, 2 * current_zero_run, bytes_per_data);

        // copy the byte vector into a plain heap array owned by the caller
        unsigned char *compressed_pointer = new unsigned char[compressed_data.size()];
        for (unsigned long iv = 0; iv < compressed_data.size(); ++iv)
            compressed_pointer[iv] = compressed_data[iv];

        if (nentries) *nentries = compressed_data.size();

        // free memory
        delete[] boundaries;
        delete[] boundary_data;

        return compressed_pointer;
    };

    // read an nbytes little-endian value from data at offset, advancing
    // offset past it
    static unsigned long
    ExtractValue(unsigned char *data, unsigned long &offset, unsigned char nbytes)
    {
        // set the value to 0
        unsigned long value = 0;
        for (unsigned char iv = 0; iv < nbytes; ++iv) {
            // get the current bit values
            unsigned long byte = (unsigned long)data[offset];
            // shift over the proper amount
            byte = byte << (8 * iv);
            // update the value
            value += byte;
            // update the offset
            offset++;
        }

        return value;
    }

    // inverse of EncodeBoundaries: expand per-block window words (via the
    // `values` dictionary) back into the full boolean boundary grid
    // (definition continues past the end of this excerpt)
    static bool *
    DecodeBoundaries(unsigned long *boundary_data, std::vector &values, long res[3], long steps[3])
    {
        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }

        bool *boundaries = new bool[grid_size];
        for (long iv = 0; iv < grid_size; ++iv)
            boundaries[iv] = false;

        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // find the block from the index
                    long zblock = iz / steps[RN_Z];
                    long yblock = iy / steps[RN_Y];
                    long xblock = ix / steps[RN_X];

                    // find the offset within the block
                    long zoffset = iz % steps[RN_Z];
                    long yoffset = iy % steps[RN_Y];
                    long xoffset = ix % steps[RN_X];

                    long block = zblock * (nblocks[RN_Y] * nblocks[RN_X]) + yblock * nblocks[RN_X] + xblock;
565 | long offset = zoffset * (steps[RN_Y] * steps[RN_X]) + yoffset * steps[RN_X] + xoffset; 566 | 567 | unsigned long value = values[boundary_data[block]]; 568 | if ((value >> offset) % 2) boundaries[iv] = true; 569 | } 570 | } 571 | } 572 | 573 | return boundaries; 574 | } 575 | 576 | template Type * 577 | IDReverseMapping(unsigned long *components, std::vector ids, long res[3]) 578 | { 579 | Type *decompressed_data = new Type[grid_size]; 580 | for (long iv = 0; iv < grid_size; ++iv) 581 | decompressed_data[iv] = 0; 582 | 583 | int ids_index = 0; 584 | for (long iz = 0; iz < res[RN_Z]; ++iz) { 585 | // create mapping (not memory efficient but FAST!!) 586 | // number of components is guaranteed to be less than ids->size() 587 | unsigned long *mapping = new unsigned long[ids.size() + 1]; 588 | for (unsigned long iv = 0; iv < ids.size() + 1; ++iv) 589 | mapping[iv] = 0; 590 | 591 | for (long iy = 0; iy < res[RN_Y]; ++iy) { 592 | for (long ix = 0; ix < res[RN_X]; ++ix) { 593 | long iv = IndicesToIndex(ix, iy, iz); 594 | if (!mapping[components[iv]]) { 595 | mapping[components[iv]] = ids[ids_index]; 596 | ids_index++; 597 | } 598 | 599 | decompressed_data[iv] = (Type)(mapping[components[iv]] - 1); 600 | } 601 | } 602 | } 603 | 604 | return decompressed_data; 605 | } 606 | 607 | template void 608 | DecodeIndeterminateLocations(bool *boundaries, Type *decompressed_data, std::vector locations, long res[3]) 609 | { 610 | long index = 0; 611 | 612 | // go through all voxels 613 | for (long iz = 0; iz < res[RN_Z]; ++iz) { 614 | for (long iy = 0; iy < res[RN_Y]; ++iy) { 615 | for (long ix = 0; ix < res[RN_X]; ++ix) { 616 | long iv = IndicesToIndex(ix, iy, iz); 617 | 618 | // get the north and west neighbors 619 | long north = IndicesToIndex(ix - 1, iy, iz); 620 | long west = IndicesToIndex(ix, iy - 1, iz); 621 | 622 | if (!boundaries[iv]) continue; 623 | else if (ix > 0 && !boundaries[north]) { 624 | decompressed_data[iv] = decompressed_data[north]; 625 | } 626 | else if (iy 
> 0 && !boundaries[west]) { 627 | decompressed_data[iv] = decompressed_data[west]; 628 | } 629 | else { 630 | unsigned long offset = locations[index]; 631 | if (offset == 0) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix - 1, iy, iz)]; 632 | else if (offset == 1) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix + 1, iy, iz)]; 633 | else if (offset == 2) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy - 1, iz)]; 634 | else if (offset == 3) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy + 1, iz)]; 635 | else if (offset == 4) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz - 1)]; 636 | else if (offset == 5) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz + 1)]; 637 | else decompressed_data[iv] = (Type)(offset - 6); 638 | index++; 639 | } 640 | } 641 | } 642 | } 643 | } 644 | 645 | template Type* 646 | Decompress(unsigned char *compressed_data, long *res) 647 | { 648 | // extract all of the header information 649 | if (!res) res = new long[3]; 650 | long steps[3]; 651 | 652 | // the offset for the compressed data 653 | unsigned long offset = 0; 654 | // extract the header 655 | res[RN_Z] = ExtractValue(compressed_data, offset, 8); 656 | res[RN_Y] = ExtractValue(compressed_data, offset, 8); 657 | res[RN_X] = ExtractValue(compressed_data, offset, 8); 658 | steps[RN_Z] = ExtractValue(compressed_data, offset, 8); 659 | steps[RN_Y] = ExtractValue(compressed_data, offset, 8); 660 | steps[RN_X] = ExtractValue(compressed_data, offset, 8); 661 | unsigned long nvalues = ExtractValue(compressed_data, offset, 8); 662 | unsigned long nids = ExtractValue(compressed_data, offset, 8); 663 | unsigned long nlocations = ExtractValue(compressed_data, offset, 8); 664 | unsigned char bytes_per_window = ExtractValue(compressed_data, offset, 1); 665 | unsigned char bytes_per_id = ExtractValue(compressed_data, offset, 1); 666 | unsigned char bytes_per_location = ExtractValue(compressed_data, 
offset, 1); 667 | unsigned char bytes_per_data = ExtractValue(compressed_data, offset, 1); 668 | unsigned char bytes_for_output = ExtractValue(compressed_data, offset, 1); 669 | 670 | // set the global variables 671 | row_size = res[RN_X]; 672 | sheet_size = res[RN_X] * res[RN_Y]; 673 | grid_size = res[RN_X] * res[RN_Y] * res[RN_Z]; 674 | 675 | // determine the number of blocks in each direction 676 | long nblocks[3]; 677 | for (int dim = 0; dim <= 2; ++dim) { 678 | nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5); 679 | } 680 | long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X]; 681 | 682 | // allocate memory for all arrays 683 | std::vector ids = std::vector(); 684 | std::vector values = std::vector(); 685 | std::vector locations = std::vector(); 686 | unsigned long *boundary_data = new unsigned long[nwindows]; 687 | for (unsigned long iv = 0; iv < nvalues; ++iv) 688 | values.push_back(ExtractValue(compressed_data, offset, bytes_per_window)); 689 | for (unsigned long iv = 0; iv < nids; ++iv) 690 | ids.push_back(ExtractValue(compressed_data, offset, bytes_per_id)); 691 | for (unsigned long iv = 0; iv < nlocations; ++iv) 692 | locations.push_back(ExtractValue(compressed_data, offset, bytes_per_location)); 693 | 694 | // get the boundary data (undo run length encoding) 695 | long iv = 0; 696 | while (iv < nwindows) { 697 | unsigned long window_value = ExtractValue(compressed_data, offset, bytes_per_data); 698 | if (window_value % 2) { 699 | window_value = window_value / 2; 700 | assert (iv < nwindows); 701 | boundary_data[iv] = window_value; 702 | iv++; 703 | } 704 | else { 705 | unsigned long nzeros = window_value / 2; 706 | for (unsigned long iz = 0; iz < nzeros; ++iz, ++iv) { 707 | boundary_data[iv] = 0; 708 | } 709 | } 710 | } 711 | 712 | // get the boundaries from the data 713 | std::clock_t start_time = std::clock(); 714 | bool *boundaries = DecodeBoundaries(boundary_data, values, res, steps); 715 | if (!boundaries) return NULL; 716 
| printf("Decode boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 717 | 718 | // free memory 719 | delete[] boundary_data; 720 | 721 | // get the connected components 722 | start_time = std::clock(); 723 | unsigned long *components = ConnectedComponents(boundaries, res); 724 | if (!components) return NULL; 725 | printf("Connected components: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 726 | 727 | // decompress the data 728 | start_time = std::clock(); 729 | Type *decompressed_data = IDReverseMapping(components, ids, res); 730 | if (!decompressed_data) return NULL; 731 | printf("Reverse mapping: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 732 | 733 | // free memory 734 | delete[] components; 735 | 736 | // decode the final indeterminate locations 737 | start_time = std::clock(); 738 | DecodeIndeterminateLocations(boundaries, decompressed_data, locations, res); 739 | printf("Decode locations: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 740 | 741 | // return the decompressed data 742 | return decompressed_data; 743 | } 744 | }; 745 | 746 | #endif --------------------------------------------------------------------------------