├── experiments ├── __init__.py ├── compression │ ├── compresso │ │ ├── __init__.py │ │ ├── cpp-compresso.h │ │ ├── setup.py │ │ ├── compresso.pyx │ │ └── cpp-compresso.cpp │ ├── neuroglancer │ │ ├── __init__.py │ │ ├── cpp-neuroglancer.h │ │ ├── setup.py │ │ ├── pyneuroglancer.py │ │ ├── neuroglancer.pyx │ │ └── cpp-neuroglancer.cpp │ ├── __init__.py │ ├── lz78.py │ ├── x264.py │ ├── jpeg.py │ ├── _png.py │ ├── methods.py │ └── util.py ├── figures │ └── compression-performance.png ├── requirements.txt ├── plot.py └── run.py ├── banner.png ├── paper └── paper.pdf ├── .editorconfig ├── CITATION.bib ├── src ├── python │ ├── setup.py │ └── compresso.pyx └── c++ │ └── compresso.hxx ├── LICENSE ├── .gitignore ├── requirements.txt └── README.md /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/compression/compresso/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCG/compresso/HEAD/banner.png -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VCG/compresso/HEAD/paper/paper.pdf -------------------------------------------------------------------------------- /experiments/figures/compression-performance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/VCG/compresso/HEAD/experiments/figures/compression-performance.png -------------------------------------------------------------------------------- /experiments/compression/compresso/cpp-compresso.h: -------------------------------------------------------------------------------- 1 | namespace compresso { 2 | unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int zstep, int ystep, int xstep); 3 | 4 | unsigned long *Decompress(unsigned long *compressed_data); 5 | } 6 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/cpp-neuroglancer.h: -------------------------------------------------------------------------------- 1 | namespace neuroglancer { 2 | unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx); 3 | 4 | unsigned long *Decompress(unsigned long *compressed_data, int bz, int by, int bx); 5 | } -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | 11 | 12 | [*.py] 13 | indent_size = 4 14 | 15 | 16 | [*.md] 17 | indent_size = 4 18 | trim_trailing_whitespace = false 19 | -------------------------------------------------------------------------------- /experiments/compression/__init__.py: -------------------------------------------------------------------------------- 1 | from util import Util 2 | from compresso import compresso 3 | from neuroglancer import neuroglancer 4 | from methods import ( 5 | # general purpose 6 | BZ2, 7 | LZ78, 8 | LZF, 9 | LZMA, 10 | LZO, 11 | LZW, 12 | ZLIB, 13 | ZSTD, 14 | # image specific 15 | JPEG2000, 16 | PNG, 17 | # 
segmentation specific 18 | COMPRESSO, 19 | NEUROGLANCER, 20 | # video specific 21 | X264, 22 | # default 23 | NONE 24 | ) 25 | -------------------------------------------------------------------------------- /experiments/compression/compresso/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | extensions = [ 6 | Extension( 7 | 'compresso', 8 | include_dirs=[np.get_include()], 9 | sources=['compresso.pyx', 'cpp-compresso.cpp'], 10 | extra_compile_args=['-O4', '-std=c++0x'], 11 | language='c++' 12 | ) 13 | ] 14 | 15 | setup( 16 | ext_modules = cythonize(extensions) 17 | ) 18 | -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | extensions = [ 6 | Extension( 7 | 'neuroglancer', 8 | include_dirs=[np.get_include()], 9 | sources=['neuroglancer.pyx', 'cpp-neuroglancer.cpp'], 10 | extra_compile_args=['-O4', '-std=c++0x'], 11 | language='c++' 12 | ) 13 | ] 14 | 15 | setup( 16 | ext_modules = cythonize(extensions) 17 | ) 18 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @inbook{matejek2017compresso, 2 | author = {Brian Matejek and Daniel Haehn and Fritz Lekschas and Michael Mitzenmacher and Hanspeter Pfister}, 3 | title = {Compresso: Efficient Compression of Segmentation Data For Connectomics}, 4 | booktitle = {Medical Image Computing and Computer Assisted Intervention}, 5 | series = {MICCAI ’17}, 6 | publisher = {Springer International Publishing}, 7 | year = {2017}, 8 | month = {9}, 9 | day = {4}, 10 | pages = 
{781--788}, 11 | doi = {10.1007/978-3-319-66182-7_89}, 12 | } 13 | -------------------------------------------------------------------------------- /src/python/setup.py: -------------------------------------------------------------------------------- 1 | ## example: 2 | ## http://stackoverflow.com/questions/16792792/project-organization-with-cython-and-c 3 | 4 | from distutils.core import setup, Extension 5 | from Cython.Build import cythonize 6 | import numpy as np 7 | 8 | extensions = [ 9 | Extension( 10 | 'compresso', 11 | include_dirs=[np.get_include(), '../c++/'], 12 | sources=['compresso.pyx'], 13 | extra_compile_args=['-O4', '-std=c++11', '-C'], 14 | language='c++' 15 | ) 16 | ] 17 | 18 | setup( 19 | ext_modules=cythonize(extensions) 20 | ) 21 | -------------------------------------------------------------------------------- /experiments/compression/lz78.py: -------------------------------------------------------------------------------- 1 | class lz78(object): 2 | 3 | @staticmethod 4 | def name(): 5 | return 'LZ78' 6 | 7 | @staticmethod 8 | def compress(data, *args, **kwargs): 9 | '''LZ78 compression 10 | ''' 11 | 12 | d, word = {0: ''}, 0 13 | dyn_d = ( 14 | lambda d, key: d.get(key) or d.__setitem__(key, len(d)) or 0 15 | ) 16 | 17 | return [ 18 | token for 19 | char in 20 | data for 21 | token in 22 | [(word, char)] for 23 | word in [dyn_d(d, token)] if not word 24 | ] + [(word, '')] 25 | 26 | @staticmethod 27 | def decompress(data, *args, **kwargs): 28 | '''LZ78 decompression 29 | ''' 30 | 31 | d, j = {0: ''}, ''.join 32 | dyn_d = ( 33 | lambda d, value: d.__setitem__(len(d), value) or value 34 | ) 35 | 36 | return j([dyn_d(d, d[codeword] + char) for (codeword, char) in data]) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Rhoana 4 | 5 | Permission is hereby granted, free of charge, 
to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | PAPER/main.pdf 6 | PAPER/main.synctex.gz 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | results/ 28 | neuroglancer.cpp 29 | compresso.cpp 30 | 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # Texpad Stuff 97 | .texpadtmp 98 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports.lzma=0.0.3=py27_0 5 | cairo=1.14.8=0 6 | certifi=2016.2.28=py27_0 7 | contextlib2=0.5.5=py27_0 8 | cycler=0.10.0=py27_0 9 | cython=0.26=py27_0 10 | dbus=1.10.20=0 11 | enum34=1.1.6=py27_0 12 | expat=2.1.0=0 13 | fontconfig=2.12.1=3 14 | freetype=2.5.5=2 15 | funcsigs=1.0.2=py27_0 16 | functools32=3.2.3.2=py27_0 17 | glib=2.50.2=1 18 | glymur=0.8.11=py27_1 19 | gst-plugins-base=1.8.0=0 20 | gstreamer=1.8.0=0 21 | h5py=2.7.0=np113py27_0 22 | hdf5=1.8.17=2 23 | icu=54.1=0 24 | jbig=2.1=0 25 | jpeg=9b=0 26 | libffi=3.2.1=1 27 | libgcc=5.2.0=0 28 | libiconv=1.14=0 29 | libpng=1.6.30=1 30 | libtiff=4.0.6=3 31 | libxcb=1.12=1 32 | libxml2=2.9.4=0 33 | libxslt=1.1.29=0 34 | llvmlite=0.19.0=py27_0 35 | lxml=3.8.0=py27_0 36 
| lzo=2.10=0 37 | matplotlib=2.0.2=np113py27_0 38 | mkl=2017.0.3=0 39 | numba=0.34.0=np113py27_0 40 | numpy=1.13.1=py27_0 41 | olefile=0.44=py27_0 42 | openjpeg=2.1.2=3 43 | openssl=1.0.2l=0 44 | pcre=8.39=1 45 | pillow=4.2.1=py27_0 46 | pip=9.0.1=py27_1 47 | pixman=0.34.0=0 48 | pycairo=1.10.0=py27_0 49 | pyparsing=2.2.0=py27_0 50 | pypng=0.0.16=py27_0 51 | pyqt=5.6.0=py27_2 52 | python=2.7.13=0 53 | python-dateutil=2.6.1=py27_0 54 | python-lzf=0.2.1=py27_0 55 | python-lzo=1.11=py27_0 56 | pytz=2017.2=py27_0 57 | qt=5.6.2=5 58 | readline=6.2=2 59 | setuptools=36.4.0=py27_0 60 | singledispatch=3.4.0.3=py27_0 61 | sip=4.18=py27_0 62 | six=1.10.0=py27_0 63 | sqlite=3.13.0=0 64 | subprocess32=3.2.7=py27_0 65 | tk=8.5.18=0 66 | wheel=0.29.0=py27_0 67 | xz=5.2.3=0 68 | zlib=1.2.11=0 69 | zstandard=0.4.0=py27_0 70 | -------------------------------------------------------------------------------- /experiments/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports.lzma=0.0.3=py27_0 5 | cairo=1.14.8=0 6 | certifi=2016.2.28=py27_0 7 | contextlib2=0.5.5=py27_0 8 | cycler=0.10.0=py27_0 9 | cython=0.26=py27_0 10 | dbus=1.10.20=0 11 | enum34=1.1.6=py27_0 12 | expat=2.1.0=0 13 | fontconfig=2.12.1=3 14 | freetype=2.5.5=2 15 | funcsigs=1.0.2=py27_0 16 | functools32=3.2.3.2=py27_0 17 | glib=2.50.2=1 18 | glymur=0.8.11=py27_1 19 | gst-plugins-base=1.8.0=0 20 | gstreamer=1.8.0=0 21 | h5py=2.7.0=np113py27_0 22 | hdf5=1.8.17=2 23 | icu=54.1=0 24 | jbig=2.1=0 25 | jpeg=9b=0 26 | libffi=3.2.1=1 27 | libgcc=5.2.0=0 28 | libiconv=1.14=0 29 | libpng=1.6.30=1 30 | libtiff=4.0.6=3 31 | libxcb=1.12=1 32 | libxml2=2.9.4=0 33 | libxslt=1.1.29=0 34 | llvmlite=0.19.0=py27_0 35 | lxml=3.8.0=py27_0 36 | lzo=2.10=0 37 | matplotlib=2.0.2=np113py27_0 38 | mkl=2017.0.3=0 39 | numba=0.34.0=np113py27_0 40 | numpy=1.13.1=py27_0 41 | 
olefile=0.44=py27_0 42 | openjpeg=2.1.2=3 43 | openssl=1.0.2l=0 44 | pcre=8.39=1 45 | pillow=4.2.1=py27_0 46 | pip=9.0.1=py27_1 47 | pixman=0.34.0=0 48 | pycairo=1.10.0=py27_0 49 | pyparsing=2.2.0=py27_0 50 | pypng=0.0.16=py27_0 51 | pyqt=5.6.0=py27_2 52 | python=2.7.13=0 53 | python-dateutil=2.6.1=py27_0 54 | python-lzf=0.2.1=py27_0 55 | python-lzo=1.11=py27_0 56 | pytz=2017.2=py27_0 57 | qt=5.6.2=5 58 | readline=6.2=2 59 | setuptools=36.4.0=py27_0 60 | singledispatch=3.4.0.3=py27_0 61 | sip=4.18=py27_0 62 | six=1.10.0=py27_0 63 | sqlite=3.13.0=0 64 | subprocess32=3.2.7=py27_0 65 | tk=8.5.18=0 66 | wheel=0.29.0=py27_0 67 | xz=5.2.3=0 68 | zlib=1.2.11=0 69 | zstandard=0.4.0=py27_0 70 | -------------------------------------------------------------------------------- /experiments/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import sys 6 | import cPickle as pickle 7 | 8 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 9 | 10 | import compression as C 11 | 12 | 13 | def render_plots(pickle_file, output): 14 | with open(pickle_file, 'rb') as f: 15 | results = pickle.load(f) 16 | 17 | C.Util.plot( 18 | method_labels=results['methods'], 19 | data_bytes=results['comp_bytes'], 20 | ratios=results['ratios'], 21 | com_speed=results['total_comp_speed'], 22 | com_speed_stderr=results['total_comp_speed_std'], 23 | dcom_speed=results['total_decomp_speed'], 24 | dcom_speed_stderr=results['total_decomp_speed_std'], 25 | save=output, 26 | dpi=300, 27 | bw=False 28 | ) 29 | 30 | 31 | if __name__ == '__main__': 32 | parser = argparse.ArgumentParser() 33 | 34 | parser.add_argument( 35 | 'results', 36 | metavar='PATH', 37 | type=str, 38 | help='path to pickled results' 39 | ) 40 | 41 | parser.add_argument( 42 | '--output', 43 | '-o', 44 | metavar='PATH', 45 | dest='output', 46 | action='store', 47 | type=str, 48 | default='figures', 49 | help='output (default: 
figures/.eps)' 50 | ) 51 | 52 | args = parser.parse_args() 53 | 54 | if not os.path.isfile(args.results): 55 | print('Results file not found') 56 | sys.exit() 57 | 58 | output = os.path.basename(args.results) 59 | 60 | if args.output: 61 | output = os.path.join(args.output, output) 62 | 63 | render_plots(args.results, output) 64 | -------------------------------------------------------------------------------- /src/python/compresso.pyx: -------------------------------------------------------------------------------- 1 | # cimports 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # python imports 6 | from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t 7 | import numpy as np 8 | 9 | ctypedef fused Type: 10 | uint8_t 11 | uint16_t 12 | uint32_t 13 | uint64_t 14 | 15 | # import c++ functions 16 | cdef extern from "compresso.hxx" namespace "Compresso": 17 | unsigned char *Compress(...) 18 | Type *Decompress[Type](...) 19 | 20 | 21 | class Compresso(object): 22 | @staticmethod 23 | def name(): 24 | return 'Compresso' 25 | 26 | @staticmethod 27 | def compress(Type[:,:,:] data, res, steps): 28 | # call the c++ compression function 29 | cdef long *cpp_res = [res[0], res[1], res[2]] 30 | cdef long *cpp_steps = [steps[0], steps[1], steps[2]] 31 | cdef long *nentries = [0] 32 | cdef unsigned char *compressed_data = Compress(&(data[0,0,0]), cpp_res, cpp_steps, nentries) 33 | 34 | # convert to numpy array 35 | cdef unsigned char[:] tmp_compressed_data = compressed_data 36 | 37 | return np.asarray(tmp_compressed_data) 38 | 39 | @staticmethod 40 | def decompress(data): 41 | # get the number of bytes per uint (1, 2, 4, or 8) 42 | # the 76 comes from the offset in the header 43 | BYTE_OFFSET = 76 44 | nbytes = data[BYTE_OFFSET] 45 | 46 | # call the c++ decompression function 47 | cdef long *res = [0, 0, 0] 48 | cdef np.ndarray[unsigned char, ndim=1, mode='c'] cpp_data = np.ascontiguousarray(data) 49 | 50 | # just call this as unsigned long and convert later 51 | # TODO 
this is a bad hack 52 | cdef unsigned long *cpp_decompressed_data = Decompress['unsigned long'](&(cpp_data[0]), res) 53 | 54 | # convert the c++ pointer to a numpy array 55 | nentries = res[0] * res[1] * res[2] 56 | cdef unsigned long[:] tmp_decompressed_data = cpp_decompressed_data 57 | decompressed_data = np.asarray(tmp_decompressed_data).reshape((res[0], res[1], res[2])) 58 | 59 | # convert to a different data type if needed 60 | if nbytes == 1: decompressed_data = decompressed_data.astype(np.uint8) 61 | elif nbytes == 2: decompressed_data = decompressed_data.astype(np.uint16) 62 | elif nbytes == 4: decompressed_data = decompressed_data.astype(np.uint32) 63 | 64 | return np.asarray(decompressed_data) -------------------------------------------------------------------------------- /experiments/compression/neuroglancer/pyneuroglancer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import time 4 | from numba import jit 5 | 6 | @jit(nopython=True) 7 | def DecodeValues(block, values, encoded_values, bz, by, bx, nbits): 8 | # get the number of values per 8 byte uint64 9 | if (nbits > 0): 10 | values_per_uint64 = 64 / nbits 11 | 12 | ie = 0 13 | for value in encoded_values: 14 | for i in range(0, values_per_uint64): 15 | lower_bits_to_remove = ( 16 | (values_per_uint64 - i - 1) * nbits 17 | ) 18 | values[ie] = ( 19 | (value >> lower_bits_to_remove) % 2**nbits 20 | ) 21 | ie += 1 22 | 23 | ii = 0 24 | # get the lookup table 25 | for iw in range(0, bz): 26 | for iv in range(0, by): 27 | for iu in range(0, bx): 28 | block[iw, iv, iu] = values[ii] 29 | ii += 1 30 | 31 | return block, values 32 | 33 | @jit(nopython=True) 34 | def LookupTable(decompressed_data, lookup_table, block, iz, iy, ix, bz, by, bx): 35 | # read the lookup label 36 | for iw in range(0, bz): 37 | for iv in range(0, by): 38 | for iu in range(0, bx): 39 | decompressed_data[iz * bz + iw, iy * by + iv,ix * bx + iu] = 
lookup_table[block[iw, iv, iu]] 40 | 41 | 42 | return decompressed_data 43 | 44 | 45 | def DecodeNeuroglancer(data, table_offsets, nbits, values_offsets, data_entries, bz, by, bx): 46 | # get the size of the data 47 | az, ay, ax = data[1], data[2], data[3] 48 | gz, gy, gx = ( 49 | int(az / bz), 50 | int(ay / by), 51 | int(ax / bx) 52 | ) 53 | 54 | decompressed_data = np.zeros((az, ay, ax), dtype=np.uint64) 55 | 56 | block_size = bz * by * bx 57 | 58 | index = 0 59 | for iz in range(0, gz): 60 | for iy in range(0, gy): 61 | for ix in range(0, gx): 62 | # get the total number of bits needed 63 | uint64s_needed = ( 64 | nbits[index] * block_size 65 | ) / 64 66 | 67 | uint64s_needed = int(uint64s_needed + 0.5) 68 | 69 | # get the encoded values 70 | encoded_values = data[values_offsets[index]:values_offsets[index] + uint64s_needed] 71 | 72 | # reconstruct the block with their ids 73 | block = np.zeros((bz, by, bx), dtype=np.uint32) 74 | 75 | # decode the values based on the number of bytes needed 76 | values = np.zeros(block_size, dtype=np.uint32) 77 | 78 | block, values = DecodeValues(block, values, encoded_values, bz, by, bx, nbits[index]) 79 | 80 | # find the number of unique elements 81 | nunique = len(np.unique(block)) 82 | lookup_table = data[ 83 | table_offsets[index]:table_offsets[index] + nunique 84 | ] 85 | decompressed_data = LookupTable(decompressed_data, lookup_table, block, iz, iy, ix, bz, by, bx) 86 | 87 | index += 1 88 | 89 | return decompressed_data -------------------------------------------------------------------------------- /experiments/compression/x264.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | from subprocess import Popen, PIPE 5 | import tempfile 6 | 7 | class x264(object): 8 | 9 | @staticmethod 10 | def name(): 11 | '''X.264 compression 12 | ''' 13 | 14 | return 'X.264' 15 | 16 | @staticmethod 17 | def compress(data): 18 | '''X.264 
compression 19 | ''' 20 | 21 | from util import Util 22 | outlog = tempfile.mktemp() 23 | outvideo = tempfile.mktemp(suffix='.mp4') 24 | 25 | process_output = open(outlog,'w') 26 | p = Popen(['ffmpeg', 27 | '-f', 'rawvideo', 28 | '-vcodec', 'rawvideo', 29 | '-y', 30 | '-r', str(data.shape[0]), 31 | '-video_size', str(data.shape[1])+'x'+str(data.shape[2]), 32 | '-pixel_format', 'yuv444p', 33 | '-i', '-', 34 | '-c:v', 'libx264', 35 | '-pix_fmt', 'yuv444p', 36 | '-profile:v', 'high444', 37 | '-crf', '0', 38 | '-preset:v', 'slow', 39 | outvideo], stdin=PIPE, stdout=process_output, stderr=process_output) 40 | 41 | for z in range(data.shape[0]): 42 | Util.convert_to_rgb(data[z]).tofile(p.stdin) 43 | 44 | process_output.close() 45 | p.stdin.close() 46 | p.wait() 47 | 48 | 49 | outdata = None 50 | 51 | with open(outvideo, 'rb') as f: 52 | outdata = f.read() 53 | 54 | # we also need to pass the X,Y,Z dimensions 55 | dims = np.zeros((3), dtype=np.uint64) 56 | dims[0] = data.shape[0]# Z 57 | dims[1] = data.shape[1]# Y 58 | dims[2] = data.shape[2]# X 59 | 60 | return dims.tobytes() + outdata 61 | 62 | 63 | @staticmethod 64 | def decompress(data): 65 | '''X.264 decompression 66 | ''' 67 | from util import Util 68 | errlog = tempfile.mktemp() 69 | outvideo = tempfile.mktemp(suffix='.mp4') 70 | 71 | dims = data[0:3*8] # 3 * 64bit 72 | dims = np.fromstring(dims, dtype=np.uint64) 73 | 74 | videodata = data[3*8:] 75 | 76 | with open(outvideo, 'wb') as f: 77 | f.write(videodata) 78 | 79 | process_output = open(errlog,'w') 80 | 81 | p = Popen(['ffmpeg', 82 | '-i', outvideo, 83 | '-vcodec', 'rawvideo', 84 | '-f', 'image2pipe', 85 | '-video_size', str(dims[1])+'x'+str(dims[2]), 86 | '-pix_fmt', 'yuv444p', 87 | '-' 88 | ], stdout=PIPE, stderr=process_output) 89 | 90 | framesize = dims[1]*dims[2]*3 91 | 92 | frames = p.stdout.read(int(framesize*dims[0])) 93 | 94 | output_data = np.fromstring(frames, dtype=np.uint8) 95 | output_data_rgb = output_data.reshape((dims[0], dims[1], dims[2], 3)) 
96 | output_data_64 = np.zeros((dims[0], dims[1], dims[2]), dtype=np.uint64) 97 | for z in range(output_data_64.shape[0]): 98 | 99 | slice64 = Util.convert_from_rgb(output_data_rgb[z]) 100 | 101 | output_data_64[z] = slice64 102 | 103 | p.stdout.close() 104 | p.wait() 105 | process_output.close() 106 | 107 | return output_data_64 108 | -------------------------------------------------------------------------------- /experiments/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import sys 6 | import cPickle as pickle 7 | 8 | sys.path.insert(1, os.path.join(sys.path[0], '..')) 9 | 10 | import compression as C 11 | 12 | 13 | def run_experiments( 14 | enc_name, com_name, dataset, N, data_loc=None, slices=-1, verbose=False 15 | ): 16 | try: 17 | enc_alg = getattr(C, enc_name) 18 | 19 | # This is a stupid test to ensure that the correct encoder has been 20 | # found, which is needed since we have uppercase module imports and 21 | # lowercase filenames 22 | enc_alg.name() 23 | except Exception: 24 | print 'Encoding scheme not found!' 25 | sys.exit() 26 | 27 | try: 28 | com_alg = getattr(C, com_name) 29 | 30 | # This is a stupid test to ensure that the correct compressor has been 31 | # found, which is needed since we have uppercase module imports and 32 | # lowercase filenames 33 | com_alg.name() 34 | except Exception: 35 | print 'Encoding scheme not found!' 
36 | sys.exit() 37 | 38 | data = C.Util.load_data(dataset, slices, data_loc) 39 | 40 | results = C.Util.run_experiment( 41 | com=com_alg, 42 | enc=enc_alg, 43 | data=data, 44 | N=N, 45 | verbose=verbose 46 | ) 47 | 48 | filename = '_'.join([enc_name, com_name, dataset, str(N), str(slices)]) 49 | keepcharacters = ('-', '.', '_') 50 | 51 | filename = ''.join( 52 | [c for c in filename if c.isalnum() or c in keepcharacters] 53 | ).rstrip() 54 | 55 | res = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results') 56 | 57 | if not os.path.exists(res): 58 | os.makedirs(res) 59 | 60 | print(results) 61 | 62 | with open(os.path.join(res, filename), 'w') as f: 63 | for result in results: 64 | f.write('{}: {}\n'.format(result, results[result])) 65 | 66 | 67 | if __name__ == '__main__': 68 | parser = argparse.ArgumentParser() 69 | 70 | parser.add_argument( 71 | 'encoding', 72 | type=str, 73 | help='name of encoding scheme' 74 | ) 75 | 76 | parser.add_argument( 77 | 'compression', 78 | type=str, 79 | help='name of compression scheme' 80 | ) 81 | 82 | parser.add_argument( 83 | 'dataset', 84 | type=str, 85 | help='name of data set' 86 | ) 87 | 88 | parser.add_argument( 89 | '--directory', 90 | '-d', 91 | dest='dir', 92 | metavar='PATH', 93 | action='store', 94 | type=str, 95 | default=None, 96 | help='path to data directory' 97 | ) 98 | 99 | parser.add_argument( 100 | '--runs', 101 | '-r', 102 | dest='runs', 103 | metavar='NUM', 104 | action='store', 105 | type=int, 106 | default=1, 107 | help='number of runs (default: 1)' 108 | ) 109 | 110 | parser.add_argument( 111 | '--slices', 112 | '-s', 113 | dest='slices', 114 | metavar='NUM', 115 | action='store', 116 | type=int, 117 | default=-1, 118 | help='number of slices per dataset (default: -1 (all))' 119 | ) 120 | 121 | parser.add_argument( 122 | '--verbose', 123 | '-v', 124 | dest='verbose', 125 | action='store_true', 126 | help='print progress (default: False)' 127 | ) 128 | 129 | args = parser.parse_args() 130 | 131 
| run_experiments( 132 | args.encoding, 133 | args.compression, 134 | args.dataset, 135 | args.runs, 136 | args.dir, 137 | args.slices, 138 | args.verbose 139 | ) 140 | -------------------------------------------------------------------------------- /experiments/compression/jpeg.py: -------------------------------------------------------------------------------- 1 | import glymur 2 | import os 3 | import numpy as np 4 | import tempfile 5 | 6 | 7 | class jpeg(object): 8 | 9 | @staticmethod 10 | def name(): 11 | '''No Encoding 12 | ''' 13 | 14 | return 'JPEG2000' 15 | 16 | @staticmethod 17 | def compress(data, *args, **kwargs): 18 | '''JPEG2000 compression 19 | ''' 20 | 21 | TMPFOLDER = tempfile.mkdtemp() 22 | 23 | compressed_data = '' 24 | 25 | sizes = [] 26 | 27 | for iz in range(0, data.shape[0]): 28 | img = data[iz, :, :] 29 | 30 | colorized = np.zeros( 31 | (3, img.shape[0], img.shape[1]), dtype=np.uint16 32 | ) 33 | 34 | # for every value split into three 16 bit samples 35 | colorized[0, :, :] = img % (2**16) 36 | img = img >> 16 37 | colorized[1, :, :] = img % (2**16) 38 | img = img >> 32 39 | colorized[2, :, :] = img % (2**16) 40 | 41 | #print colorized.shape 42 | 43 | glymur.Jp2k(TMPFOLDER+'/tmp_' + str(iz) + '.jp2', colorized) 44 | #glymur.Jp2k('JPEG_TMP/tmp_' + str(iz) + '.jp2', img.astype(np.uint16)) 45 | with open(TMPFOLDER+'/tmp_' + str(iz) + '.jp2', 'rb') as fd: 46 | c_data = fd.read() 47 | compressed_data += c_data 48 | sizes.append(len(c_data)) 49 | 50 | 51 | frames = np.zeros((len(sizes)), dtype=np.uint64) 52 | 53 | for i,s in enumerate(sizes): 54 | 55 | frames[i] = s 56 | 57 | # 58 | # 59 | # no of frames 60 | output = np.uint64(len(sizes)).tobytes() 61 | 62 | # frame sizes 63 | output += frames.tobytes() 64 | 65 | output += compressed_data 66 | 67 | # print sizes 68 | 69 | return output 70 | 71 | @staticmethod 72 | def decompress(data, *args, **kwargs): 73 | '''JPEG2000 decompression 74 | ''' 75 | 76 | TMPFOLDER = tempfile.mkdtemp() 77 | 78 | # 
import itertools
import numpy as np
import os
import png
import tempfile


class _png(object):
    '''Slice-wise lossless PNG codec for segmentation volumes.

    compress() splits every label into three 16-bit planes (low, middle,
    high bits) and stores each z-slice as one 16-bit RGB PNG.  The PNG
    streams are concatenated behind a small header recording the number
    of frames and each frame's byte size, so decompress() can split the
    stream again without parsing PNG internals.

    NOTE(review): only the lowest 48 bits of every label survive the
    three 16-bit planes -- labels >= 2**48 would be truncated silently.
    '''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''

        return 'PNG'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''PNG compression.

        data: 3-D integer label volume indexed (z, y, x).
        Returns bytes: [uint64 frame count][uint64 sizes...][frames...].
        '''

        TMPFOLDER = tempfile.mkdtemp()

        # bytes accumulator (b'' so the code also runs under Python 3)
        compressed_data = b''

        sizes = []

        for iz in range(0, data.shape[0]):
            img = data[iz, :, :]

            colorized = np.zeros(
                (3, img.shape[0], img.shape[1]), dtype=np.uint16
            )

            # for every value split into three 16 bit samples
            colorized[0, :, :] = img % (2**16)
            img = img >> 16
            colorized[1, :, :] = img % (2**16)
            img = img >> 16
            colorized[2, :, :] = img % (2**16)

            # (plane, row, col) -> (row, col, plane) for the PNG writer
            colorized = colorized.swapaxes(0, 1).swapaxes(1, 2)

            row_count, column_count, plane_count = colorized.shape

            # write the slice as one 16-bit RGB PNG
            with open(TMPFOLDER + '/tmp_' + str(iz) + '.png', 'wb') as pngfile:
                pngWriter = png.Writer(
                    column_count,
                    row_count,
                    greyscale=False,
                    alpha=False,
                    bitdepth=16
                )
                pngWriter.write(
                    pngfile,
                    np.reshape(colorized, (-1, column_count * plane_count))
                )

            with open(TMPFOLDER + '/tmp_' + str(iz) + '.png', 'rb') as fd:
                c_data = fd.read()
            compressed_data += c_data
            sizes.append(len(c_data))

        # per-frame byte sizes as uint64
        frames = np.array(sizes, dtype=np.uint64)

        # header: number of frames, then one size per frame
        output = np.uint64(len(sizes)).tobytes()
        output += frames.tobytes()
        output += compressed_data

        return output

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''PNG decompression (inverse of compress).

        Returns a uint64 volume shaped (frames, rows, cols).
        '''

        TMPFOLDER = tempfile.mkdtemp()

        # grab no of frames (frombuffer: fromstring is deprecated)
        no_frames = int(np.frombuffer(data[0:8], dtype=np.uint64)[0])

        # grab the per-frame byte sizes
        sizes = np.frombuffer(data[8:8 + 8 * no_frames], dtype=np.uint64)

        # split the stream and store each frame in the tmp folder
        current_byte_pointer = 8 + 8 * no_frames
        for i in range(sizes.shape[0]):
            current_bytes = data[
                int(current_byte_pointer):int(current_byte_pointer + sizes[i])
            ]
            # BUGFIX: frames were previously written with a stray '.jp2'
            # suffix copied from the JPEG2000 codec; they are PNG streams
            with open(TMPFOLDER + '/tmp_' + str(i) + '.png', 'wb') as f:
                f.write(current_bytes)

            current_byte_pointer = current_byte_pointer + sizes[i]

        nfiles = len(os.listdir(TMPFOLDER))
        for ie, filename in enumerate(os.listdir(TMPFOLDER)):
            input_filename = TMPFOLDER + '/' + filename

            # slice index is encoded in the file name (tmp_<index>.png)
            index = int(filename.split('_')[1].split('.')[0])

            pngReader = png.Reader(filename=input_filename)
            row_count, column_count, png_data, meta = pngReader.asDirect()
            plane_count = meta['planes']

            # make sure rgb files
            assert plane_count == 3

            # builtin map instead of py2-only itertools.imap
            img = np.vstack(map(np.uint16, png_data))
            colorized = np.reshape(img, (row_count, column_count, plane_count))

            colorized = colorized.swapaxes(1, 2).swapaxes(0, 1)

            if (ie == 0):
                decompressed_data = np.zeros(
                    (nfiles, colorized.shape[1], colorized.shape[2]),
                    dtype=np.uint64
                )

            # BUGFIX: the third plane holds bits 32..47 (compress shifts the
            # image right by 16 twice), so it must be weighted by 2**32, not
            # 2**16; the old code corrupted every label >= 2**32
            decompressed_data[index, :, :] = (
                colorized[0, :, :].astype(np.uint64) +
                colorized[1, :, :].astype(np.uint64) * (2 ** 16) +
                colorized[2, :, :].astype(np.uint64) * (2 ** 32)
            )

        return decompressed_data
class compresso(object):
    '''Compresso boundary encoding (MICCAI 2017 scheme).

    Thin Cython wrapper around the C++ Compress/Decompress routines plus a
    pure-Python run-length pass over the per-block window values.
    '''

    @staticmethod
    def name():
        return 'Compresso'


    @staticmethod
    def compress(data):
        '''Boundary Encoding compression

        data: 3-D label volume shaped (z, y, x).
        Returns a byte string: C++ stream (header/ids/values/locations)
        followed by run-length-condensed uint32 window words.
        '''
        # reshape the data into one dimension
        zres, yres, xres = data.shape
        # sliding-window size used by the encoder: one z-slice, 8x8 in-plane
        (zstep, ystep, xstep) = (1, 8, 8)
        # the C++ stream starts with 9 uint64 header words
        header_size = 9

        nzblocks = int(ceil(float(zres) / zstep))
        nyblocks = int(ceil(float(yres) / ystep))
        nxblocks = int(ceil(float(xres) / xstep))
        nblocks = nzblocks * nyblocks * nxblocks

        # call the Cython function
        cdef np.ndarray[unsigned long, ndim=3, mode='c'] cpp_data
        cpp_data = np.ascontiguousarray(data, dtype=ctypes.c_uint64)
        cdef unsigned long *cpp_compressed_data = Compress(&(cpp_data[0,0,0]), zres, yres, xres, zstep, ystep, xstep)
        # total uint64 words in the C++ result: header + ids + values +
        # locations + one window word per block (sizes read from the header)
        length = header_size + cpp_compressed_data[3] + cpp_compressed_data[4] + cpp_compressed_data[5] + nblocks
        # NOTE(review): a '<unsigned long[:length]>' cast appears to have been
        # stripped from the next line (angle brackets lost in export); a bare
        # pointer cannot initialise a memoryview and 'length' is otherwise
        # unused -- confirm against the original repository source.
        cdef unsigned long[:] tmp_compressed_data = cpp_compressed_data
        compressed_data = np.asarray(tmp_compressed_data)

        # compress all the zeros in the window values

        nblocks = int(ceil(float(zres) / zstep)) * int(ceil(float(yres) / ystep)) * int(ceil(float(xres) / xstep))

        # split the stream: everything before the per-block window words ...
        intro_data = compressed_data[:-nblocks]
        # ... and the window words themselves, which are mostly zero
        block_data = compressed_data[-nblocks:]

        if (np.max(block_data) < 2**32):
            block_data = block_data.astype(np.uint32)

        # run-length encode the zeros: a run of N zeros is stored as the odd
        # value 2*N+1; every non-zero window value v is stored as the even
        # value 2*v, so parity tells the decoder which case it is reading.
        # A zero run that reaches the end of the stream is never emitted;
        # that is safe because the decoder pre-fills its array with zeros.
        condensed_blocks = list()
        inzero = False
        prev_zero = 0
        for ie, block in enumerate(block_data):
            if block == 0:
                # start counting zeros
                if not inzero:
                    inzero = True
                    prev_zero = ie
            else:
                if inzero:
                    # add information for the previous zero segment
                    condensed_blocks.append((ie - prev_zero) * 2 + 1)
                    inzero = False
                condensed_blocks.append(block * 2)

        condensed_blocks = np.array(condensed_blocks).astype(np.uint32)

        return intro_data.tobytes() + condensed_blocks.tobytes()


    @staticmethod
    def decompress(data):
        '''Boundary Decoding decompression

        data: byte string produced by compress().
        Returns a uint64 volume shaped (zres, yres, xres).
        '''

        # read the first nine uint64 header words (72 bytes)
        header = np.fromstring(data[0:72], dtype=np.uint64)

        zres = header[0]
        yres = header[1]
        xres = header[2]
        ids_size = int(header[3])
        values_size = int(header[4])
        locations_size = int(header[5])
        zstep = header[6]
        ystep = header[7]
        xstep = header[8]

        # get the intro data (header + ids + values + locations, in words)
        intro_size = 9 + ids_size + values_size + locations_size
        intro_data = np.fromstring(data[0:intro_size*8], dtype=np.uint64)

        # get the compressed blocks
        nblocks = int(ceil(float(zres) / zstep)) * int(ceil(float(yres) / ystep)) * int(ceil(float(xres) / xstep))
        compressed_blocks = np.fromstring(data[intro_size*8:], dtype=np.uint32)
        block_data = np.zeros(nblocks, dtype=np.uint64)

        index = 0
        for block in compressed_blocks:
            # odd values encode runs of zeros; even values encode window/2
            if block % 2:
                # NOTE(review): Python-2 integer division; under Python 3
                # this would produce a float and break the slice below
                # ('//' would be needed) -- confirm the target interpreter
                nzeros = (block - 1) / 2
                block_data[index:index+nzeros] = 0
                index += nzeros
            else:
                block_data[index] = block / 2
                index += 1

        data = np.concatenate((intro_data, block_data))

        cdef np.ndarray[unsigned long, ndim=1, mode='c'] cpp_data
        cpp_data = np.ascontiguousarray(data, dtype=ctypes.c_uint64)
        n = zres * yres * xres

        # NOTE(review): a '<unsigned long[:n]>' cast appears to have been
        # stripped here too ('n' is otherwise unused) -- confirm against
        # the original repository source.
        cdef unsigned long[:] cpp_decompressed_data = Decompress(&(cpp_data[0]))
        decompressed_data = np.reshape(np.asarray(cpp_decompressed_data), (zres, yres, xres))

        return decompressed_data
Compresso: Efficient Compression of Segmentation Data For Connectomics 2 | 3 | [![Paper](https://img.shields.io/badge/paper-accepted-red.svg?colorB=f52ef0)](https://vcg.seas.harvard.edu/publications/compresso-efficient-compression-of-segmentation-data-for-connectomics) 4 | [![MICCAI](https://img.shields.io/badge/presentation-MICCAI%202017-red.svg?colorB=135f89)](http://www.miccai2017.org/schedule) 5 | [![doi](https://img.shields.io/badge/used%20by-rhoana-red.svg?colorB=2bf55b)](http://www.rhoana.org) 6 | 7 | ![Segmentations](/banner.png?raw=true) 8 | 9 | > Recent advances in segmentation methods for connectomics and biomedical imaging produce very large datasets with labels that assign object classes to image pixels. The resulting label volumes are bigger than the raw image data and need compression for efficient storage and transfer. General-purpose compression methods are less effective because the label data consists of large low-frequency regions with structured boundaries unlike natural image data. We present Compresso, a new compression scheme for label data that outperforms existing approaches by using a sliding window to exploit redundancy across border regions in 2D and 3D. We compare our method to existing compression schemes and provide a detailed evaluation on eleven biomedical and image segmentation datasets. Our method provides a factor of 600-2200x compression for label volumes, with running times suitable for practice. 10 | 11 | **Paper**: Matejek _et al._, "Compresso: Efficient Compression of Segmentation Data For Connectomics", Proceedings of the International Conference on Medical Image Computing and Computer-Assisted Intervention (MICCAI), 2017, 10-14. 
\[[CITE](https://scholar.google.com/scholar?q=Compresso%3A+Efficient+Compression+of+Segmentation+Data+For+Connectomics) | [PDF](https://vcg.seas.harvard.edu/publications/compresso-efficient-compression-of-segmentation-data-for-connectomics/paper)\] 12 | 13 | ## Requirements 14 | 15 | - Python 2.7 16 | - conda 17 | 18 | ## Pip Installation 19 | 20 | Thanks to Will Silversmith, you can now install compresso with pip! 21 | 22 | ``` 23 | pip install compresso 24 | ``` 25 | 26 | ## Setup 27 | 28 | ```bash 29 | git clone https://github.com/vcg/compresso && cd compresso 30 | conda create -n compresso_env --file requirements.txt -c chen -c sunpy -c conda-forge -c auto -c indygreg 31 | source activate compresso_env 32 | # for Compresso scheme as presented in MICCAI 33 | cd experiments/compression/compresso; python setup.py build_ext --inplace 34 | # to run the neuroglancer compression scheme 35 | cd ../neuroglancer; python setup.py build_ext --inplace 36 | # for Compresso v2 that is under development 37 | cd ../../../src/python; python setup.py build_ext --inplace 38 | ``` 39 | 40 | ## Compress Segmentation Stacks 41 | 42 | There are two versions of Compresso in this repository. Under the src folder there is an updated c++ and python version that extends on the Compresso scheme presented in MICCAI. This algorithm, among other things, implements bit-packing to further improve compression results. 43 | 44 | The compression scheme in `experiments/compression/compresso` follows the MICCAI paper exactly. 45 | 46 | ## Compress Your Segmentation Stack 47 | 48 | In order to test Compresso on your own data simply use: 49 | 50 | ``` 51 | import compression as C 52 | # With LZMA 53 | C.LZMA.compress(C.COMPRESSO.compress(data)) 54 | ``` 55 | 56 | ## Experiments 57 | 58 | ``` 59 | # the dataset must be in hdf5 format.
60 | experiments/run.py COMPRESSO LZMA ac3 -r 1 -s 1 -d '///' 61 | ``` 62 | 63 | Usage: 64 | 65 | ``` 66 | usage: run.py [-h] [--directory PATH] [--runs NUM] [--slices NUM] 67 | [--verbose] 68 | encoding compression dataset 69 | 70 | positional arguments: 71 | encoding name of encoding scheme 72 | compression name of compression scheme 73 | dataset name of data set 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | --directory PATH, -d PATH 78 | path to data directory 79 | --runs NUM, -r NUM number of runs (default: 1) 80 | --slices NUM, -s NUM number of slices per dataset (default: -1 (all)) 81 | --verbose, -v print progress (default: False) 82 | ``` 83 | 84 | 85 | Make sure the data sets are located in `~/compresso/data/` or specify the location. The data from the paper can be found here: 86 | 87 | - AC3: _(Kasthuri et al. Saturated reconstruction of a volume of neocortex. Cell 2015.)_ 88 | - CREMI: 89 | - CYL: _(Kasthuri et al. Saturated reconstruction of a volume of neocortex. Cell 2015.)_ 90 | - SPL Brain Atlas: _(Halle M., Talos I-F., Jakab M., Makris N., Meier D., Wald L., Fischl B., Kikinis R. Multi-modality MRI-based Atlas of the Brain. SPL 2017 Jan)_ 91 | - SPL Knee Atlas: _(Richolt J.A., Jakab M., Kikinis R. SPL Knee Atlas. SPL 2015 Sep)_ 92 | - SPL Abdominal Atlas: _(Talos I-F., Jakab M., Kikinis R. SPL Abdominal Atlas. SPL 2015 Sep)_ 93 | - BSD500: _(Contour Detection and Hierarchical Image Segmentation P. Arbelaez, M. Maire, C. Fowlkes and J. Malik. IEEE TPAMI, Vol. 33, No. 5, pp. 898-916, May 2011.)_ 94 | - VOC2012: _(Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. 
cimport cython
cimport numpy as np
import numpy as np
import ctypes
import math
import sys

# bit-packing block size and the chunk size the volume is tiled into
(bz, by, bx) = (8, 8, 8)
(chunkz, chunky, chunkx) = (64, 64, 64)

cdef extern from 'cpp-neuroglancer.h' namespace 'neuroglancer':
    unsigned long *Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx)
    unsigned long *Decompress(unsigned long *compressed_data, int bz, int by, int bx)

from pyneuroglancer import DecodeNeuroglancer

# use cython for decompress (slower)
# NOTE(review): True selects the C++ Decompress path below and False the
# pure-Python DecodeNeuroglancer path; the "(slower)" remark is ambiguous --
# confirm which path the comment means.
cython_decompress = True


########################
### DECODE FUNCTIONS ###
########################

class neuroglancer(object):
    '''Neuroglancer compressed-segmentation codec (64^3 chunks, 8^3 blocks).'''

    @staticmethod
    def name():
        return 'Neuroglancer'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Neuroglancer compression

        data: 3-D label volume shaped (z, y, x).
        Returns a uint64 array: [zres, yres, xres] followed by each
        chunk's self-describing compressed stream.
        '''
        origz, origy, origx = data.shape

        # determine the number of chunks of data
        zres, yres, xres = data.shape

        # '+ 0.5' guards the float->int truncation against round-off
        nzchunks, nychunks, nxchunks = (int(math.ceil(float(zres) / chunkz) + 0.5), int(math.ceil(float(yres) / chunky) + 0.5), int(math.ceil(float(xres) / chunkx) + 0.5))

        # the stream starts with the global volume size
        compressed_data = np.zeros(3, dtype=np.uint64)
        compressed_data[0] = zres
        compressed_data[1] = yres
        compressed_data[2] = xres

        cdef np.ndarray[unsigned long, ndim=3, mode='c'] cpp_data
        cdef unsigned long *cpp_compressed_data
        cdef unsigned long[:] tmp_compressed_data

        # compress every chunk
        for iz in range(0, nzchunks):
            for iy in range(0, nychunks):
                for ix in range(0, nxchunks):
                    chunk = data[
                        iz * chunkz:(iz + 1) * chunkz,
                        iy * chunky:(iy + 1) * chunky,
                        ix * chunkx:(ix + 1) * chunkx
                    ]

                    # create header variables
                    zres, yres, xres = chunk.shape
                    origz, origy, origx = zres, yres, xres

                    # pad each dimension up to a multiple of the block size
                    if zres % bz: zpad = (bz - zres % bz)
                    else: zpad = 0
                    if yres % by: ypad = (by - yres % by)
                    else: ypad = 0
                    if xres % bx: xpad = (bx - xres % bx)
                    else: xpad = 0

                    zres += zpad
                    yres += ypad
                    xres += xpad

                    padded_data = np.pad(chunk, ((0, zpad), (0, ypad), (0, xpad)), 'reflect').astype(np.uint64)

                    cpp_data = np.ascontiguousarray(padded_data, dtype=ctypes.c_uint64)
                    cpp_compressed_data = Compress(&(cpp_data[0,0,0]), zres, yres, xres, bz, by, bx, origz, origy, origx)
                    # first word of the chunk stream is its total length
                    length = cpp_compressed_data[0]
                    # NOTE(review): a '<unsigned long[:length]>' cast appears
                    # to have been stripped here (angle brackets lost in
                    # export); a bare pointer cannot initialise a memoryview
                    # and 'length' is otherwise unused -- confirm.
                    tmp_compressed_data = cpp_compressed_data
                    compressed_data = np.concatenate((compressed_data, np.asarray(tmp_compressed_data)))

        return compressed_data

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Neuroglancer decompression

        data: uint64 stream produced by compress().
        Returns a uint64 volume shaped (zres, yres, xres).
        '''
        # get the uncompressed data size
        zres = data[0]
        yres = data[1]
        xres = data[2]
        data = data[3:]

        nzchunks, nychunks, nxchunks = (int(math.ceil(float(zres) / chunkz) + 0.5), int(math.ceil(float(yres) / chunky) + 0.5), int(math.ceil(float(xres) / chunkx) + 0.5))

        # create an empty decompressed array
        decompressed_data = np.zeros((zres, yres, xres), dtype=np.uint64)

        cdef np.ndarray[unsigned long, ndim=1, mode='c'] cpp_data
        cdef unsigned long[:] cpp_decompressed_chunk

        if not cython_decompress:
            # pure-Python path: unpack each chunk with DecodeNeuroglancer
            for iz in range(0, nzchunks):
                for iy in range(0, nychunks):
                    for ix in range(0, nxchunks):
                        # get the size of the data
                        length = int(data[0])

                        # padded chunk extent
                        az, ay, ax = data[1], data[2], data[3]

                        gz, gy, gx = (
                            int(math.ceil(float(az) / bz)),
                            int(math.ceil(float(ay) / by)),
                            int(math.ceil(float(ax) / bx))
                        )

                        # get the total size of the header
                        nelements = gz * gy * gx

                        # later will become 3 bytes from 4
                        table_offsets = np.zeros(nelements, dtype=np.uint32)
                        nbits = np.zeros(nelements, dtype=np.uint8)
                        values_offsets = np.zeros(nelements, dtype=np.uint32)

                        ################################
                        # DECOMPRESS HEADER VALUES
                        ################################

                        # get the original (unpadded) data size
                        origz, origy, origx = data[4], data[5], data[6]

                        # per-block header word layout: table offset in the
                        # top 24 bits, bit width in bits 32..39, values
                        # offset in the low 32 bits
                        data_entries = 7
                        for ie in range(0, nelements):
                            # NOTE(review): 'long' is Python-2 only; under
                            # Python 3 this would need int() -- confirm the
                            # target interpreter
                            header = long(data[data_entries])
                            table_offsets[ie] = header >> 40
                            nbits[ie] = (header << 24) >> 56
                            values_offsets[ie] = (header << 32) >> 32

                            data_entries += 1

                        ###############################
                        # DECOMPRESS ENTIRE IMAGE
                        ###############################

                        # remove the first element (length not needed)
                        decompressed_chunk = DecodeNeuroglancer(data[:length], table_offsets, nbits, values_offsets, data_entries, bz, by, bx)
                        decompressed_chunk = np.reshape(decompressed_chunk, (az, ay, ax))

                        # crop the padding away and place the chunk
                        decompressed_data[
                            iz * chunkz:(iz + 1) * chunkz,
                            iy * chunky:(iy + 1) * chunky,
                            ix * chunkx:(ix + 1) * chunkx
                        ] = decompressed_chunk[0:origz,0:origy,0:origx]

                        # advance to the next chunk stream
                        data = data[length:]

            return decompressed_data
        else:
            # C++ path: hand each chunk stream to Decompress()
            for iz in range(0, nzchunks):
                for iy in range(0, nychunks):
                    for ix in range(0, nxchunks):
                        # get the size of the data
                        length = int(data[0])
                        az, ay, ax = data[1], data[2], data[3]
                        origz, origy, origx = data[4], data[5], data[6]

                        ###############################
                        # DECOMPRESS ENTIRE IMAGE
                        ###############################
                        cpp_data = np.ascontiguousarray(data[:length], dtype=ctypes.c_uint64)
                        n = az * ay * ax

                        # NOTE(review): a '<unsigned long[:n]>' cast appears
                        # to have been stripped here as well ('n' is
                        # otherwise unused) -- confirm.
                        cpp_decompressed_chunk = Decompress(&(cpp_data[0]), bz, by, bx)
                        decompressed_chunk = np.reshape(np.asarray(cpp_decompressed_chunk), (az, ay, ax))

                        # crop the padding away and place the chunk
                        decompressed_data[
                            iz * chunkz:(iz + 1) * chunkz,
                            iy * chunky:(iy + 1) * chunky,
                            ix * chunkx:(ix + 1) * chunkx
                        ] = decompressed_chunk[0:origz,0:origy,0:origx]

                        # advance to the next chunk stream
                        data = data[length:]

            return decompressed_data
class BZ2(object):
    '''Thin adapter exposing bzip2 through the common codec interface.'''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'BZip2'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Compress a byte string with bzip2; extra args pass through.'''
        packed = bz2.compress(data, *args, **kwargs)
        return packed

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Invert compress(); extra args pass through to bz2.'''
        restored = bz2.decompress(data, *args, **kwargs)
        return restored
class LZW(object):
    '''Textbook LZW codec over byte strings.

    compress() maps the input to a sequence of dictionary codes (uint32
    numpy array); decompress() inverts it.  Rewritten to run on both
    Python 2 and 3: `xrange` -> `range`, `cStringIO` -> ``''.join``, and
    the quadratic ``list.pop(0)`` loop replaced by straight iteration.
    The emitted code sequence is unchanged.
    '''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'LZW'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''LZW-encode `data` (ndarray, str, or bytes) to uint32 codes.'''
        if type(data) is np.ndarray:
            str_data = data.tobytes()
        elif type(data) is str or type(data) is bytes:
            str_data = data
        else:
            raise ValueError('Data type not supported')

        # work on text where one symbol == one byte (latin-1 is lossless);
        # on Python 2 a str already satisfies this and is left untouched
        if not isinstance(str_data, str):
            str_data = str_data.decode('latin-1')

        # dictionary seeded with every single-byte string
        dict_size = 2**8
        dictionary = dict((chr(i), i) for i in range(dict_size))

        w = ''
        result = []
        for c in str_data:
            wc = w + c
            if wc in dictionary:
                # keep extending the current match
                w = wc
            else:
                # emit the longest known prefix, learn the new string
                result.append(dictionary[w])
                dictionary[wc] = dict_size
                dict_size += 1
                w = c

        # flush the final match (empty only for empty input)
        if w:
            result.append(dictionary[w])

        return np.array(result, dtype=np.uint32)

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Invert compress(): turn a code sequence back into a string.

        Raises ValueError on a code that cannot have been produced by
        compress().  Empty input decodes to the empty string.
        '''
        codes = [int(k) for k in data]
        if not codes:
            # robustness: compress('') yields no codes
            return ''

        dict_size = 256
        dictionary = dict((i, chr(i)) for i in range(dict_size))

        # collect output pieces and join once (avoids quadratic behavior)
        parts = []
        w = chr(codes[0])
        parts.append(w)

        for k in codes[1:]:
            if k in dictionary:
                entry = dictionary[k]
            elif k == dict_size:
                # the cSc corner case: code refers to the entry being built
                entry = w + w[0]
            else:
                raise ValueError('Bad compressed k: %s' % k)
            parts.append(entry)

            # add w+entry[0] to the dictionary
            dictionary[dict_size] = w + entry[0]
            dict_size += 1

            w = entry

        return ''.join(parts)
class PNG(object):
    '''Adapter routing the common codec interface to the _png module.'''

    @staticmethod
    def name():
        '''Human-readable scheme name.'''
        return 'PNG'

    @staticmethod
    def compress(data, *args, **kwargs):
        '''Delegate to _png.compress; extra args are accepted but unused.'''
        return _png.compress(data)

    @staticmethod
    def decompress(data, *args, **kwargs):
        '''Delegate to _png.decompress; extra args are accepted but unused.'''
        return _png.decompress(data)
// Compress one padded chunk into the neuroglancer-style stream:
// a 7-word global header, one packed header word per 8^3 block
// (table offset | bit width | values offset), then per block the
// bit-packed value indices followed by its sorted value lookup table.
// Caller owns the returned buffer (allocated with new[]).
// NOTE(review): zres/yres/xres are expected to be already padded to
// multiples of bz/by/bx by the Cython caller -- nyblocks/nxblocks below
// use exact integer division; confirm for any other caller.
unsigned long *
neuroglancer::Compress(unsigned long *data, int zres, int yres, int xres, int bz, int by, int bx, int origz, int origy, int origx)
{
    // set global variables used by IndicesToIndex
    row_size = xres;
    sheet_size = yres * xres;
    grid_size = zres * yres * xres;

    // the number of blocks ('+ 0.5' guards the double->int truncation)
    unsigned long gz = (unsigned long)(ceil((double)zres / bz) + 0.5);
    unsigned long gy = (unsigned long)(ceil((double)yres / by) + 0.5);
    unsigned long gx = (unsigned long)(ceil((double)xres / bx) + 0.5);

    // the number of elements and the block size
    unsigned long nelements = gz * gy * gx;
    unsigned int block_size = bz * by * bx;

    // get the end of the header (7 global words + one word per block)
    unsigned int header_offset = nelements + header_size;

    // create arrays that store the table offset, number of bits, and the value offsets
    unsigned int *table_offsets = new unsigned int[nelements];
    unsigned char *nbits = new unsigned char[nelements];
    unsigned int *values_offsets = new unsigned int[nelements];
    for (unsigned int iv = 0; iv < nelements; ++iv) {
        table_offsets[iv] = 0;
        nbits[iv] = 0;
        values_offsets[iv] = 0;
    }

    // create the arrays for the encoded values and the look up table
    // NOTE(review): template arguments were stripped when this file was
    // exported; presumably std::vector<unsigned long> here and matching
    // element types below -- confirm against the repository.
    unsigned int **encoded_values = new unsigned int *[nelements];
    std::vector *lookup_table = new std::vector[nelements];
    for (unsigned int iv = 0; iv < nelements; ++iv) {
        lookup_table[iv] = std::vector();
        encoded_values[iv] = new unsigned int[block_size];
        for (unsigned int ie = 0; ie < block_size; ++ie)
            encoded_values[iv][ie] = 0;
    }

    // get the number of blocks for each dimension (exact when padded)
    int nyblocks = yres / by;
    int nxblocks = xres / bx;

    // running offset (in uint64 words) where each block's payload lands
    unsigned int offset = header_offset;
    // iterate over every block
    for (unsigned int index = 0; index < nelements; ++index) {
        // get the block in terms of x, y, z
        int iz = index / (nyblocks * nxblocks);
        int iy = (index - iz * nyblocks * nxblocks) / nxblocks;
        int ix = index % nxblocks;

        // get the block
        unsigned long *block = new unsigned long[block_size];

        // populate the temporary block array
        int iv = 0;
        for (int ik = iz * bz; ik < (iz + 1) * bz; ++ik) {
            for (int ij = iy * by; ij < (iy + 1) * by; ++ij) {
                for (int ii = ix * bx; ii < (ix + 1) * bx; ++ii, ++iv) {
                    block[iv] = data[IndicesToIndex(ii, ij, ik)];
                }
            }
        }

        // get an ordered list of unique elements
        std::vector unique_elements = std::vector();
        std::unordered_set hash_set = std::unordered_set();

        for (unsigned int iv = 0; iv < block_size; ++iv) {
            if (!hash_set.count(block[iv])) {
                unique_elements.push_back(block[iv]);
                hash_set.insert(block[iv]);
            }
        }

        // sorted order so equal blocks produce identical tables
        std::sort(unique_elements.begin(), unique_elements.end());

        // create a mapping for the look up table and populate the lookup table
        unsigned int nunique = unique_elements.size();
        std::unordered_map mapping = std::unordered_map();
        for (unsigned int iv = 0; iv < nunique; ++iv) {
            mapping[unique_elements[iv]] = iv;
            lookup_table[index].push_back(unique_elements[iv]);
        }

        // populate the encoded values array (index into the lookup table)
        for (unsigned int iv = 0; iv < block_size; ++iv) {
            encoded_values[index][iv] = mapping[block[iv]];
        }

        // determine the number of bits: power-of-two widths only, so a
        // 64-bit word always holds a whole number of values
        if (nunique <= 1) nbits[index] = 0;
        else if (nunique <= 1<<1) nbits[index] = 1;
        else if (nunique <= 1<<2) nbits[index] = 2;
        else if (nunique <= 1<<4) nbits[index] = 4;
        else if (nunique <= 1<<8) nbits[index] = 8;
        else if (nunique <= 1<<16) nbits[index] = 16;
        else nbits[index] = 32;

        // packed values first, then the lookup table, per block
        values_offsets[index] = offset;
        offset += nbits[index] * block_size / 64;
        table_offsets[index] = offset;
        offset += nunique;

        // free memory
        delete[] block;
    }

    // +1: word 0 carries the total stream length
    unsigned long *compressed_data = new unsigned long[offset + 1];
    for (unsigned int iv = 0; iv < offset + 1; ++iv) {
        compressed_data[iv] = 0;
    }

    // add the header information (length, padded size, original size)
    compressed_data[0] = offset + 1;
    compressed_data[1] = zres;
    compressed_data[2] = yres;
    compressed_data[3] = xres;
    compressed_data[4] = origz;
    compressed_data[5] = origy;
    compressed_data[6] = origx;

    // one packed word per block: table offset in the top 24 bits, bit
    // width in bits 32..39, values offset in the low 32 bits
    int data_entry = header_size;
    for (unsigned int iv = 0; iv < nelements; ++iv, ++data_entry) {
        compressed_data[data_entry] = ((unsigned long)table_offsets[iv] << 40) + ((unsigned long)nbits[iv] << 32) + values_offsets[iv];
    }

    // add the encoded values
    for (unsigned int index = 0; index < nelements; ++index) {
        // encode all of the values
        if (nbits[index] > 0) {
            // get the number of values per 8 bytes
            unsigned int nvalues_per_entry = 64 / nbits[index];
            // get the number of entries for this block
            unsigned int nentries = block_size * nbits[index] / 64;

            // for every entry, for every value: first value lands in the
            // highest bits of the word
            int ii = 0;
            for (unsigned int ie = 0; ie < nentries; ++ie, ++data_entry) {
                unsigned long value = 0;
                for (unsigned int iv = 0; iv < nvalues_per_entry; ++iv, ++ii) {
                    // get the encoded value for this location
                    unsigned long encoded_value = (unsigned long)encoded_values[index][ii];

                    // the amount to shift the encoded value
                    unsigned int shift = (nvalues_per_entry - 1 - iv) * nbits[index];
                    value += (encoded_value << shift);
                }
                compressed_data[data_entry] = value;
            }
        }

        // add the lookup table
        for (unsigned int iv = 0; iv < lookup_table[index].size(); ++iv, ++data_entry) {
            compressed_data[data_entry] = lookup_table[index][iv];
        }
    }

    // free memory
    delete[] table_offsets;
    delete[] nbits;
    delete[] values_offsets;
    for (unsigned int iv = 0; iv < nelements; ++iv)
        delete[] encoded_values[iv];
    delete[] encoded_values;
    delete[] lookup_table;

    return compressed_data;
}
* xres]; 256 | for (int iv = 0; iv < zres * yres * xres; ++iv) 257 | decompressed_data[iv] = 0; 258 | 259 | // get the number of blocks for each dimension 260 | int nyblocks = yres / by; 261 | int nxblocks = xres / bx; 262 | 263 | // decode each block 264 | for (unsigned int index = 0; index < nelements; ++index) { 265 | // get the number of encoded blocks 266 | int nblocks = nbits[index] * block_size / 64; 267 | 268 | // get the encoded values 269 | unsigned long *encoded_values = new unsigned long[nblocks]; 270 | for (int iv = 0; iv < nblocks; ++iv) 271 | encoded_values[iv] = compressed_data[values_offsets[index] + iv]; 272 | 273 | // create empty block array 274 | unsigned long *block = new unsigned long[block_size]; 275 | for (unsigned int iv = 0; iv < block_size; ++iv) 276 | block[iv] = 0; 277 | 278 | // get the number of values per 8 bytes 279 | if (nbits[index]) { 280 | unsigned long nvalues_per_long = 64 / nbits[index]; 281 | 282 | // for every long value 283 | int ib = 0; 284 | for (int iv = 0; iv < nblocks; ++iv) { 285 | unsigned long value = encoded_values[iv]; 286 | // for every entry per 8 bytes 287 | for (unsigned int ie = 0; ie < nvalues_per_long; ++ie, ++ib) { 288 | unsigned int lower_bits_to_remove = (nvalues_per_long - ie - 1) * nbits[index]; 289 | 290 | block[ib] = (value >> lower_bits_to_remove) % (int)(pow(2, nbits[index]) + 0.5); 291 | } 292 | } 293 | } 294 | 295 | // get an ordered list of unique elements (template argument restored: elements are the unsigned long values in block[]) 296 | std::unordered_set<unsigned long> hash_set = std::unordered_set<unsigned long>(); 297 | 298 | for (unsigned int iv = 0; iv < block_size; ++iv) { 299 | if (!hash_set.count(block[iv])) { 300 | hash_set.insert(block[iv]); 301 | } 302 | } 303 | 304 | // get the lookup table 305 | unsigned int nunique = hash_set.size(); 306 | unsigned long *lookup_table = new unsigned long[nunique]; 307 | for (unsigned int iv = 0; iv < nunique; ++iv) { 308 | lookup_table[iv] = compressed_data[table_offsets[index] + iv]; 309 | } 310 | 311 | // update the block values 312 | for (unsigned
int iv = 0; iv < block_size; ++iv) { 313 | block[iv] = lookup_table[block[iv]]; 314 | } 315 | 316 | // get the block in terms if x, y, z 317 | int iz = index / (nyblocks * nxblocks); 318 | int iy = (index - iz * nyblocks * nxblocks) / nxblocks; 319 | int ix = index % nxblocks; 320 | 321 | int iv = 0; 322 | for (int ik = iz * bz; ik < (iz + 1) * bz; ++ik) { 323 | for (int ij = iy * by; ij < (iy + 1) * by; ++ij) { 324 | for (int ii = ix * bx; ii < (ix + 1) * bx; ++ii, ++iv) { 325 | decompressed_data[IndicesToIndex(ii, ij, ik)] = block[iv]; 326 | } 327 | } 328 | } 329 | 330 | // free memory 331 | delete[] encoded_values; 332 | delete[] block; 333 | delete[] lookup_table; 334 | } 335 | 336 | 337 | // free memory 338 | delete[] table_offsets; 339 | delete[] nbits; 340 | delete[] values_offsets; 341 | 342 | return decompressed_data; 343 | } 344 | -------------------------------------------------------------------------------- /experiments/compression/compresso/cpp-compresso.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "cpp-compresso.h" 8 | 9 | 10 | 11 | // size of various dimensions 12 | 13 | static int row_size = -1; 14 | static int sheet_size = -1; 15 | static int grid_size = -1; 16 | 17 | 18 | 19 | /////////////////////////////////// 20 | //// INTERNAL HELPER FUNCTIONS //// 21 | /////////////////////////////////// 22 | 23 | static int 24 | IndicesToIndex(int ix, int iy, int iz) 25 | { 26 | return iz * sheet_size + iy * row_size + ix; 27 | } 28 | 29 | 30 | 31 | ///////////////////////////////////////// 32 | //// UNION-FIND CLASS FOR COMPONENTS //// 33 | ///////////////////////////////////////// 34 | 35 | class UnionFindElement { 36 | public: 37 | UnionFindElement(unsigned long label) : 38 | label(label), 39 | parent(this), 40 | rank(0) 41 | {} 42 | 43 | public: 44 | unsigned long label; 45 | UnionFindElement *parent; 46 | int rank; 47 | }; 
48 | 49 | UnionFindElement * 50 | Find(UnionFindElement *x) 51 | { 52 | if (x->parent != x) { 53 | x->parent = Find(x->parent); 54 | } 55 | return x->parent; 56 | } 57 | 58 | void 59 | Union(UnionFindElement *x, UnionFindElement *y) 60 | { 61 | UnionFindElement *xroot = Find(x); 62 | UnionFindElement *yroot = Find(y); 63 | 64 | if (xroot == yroot) { return; } 65 | 66 | // merge teh two roots 67 | if (xroot->rank < yroot->rank) { 68 | xroot->parent = yroot; 69 | } 70 | else if (xroot->rank > yroot->rank) { 71 | yroot->parent = xroot; 72 | } 73 | else { 74 | yroot->parent = xroot; 75 | xroot->rank = xroot->rank + 1; 76 | } 77 | } 78 | 79 | 80 | 81 | ///////////////////////////////////////// 82 | //// COMPRESSO COMPRESSION ALGORITHM //// 83 | ///////////////////////////////////////// 84 | 85 | static bool * 86 | ExtractBoundaries(unsigned long *data, int zres, int yres, int xres) 87 | { 88 | // create the boundaries array 89 | bool *boundaries = new bool[grid_size]; 90 | if (!boundaries) { fprintf(stderr, "Failed to allocate memory for boundaries...\n"); exit(-1); } 91 | 92 | // determine which pixels differ from east or south neighbors 93 | for (int iz = 0; iz < zres; ++iz) { 94 | for (int iy = 0; iy < yres; ++iy) { 95 | for (int ix = 0; ix < xres; ++ix) { 96 | int iv = IndicesToIndex(ix, iy, iz); 97 | 98 | boundaries[iv] = false; 99 | 100 | // check the east neighbor 101 | if (ix < xres - 1) { 102 | if (data[iv] != data[IndicesToIndex(ix + 1, iy, iz)]) boundaries[iv] = true; 103 | } 104 | // check the south neighbor 105 | if (iy < yres - 1) { 106 | if (data[iv] != data[IndicesToIndex(ix, iy + 1, iz)]) boundaries[iv] = true; 107 | } 108 | } 109 | } 110 | } 111 | 112 | // return the boundary array 113 | return boundaries; 114 | } 115 | 116 | static unsigned long * 117 | ConnectedComponents(bool *boundaries, int zres, int yres, int xres) 118 | { 119 | // create the connected components 120 | unsigned long *components = new unsigned long[grid_size]; 121 | if 
(!components) { fprintf(stderr, "Failed to allocate memory for connected components...\n"); exit(-1); } 122 | for (int iv = 0; iv < grid_size; ++iv) 123 | components[iv] = 0; 124 | 125 | // run connected components for each slice 126 | for (int iz = 0; iz < zres; ++iz) { 127 | 128 | // create vector for union find elements (template argument restored: elements are UnionFindElement* per push_back below) 129 | std::vector<UnionFindElement *> union_find = std::vector<UnionFindElement *>(); 130 | 131 | // current label in connected component 132 | int curlab = 1; 133 | for (int iy = 0; iy < yres; ++iy) { 134 | for (int ix = 0; ix < xres; ++ix) { 135 | int iv = IndicesToIndex(ix, iy, iz); 136 | 137 | // continue if boundary 138 | if (boundaries[iv]) continue; 139 | 140 | // only consider the pixel directly to the north and west 141 | int north = IndicesToIndex(ix - 1, iy, iz); 142 | int west = IndicesToIndex(ix, iy - 1, iz); 143 | 144 | int neighbor_labels[2] = { 0, 0 }; 145 | 146 | // get the labels for the relevant neighbor 147 | if (ix > 0) neighbor_labels[0] = components[north]; 148 | if (iy > 0) neighbor_labels[1] = components[west]; 149 | 150 | // if the neighbors are boundary, create new label 151 | if (!neighbor_labels[0] && !neighbor_labels[1]) { 152 | components[iv] = curlab; 153 | 154 | // add to union find structure 155 | union_find.push_back(new UnionFindElement(0)); 156 | 157 | // update the next label 158 | curlab++; 159 | } 160 | // the two pixels have equal non-trivial values 161 | else if (neighbor_labels[0] == neighbor_labels[1]) 162 | components[iv] = neighbor_labels[0]; 163 | // neighbors have differing values 164 | else { 165 | if (!neighbor_labels[0]) components[iv] = neighbor_labels[1]; 166 | else if (!neighbor_labels[1]) components[iv] = neighbor_labels[0]; 167 | // neighbors have differing non-trivial values 168 | else { 169 | // take minimum value 170 | components[iv] = std::min(neighbor_labels[0], neighbor_labels[1]); 171 | 172 | // set the equivalence relationship 173 | Union(union_find[neighbor_labels[0] - 1], union_find[neighbor_labels[1] - 1]); 174 | 175
| } 176 | } 177 | } 178 | 179 | // reset the current label to 1 180 | curlab = 1; 181 | 182 | // create connected components (ordered) 183 | for (int iy = 0; iy < yres; ++iy) { 184 | for (int ix = 0; ix < xres; ++ix) { 185 | int iv = IndicesToIndex(ix, iy, iz); 186 | 187 | if (boundaries[iv]) continue; 188 | 189 | // get the parent for this component 190 | UnionFindElement *comp = Find(union_find[components[iv] - 1]); 191 | if (!comp->label) { 192 | comp->label = curlab; 193 | curlab++; 194 | } 195 | 196 | components[iv] = comp->label; 197 | } 198 | } 199 | 200 | for (unsigned int iv = 0; iv < union_find.size(); ++iv) 201 | delete union_find[iv]; 202 | } 203 | 204 | 205 | // return the connected components array 206 | return components; 207 | } 208 | 209 | static std::vector * 210 | IDMapping(unsigned long *components, unsigned long *data, int zres, int yres, int xres) 211 | { 212 | // create a vector of the ids 213 | std::vector *ids = new std::vector(); 214 | 215 | for (int iz = 0; iz < zres; ++iz) { 216 | // create a set for this individual slice 217 | std::set hash_map = std::set(); 218 | 219 | // iterate over the entire slice 220 | for (int iy = 0; iy < yres; ++iy) { 221 | for (int ix = 0; ix < xres; ++ix) { 222 | int iv = IndicesToIndex(ix, iy, iz); 223 | 224 | // get the segment id 225 | unsigned long component_id = components[iv]; 226 | 227 | // if this component does not belong yet, add it 228 | if (!hash_map.count(component_id)) { 229 | hash_map.insert(component_id); 230 | 231 | // add the segment id 232 | unsigned long segment_id = data[iv] + 1; 233 | ids->push_back(segment_id); 234 | } 235 | } 236 | } 237 | 238 | } 239 | 240 | // return the mapping 241 | return ids; 242 | } 243 | 244 | static unsigned long * 245 | EncodeBoundaries(bool *boundaries, int zres, int yres, int xres, int zstep, int ystep, int xstep) 246 | { 247 | // determine the number of blocks in the z, y, and x dimensions 248 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 249 
| int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 250 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 251 | 252 | // create an empty array for the encodings 253 | int nblocks = nzblocks * nyblocks * nxblocks; 254 | unsigned long *boundary_data = new unsigned long[nblocks]; 255 | for (int iv = 0; iv < nblocks; ++iv) 256 | boundary_data[iv] = 0; 257 | 258 | for (int iz = 0; iz < zres; ++iz) { 259 | for (int iy = 0; iy < yres; ++iy) { 260 | for (int ix = 0; ix < xres; ++ix) { 261 | int iv = IndicesToIndex(ix, iy, iz); 262 | 263 | // no encoding for non-boundaries 264 | if (!boundaries[iv]) continue; 265 | 266 | // find the block from the index 267 | int zblock = iz / zstep; 268 | int yblock = iy / ystep; 269 | int xblock = ix / xstep; 270 | 271 | // find the offset within the block 272 | int zoffset = iz % zstep; 273 | int yoffset = iy % ystep; 274 | int xoffset = ix % xstep; 275 | 276 | int block = zblock * (nyblocks * nxblocks) + yblock * nxblocks + xblock; 277 | int offset = zoffset * (ystep * xstep) + yoffset * xstep + xoffset; 278 | 279 | boundary_data[block] += (1LU << offset); 280 | } 281 | } 282 | } 283 | 284 | return boundary_data; 285 | } 286 | 287 | static std::vector<unsigned long> * 288 | ValueMapping(unsigned long *boundary_data, int nblocks) 289 | { 290 | // get a list of values (template arguments restored: containers hold the unsigned long boundary window values) 291 | std::vector<unsigned long> *values = new std::vector<unsigned long>(); 292 | std::set<unsigned long> hash_map = std::set<unsigned long>(); 293 | 294 | // go through all boundary data to create array of values 295 | for (int iv = 0; iv < nblocks; ++iv) { 296 | if (!hash_map.count(boundary_data[iv])) { 297 | hash_map.insert(boundary_data[iv]); 298 | values->push_back(boundary_data[iv]); 299 | } 300 | } 301 | 302 | // sort the values 303 | sort(values->begin(), values->end()); 304 | 305 | // create mapping from values to indices 306 | std::unordered_map<unsigned long, unsigned long> mapping = std::unordered_map<unsigned long, unsigned long>(); 307 | for (unsigned int iv = 0; iv < values->size(); ++iv) { 308 | mapping[(*values)[iv]] = iv; 309 | } 310 | 311 | // update boundary data 312 |
for (int iv = 0; iv < nblocks; ++iv) { 313 | boundary_data[iv] = mapping[boundary_data[iv]]; 314 | } 315 | 316 | // return values 317 | return values; 318 | } 319 | 320 | std::vector * 321 | EncodeIndeterminateLocations(bool *boundaries, unsigned long *data, int zres, int yres, int xres) 322 | { 323 | // update global size variables 324 | row_size = xres; 325 | sheet_size = yres * xres; 326 | grid_size = zres * yres * xres; 327 | 328 | std::vector *locations = new std::vector(); 329 | 330 | int iv = 0; 331 | for (int iz = 0; iz < zres; ++iz) { 332 | for (int iy = 0; iy < yres; ++iy) { 333 | for (int ix = 0; ix < xres; ++ix, ++iv) { 334 | 335 | if (!boundaries[iv]) continue; 336 | else if (iy > 0 && !boundaries[IndicesToIndex(ix, iy - 1, iz)]) continue; //boundaries[iv] = 0; 337 | else if (ix > 0 && !boundaries[IndicesToIndex(ix - 1, iy, iz)]) continue; //boundaries[iv] = 0; 338 | else { 339 | int north = IndicesToIndex(ix - 1, iy, iz); 340 | int south = IndicesToIndex(ix + 1, iy, iz); 341 | int east = IndicesToIndex(ix, iy - 1, iz); 342 | int west = IndicesToIndex(ix, iy + 1, iz); 343 | int up = IndicesToIndex(ix, iy, iz + 1); 344 | int down = IndicesToIndex(ix, iy, iz - 1); 345 | 346 | // see if any of the immediate neighbors are candidates 347 | if (ix > 0 && !boundaries[north] && data[north] == data[iv]) 348 | locations->push_back(0); 349 | else if (ix < xres - 1 && !boundaries[south] && data[south] == data[iv]) 350 | locations->push_back(1); 351 | else if (iy > 0 && !boundaries[east] && data[east] == data[iv]) 352 | locations->push_back(2); 353 | else if (iy < yres - 1 && !boundaries[west] && data[west] == data[iv]) 354 | locations->push_back(3); 355 | else if (iz > 0 && !boundaries[down] && data[down] == data[iv]) 356 | locations->push_back(4); 357 | else if (iz < zres - 1 && !boundaries[up] && data[up] == data[iv]) 358 | locations->push_back(5); 359 | else 360 | locations->push_back(data[IndicesToIndex(ix, iy, iz)] + 6); 361 | } 362 | } 363 | } 364 | } 365 | 
366 | return locations; 367 | } 368 | 369 | 370 | unsigned long * 371 | compresso::Compress(unsigned long *data, int zres, int yres, int xres, int zstep, int ystep, int xstep) 372 | { 373 | // set global variables 374 | row_size = xres; 375 | sheet_size = yres * xres; 376 | grid_size = zres * yres * xres; 377 | 378 | // determine the number of blocks in the z, y, and x dimensions 379 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 380 | int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 381 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 382 | 383 | // create an empty array for the encodings 384 | int nblocks = nzblocks * nyblocks * nxblocks; 385 | 386 | // get boundary voxels 387 | bool *boundaries = ExtractBoundaries(data, zres, yres, xres); 388 | 389 | // get the connected components 390 | unsigned long *components = ConnectedComponents(boundaries, zres, yres, xres); 391 | 392 | std::vector *ids = IDMapping(components, data, zres, yres, xres); 393 | 394 | unsigned long *boundary_data = EncodeBoundaries(boundaries, zres, yres, xres, zstep, ystep, xstep); 395 | 396 | std::vector *values = ValueMapping(boundary_data, nblocks); 397 | 398 | std::vector *locations = EncodeIndeterminateLocations(boundaries, data, zres, yres, xres); 399 | 400 | unsigned short header_size = 9; 401 | unsigned long *compressed_data = new unsigned long[header_size + ids->size() + values->size() + locations->size() + nblocks]; 402 | 403 | // add the resolution 404 | compressed_data[0] = zres; 405 | compressed_data[1] = yres; 406 | compressed_data[2] = xres; 407 | 408 | // add the sizes of the vectors 409 | compressed_data[3] = ids->size(); 410 | compressed_data[4] = values->size(); 411 | compressed_data[5] = locations->size(); 412 | 413 | compressed_data[6] = zstep; 414 | compressed_data[7] = ystep; 415 | compressed_data[8] = xstep; 416 | 417 | int iv = header_size; 418 | for (unsigned int ix = 0 ; ix < ids->size(); ++ix, ++iv) 419 | compressed_data[iv] = 
(*ids)[ix]; 420 | for (unsigned int ix = 0; ix < values->size(); ++ix, ++iv) 421 | compressed_data[iv] = (*values)[ix]; 422 | for (unsigned int ix = 0; ix < locations->size(); ++ix, ++iv) 423 | compressed_data[iv] = (*locations)[ix]; 424 | for (int ix = 0; ix < nblocks; ++ix, ++iv) 425 | compressed_data[iv] = boundary_data[ix]; 426 | 427 | // free memory 428 | delete[] boundaries; 429 | delete[] components; 430 | delete ids; 431 | delete[] boundary_data; 432 | delete values; 433 | delete locations; 434 | 435 | return compressed_data; 436 | } 437 | 438 | 439 | 440 | /////////////////////////////////////////// 441 | //// COMPRESSO DECOMPRESSION ALGORITHM //// 442 | /////////////////////////////////////////// 443 | 444 | static bool * 445 | DecodeBoundaries(unsigned long *boundary_data, std::vector *values, int zres, int yres, int xres, int zstep, int ystep, int xstep) 446 | { 447 | int nyblocks = (int)(ceil((double)yres / ystep) + 0.5); 448 | int nxblocks = (int)(ceil((double)xres / xstep) + 0.5); 449 | 450 | bool *boundaries = new bool[grid_size]; 451 | for (int iv = 0; iv < grid_size; ++iv) 452 | boundaries[iv] = false; 453 | 454 | for (int iz = 0; iz < zres; ++iz) { 455 | for (int iy = 0; iy < yres; ++iy) { 456 | for (int ix = 0; ix < xres; ++ix) { 457 | int iv = IndicesToIndex(ix, iy, iz); 458 | 459 | int zblock = iz / zstep; 460 | int yblock = iy / ystep; 461 | int xblock = ix / xstep; 462 | 463 | int zoffset = iz % zstep; 464 | int yoffset = iy % ystep; 465 | int xoffset = ix % xstep; 466 | 467 | int block = zblock * (nyblocks * nxblocks) + yblock * nxblocks + xblock; 468 | int offset = zoffset * (ystep * xstep) + yoffset * xstep + xoffset; 469 | 470 | unsigned long value = (*values)[boundary_data[block]]; 471 | if ((value >> offset) % 2) boundaries[iv] = true; 472 | } 473 | } 474 | } 475 | 476 | return boundaries; 477 | } 478 | 479 | static unsigned long * 480 | IDReverseMapping(unsigned long *components, std::vector *ids, int zres, int yres, int xres) 481 | { 
482 | unsigned long *decompressed_data = new unsigned long[grid_size]; 483 | for (int iv = 0; iv < grid_size; ++iv) 484 | decompressed_data[iv] = 0; 485 | 486 | int ids_index = 0; 487 | for (int iz = 0; iz < zres; ++iz) { 488 | 489 | // create mapping (not memory efficient but FAST!!) 490 | // number of components is guaranteed to be less than ids->size() 491 | unsigned long *mapping = new unsigned long[ids->size()]; 492 | for (unsigned int iv = 0; iv < ids->size(); ++iv) { 493 | mapping[iv] = 0; 494 | } 495 | 496 | for (int iy = 0; iy < yres; ++iy) { 497 | for (int ix = 0; ix < xres; ++ix) { 498 | int iv = IndicesToIndex(ix, iy, iz); 499 | 500 | if (!mapping[components[iv]]) { 501 | mapping[components[iv]] = (*ids)[ids_index]; 502 | ids_index++; 503 | } 504 | 505 | decompressed_data[iv] = mapping[components[iv]] - 1; 506 | } 507 | } 508 | } 509 | 510 | return decompressed_data; 511 | } 512 | 513 | static void 514 | DecodeIndeterminateLocations(bool *boundaries, unsigned long *decompressed_data, std::vector *locations, int zres, int yres, int xres) 515 | { 516 | int iv = 0; 517 | int index = 0; 518 | 519 | // go through all coordinates 520 | for (int iz = 0; iz < zres; ++iz) { 521 | for (int iy = 0; iy < yres; ++iy) { 522 | for (int ix = 0; ix < xres; ++ix, ++iv) { 523 | int north = IndicesToIndex(ix - 1, iy, iz); 524 | int west = IndicesToIndex(ix, iy - 1, iz); 525 | 526 | if (!boundaries[iv]) continue; 527 | else if (ix > 0 && !boundaries[north]) { 528 | decompressed_data[iv] = decompressed_data[north]; 529 | //boundaries[iv] = 0; 530 | } 531 | else if (iy > 0 && !boundaries[west]) { 532 | decompressed_data[iv] = decompressed_data[west]; 533 | //boundaries[iv] = 0; 534 | } 535 | else { 536 | int offset = (*locations)[index]; 537 | if (offset == 0) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix - 1, iy, iz)]; 538 | else if (offset == 1) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix + 1, iy, iz)]; 539 | else if (offset == 2) 
decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy - 1, iz)]; 540 | else if (offset == 3) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy + 1, iz)]; 541 | else if (offset == 4) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz - 1)]; 542 | else if (offset == 5) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz + 1)]; 543 | else { 544 | decompressed_data[iv] = offset - 6; 545 | } 546 | index += 1; 547 | } 548 | } 549 | } 550 | } 551 | } 552 | 553 | unsigned long* 554 | compresso::Decompress(unsigned long *compressed_data) 555 | { 556 | // constants 557 | int header_size = 9; 558 | 559 | // get the resolution 560 | int zres = compressed_data[0]; 561 | int yres = compressed_data[1]; 562 | int xres = compressed_data[2]; 563 | 564 | // set global variables 565 | row_size = xres; 566 | sheet_size = yres * xres; 567 | grid_size = zres * yres * xres; 568 | 569 | // get the size of the vectors 570 | int ids_size = compressed_data[3]; 571 | int values_size = compressed_data[4]; 572 | int locations_size = compressed_data[5]; 573 | 574 | // get the step size 575 | int zstep = compressed_data[6]; 576 | int ystep = compressed_data[7]; 577 | int xstep = compressed_data[8]; 578 | 579 | // determine the number of blocks in the z, y, and x dimensions 580 | int nzblocks = (int) (ceil((double)zres / zstep) + 0.5); 581 | int nyblocks = (int) (ceil((double)yres / ystep) + 0.5); 582 | int nxblocks = (int) (ceil((double)xres / xstep) + 0.5); 583 | 584 | // create an empty array for the encodings 585 | int nblocks = nzblocks * nyblocks * nxblocks; 586 | 587 | // allocate memory for all arrays 588 | std::vector *ids = new std::vector(); 589 | std::vector *values = new std::vector(); 590 | std::vector *locations = new std::vector(); 591 | unsigned long *boundary_data = new unsigned long[nblocks]; 592 | 593 | int iv = header_size; 594 | for (int ix = 0; ix < ids_size; ++ix, ++iv) 595 | ids->push_back(compressed_data[iv]); 596 | 
for (int ix = 0; ix < values_size; ++ix, ++iv) 597 | values->push_back(compressed_data[iv]); 598 | for (int ix = 0; ix < locations_size; ++ix, ++iv) 599 | locations->push_back(compressed_data[iv]); 600 | for (int ix = 0; ix < nblocks; ++ix, ++iv) 601 | boundary_data[ix] = compressed_data[iv]; 602 | 603 | bool *boundaries = DecodeBoundaries(boundary_data, values, zres, yres, xres, zstep, ystep, xstep); 604 | 605 | unsigned long *components = ConnectedComponents(boundaries, zres, yres, xres); 606 | 607 | unsigned long *decompressed_data = IDReverseMapping(components, ids, zres, yres, xres); 608 | 609 | DecodeIndeterminateLocations(boundaries, decompressed_data, locations, zres, yres, xres); 610 | 611 | // free memory 612 | delete[] boundaries; 613 | delete[] components; 614 | delete[] boundary_data; 615 | delete ids; 616 | delete values; 617 | delete locations; 618 | 619 | return decompressed_data; 620 | } 621 | -------------------------------------------------------------------------------- /experiments/compression/util.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import h5py 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import os 7 | import time 8 | import sys 9 | 10 | from matplotlib.font_manager import FontProperties 11 | from matplotlib.patches import Rectangle 12 | from matplotlib.colors import ListedColormap 13 | 14 | 15 | class Util(object): 16 | 17 | @staticmethod 18 | def adj_fig_size(width=10, height=10): 19 | '''Adjust figsize of plot 20 | ''' 21 | 22 | fig_size = plt.rcParams["figure.figsize"] 23 | fig_size[0] = width 24 | fig_size[1] = height 25 | plt.rcParams["figure.figsize"] = fig_size 26 | 27 | @staticmethod 28 | def colorize(slice): 29 | colorized = np.zeros(slice.shape + (3,), dtype=np.uint8) 30 | 31 | colorized[:, :, 0] = np.mod(107 * slice[:, :], 700).astype(np.uint8) 32 | colorized[:, :, 1] = np.mod(509 * slice[:, :], 900).astype(np.uint8) 33 | 
colorized[:, :, 2] = np.mod(200 * slice[:, :], 777).astype(np.uint8) 34 | 35 | return colors 36 | 37 | @staticmethod 38 | def convert_to_rgb(img): 39 | 40 | colorized = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) 41 | 42 | colorized[:, :, 0] = img % (2**8) 43 | img = img >> 8 44 | colorized[:, :, 1] = img % (2**8) 45 | img = img >> 8 46 | colorized[:, :, 2] = img % (2**8) 47 | 48 | return colorized 49 | 50 | @staticmethod 51 | def convert_to_rgba(img): 52 | colorized = np.zeros((img.shape[0], img.shape[1], 4), dtype=np.uint8) 53 | 54 | colorized[:, :, 0] = img % (2**8) 55 | img = img >> 8 56 | colorized[:, :, 1] = img % (2**8) 57 | img = img >> 8 58 | colorized[:, :, 2] = img % (2**8) 59 | img = img >> 8 60 | colorized[:, :, 3] = img % (2**8) 61 | 62 | return colorized 63 | 64 | @staticmethod 65 | def convert_from_rgb(frame): 66 | 67 | img = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint64) 68 | img[:] = (np.uint64(frame[:, :, 0]) + np.uint64(frame[:, :, 1]) * 256 + np.uint64(frame[:, :, 2]) * 256 * 256) 69 | 70 | return img 71 | 72 | @staticmethod 73 | def convert_from_rgba(frame): 74 | img = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.uint64) 75 | img[:] = (frame[:, :, 0] + frame[:, :, 1] * 256 + frame[:, :, 2] * 256 * 256 + frame[:, :, 3] * 256 * 256 * 256) 76 | 77 | return img 78 | 79 | @staticmethod 80 | def get_size(variable): 81 | '''Get bytes of variable 82 | ''' 83 | if type(variable).__module__ == np.__name__: 84 | variable = variable.tobytes() 85 | elif type(variable) is str: 86 | assert (all(ord(c) < 256) for c in variable) 87 | else: 88 | raise ValueError('Data type not supported') 89 | 90 | # checking the length of a bytestring is more accurate 91 | return len(variable) 92 | 93 | @staticmethod 94 | def to_best_type(array): 95 | '''Convert array to lowest possible bitrate. 
96 | ''' 97 | ui8 = np.iinfo(np.uint8) 98 | ui8 = ui8.max 99 | ui16 = np.iinfo(np.uint16) 100 | ui16 = ui16.max 101 | ui32 = np.iinfo(np.uint32) 102 | ui32 = ui32.max 103 | ui64 = np.iinfo(np.uint64) 104 | ui64 = ui64.max 105 | 106 | if array.max() <= ui64: 107 | new_type = np.uint64 108 | if array.max() <= ui32: 109 | new_type = np.uint32 110 | if array.max() <= ui16: 111 | new_type = np.uint16 112 | if array.max() <= ui8: 113 | new_type = np.uint8 114 | 115 | return array.astype(new_type) 116 | 117 | @staticmethod 118 | def load_data(name='ac3', N=-1, prefix=None, gold=False): 119 | '''Load data 120 | ''' 121 | 122 | if not 'mri' in name: 123 | if gold: filename = '~/compresso/data/' + name + '/gold/' + name + '_gold.h5' 124 | else: filename = '~/compresso/data/' + name + '/rhoana/' + name + '_rhoana.h5' 125 | 126 | with h5py.File(os.path.expanduser(filename), 'r') as hf: 127 | output = np.array(hf['main'], dtype=np.uint64) 128 | else: 129 | filename = '~/compresso/data/MRI/' + name + '.h5' 130 | 131 | with h5py.File(os.path.expanduser(filename), 'r') as hf: 132 | output = np.array(hf['main'], dtype=np.uint64) 133 | 134 | if (not N == -1): 135 | output = output[0:N,:,:] 136 | 137 | return output 138 | 139 | @staticmethod 140 | def compress(method, data, *args, **kwargs): 141 | '''Compress data 142 | ''' 143 | t0 = time.time() 144 | 145 | compressed_data = method.compress(data, *args, **kwargs) 146 | 147 | return compressed_data, time.time() - t0 148 | 149 | @staticmethod 150 | def decompress(method, compressed_data, *args, **kwargs): 151 | '''Decompress data 152 | ''' 153 | t0 = time.time() 154 | 155 | data = method.decompress(compressed_data, *args, **kwargs) 156 | 157 | return data, time.time() - t0 158 | 159 | @staticmethod 160 | def encode(method, data, *args, **kwargs): 161 | '''Encode data 162 | ''' 163 | t0 = time.time() 164 | 165 | enc_data = method.compress(data, *args, **kwargs) 166 | 167 | return enc_data, time.time() - t0 168 | 169 | @staticmethod 170 
| def decode(method, enc_data, *args, **kwargs): 171 | '''Decode data 172 | ''' 173 | t0 = time.time() 174 | 175 | data = method.decompress(enc_data, *args, **kwargs) 176 | 177 | return data, time.time() - t0 178 | 179 | @staticmethod 180 | def run_experiment(com, enc, data, N=100, verbose=True): 181 | '''Benchmark one compression method 182 | com = [BROTLI, BZ2, LZMA, LZO, ZLIB, ZSTD] 183 | data = data set 184 | N = number of runs 185 | *args / **kwargs = settings for the compression method, e.g. `9` 186 | ''' 187 | enc_speed = [] 188 | denc_speed = [] 189 | 190 | com_speed = [] 191 | dcom_speed = [] 192 | 193 | total_com_speed = [] 194 | total_dcom_speed = [] 195 | 196 | # run N iterations 197 | for n in range(N): 198 | start_time = time.time() 199 | 200 | # run encoding and compression 201 | encoded_data, t1 = Util.encode(enc, data) 202 | compressed_data, t2 = Util.compress(com, encoded_data) 203 | 204 | # decompress data 205 | if (com.name() == 'LZF'): 206 | # LZF requires the original output size 207 | decompressed_data, t3 = Util.decompress(com, compressed_data, 8 * long(data.size)) 208 | else: 209 | decompressed_data, t3 = Util.decompress(com, compressed_data) 210 | 211 | # make sure the data is returned as an array 212 | if not isinstance(decompressed_data, (np.ndarray, np.generic)) and isinstance(encoded_data, (np.ndarray, np.generic)): 213 | # convert back to numpy array 214 | decompressed_data = np.fromstring(decompressed_data, dtype=encoded_data.dtype) 215 | 216 | # decode the data 217 | decoded_data, t4 = Util.decode(enc, decompressed_data) 218 | 219 | # update the speed lists 220 | enc_speed.append(t1) 221 | denc_speed.append(t4) 222 | 223 | com_speed.append(t2) 224 | dcom_speed.append(t3) 225 | 226 | total_com_speed.append(t1 + t2) 227 | total_dcom_speed.append(t3 + t4) 228 | 229 | # guarantee lossless behavior 230 | assert np.array_equal(np.ndarray.flatten(data), np.ndarray.flatten(decoded_data)) 231 | 232 | print 'Ran iteration ' + str(n + 1) + ' 
of ' + str(N) + ' on ' + enc.name() + ' + ' + com.name() + ' in %0.2f seconds' % (time.time() - start_time) 233 | sys.stdout.flush() 234 | 235 | com_MB = Util.get_size(compressed_data) / float(1000**2) 236 | dec_MB = Util.get_size(data) / float(1000**2) 237 | 238 | # Higher is better 239 | ratio = dec_MB / com_MB 240 | 241 | # turn the speeds in MB / s 242 | for n in range(N): 243 | if enc_speed[n] == 0: 244 | enc_speed[n] = 0.01 245 | if denc_speed[n] == 0: 246 | denc_speed[n] = 0.01 247 | if com_speed[n] == 0: 248 | com_speed[n] = 0.01 249 | if dcom_speed[n] == 0: 250 | dcom_speed[n] = 0.01 251 | if total_com_speed[n] == 0: 252 | total_com_speed[n] = 0.01 253 | if total_dcom_speed[n] == 0: 254 | total_dcom_speed[n] = 0.01 255 | 256 | enc_speed[n] = dec_MB / enc_speed[n] 257 | denc_speed[n] = dec_MB / denc_speed[n] 258 | 259 | com_speed[n] = dec_MB / com_speed[n] 260 | dcom_speed[n] = dec_MB / dcom_speed[n] 261 | 262 | total_com_speed[n] = dec_MB / total_com_speed[n] 263 | total_dcom_speed[n] = dec_MB / total_dcom_speed[n] 264 | 265 | # get stddev for speeds 266 | com_speed_std = np.std(com_speed) 267 | dcom_speed_std = np.std(dcom_speed) 268 | 269 | enc_speed_std = np.std(enc_speed) 270 | denc_speed_std = np.std(denc_speed) 271 | 272 | total_com_speed_std = np.std(total_com_speed) 273 | total_dcom_speed_std = np.std(total_dcom_speed) 274 | 275 | # get means for speeds 276 | enc_speed = np.mean(enc_speed) 277 | denc_speed = np.mean(denc_speed) 278 | 279 | com_speed = np.mean(com_speed) 280 | dcom_speed = np.mean(dcom_speed) 281 | 282 | total_com_speed = np.mean(total_com_speed) 283 | total_dcom_speed = np.mean(total_dcom_speed) 284 | 285 | if verbose: 286 | print '>>>> %s + %s <<<<' % (enc.name(), com.name()) 287 | print 'Compression Method:', com.name() 288 | print 'Encoding Method:', enc.name() 289 | print 'Input Size:', dec_MB, 'MB' 290 | print 'Output Size:', com_MB, 'MB' 291 | print 'Ratio:', ratio 292 | print 'Total Compression Speed [MB/s]:', 
total_com_speed 293 | print 'Total Decompression Speed [MB/s]:', total_dcom_speed 294 | print 'Compression (Only) Speed [MB/s]:', com_speed 295 | print 'Decompression (Only) Speed [MB/s]:', dcom_speed 296 | print 'Encoding Speed [MB/s]:', enc_speed 297 | print 'Decoding Speed [MB/s]:', denc_speed 298 | print '' 299 | 300 | return { 301 | 'encoding': enc.name(), 302 | 'compression': com.name(), 303 | 'orig_bytes': dec_MB, 304 | 'comp_bytes': com_MB, 305 | 'ratio': ratio, 306 | 'comp_speed': com_speed, 307 | 'comp_speed_stddev': com_speed_std, 308 | 'decomp_speed': dcom_speed, 309 | 'decomp_speed_stddev': dcom_speed_std, 310 | 'enc_speed': enc_speed, 311 | 'enc_speed_stddev': enc_speed_std, 312 | 'denc_speed': denc_speed, 313 | 'denc_speed_stddev': denc_speed_std, 314 | 'total_comp_speed': total_com_speed, 315 | 'total_comp_speed_stddev': total_com_speed_std, 316 | 'total_decomp_speed': total_dcom_speed, 317 | 'total_decomp_speed_stddev': total_dcom_speed_std 318 | } 319 | 320 | @staticmethod 321 | def run_variable_experiment(com, enc, data, steps, N=100, verbose=True): 322 | '''Benchmark one compression method 323 | com = [BROTLI, BZ2, LZMA, LZO, ZLIB, ZSTD] 324 | data = data set 325 | N = number of runs 326 | *args / **kwargs = settings for the compression method, e.g. 
`9` 327 | ''' 328 | enc_speed = [] 329 | denc_speed = [] 330 | 331 | com_speed = [] 332 | dcom_speed = [] 333 | 334 | total_com_speed = [] 335 | total_dcom_speed = [] 336 | 337 | # run N iterations 338 | for n in range(N): 339 | start_time = time.time() 340 | 341 | # run encoding and compression 342 | encoded_data, t1 = Util.encode(enc, data, steps) 343 | compressed_data, t2 = Util.compress(com, encoded_data, *args) 344 | 345 | # decompress data 346 | if (com.name() == 'LZF'): 347 | # LZF requires the original output size 348 | decompressed_data, t3 = Util.decompress(com, compressed_data, 8 * data.size) 349 | else: 350 | decompressed_data, t3 = Util.decompress(com, compressed_data) 351 | 352 | # make sure the data is returned as an array 353 | if not isinstance(decompressed_data, (np.ndarray, np.generic)) and isinstance(encoded_data, (np.ndarray, np.generic)): 354 | # convert back to numpy array 355 | decompressed_data = np.fromstring(decompressed_data, dtype=encoded_data.dtype) 356 | 357 | # decode the data 358 | decoded_data, t4 = Util.decode(enc, decompressed_data, steps) 359 | 360 | # update the speed lists 361 | enc_speed.append(t1) 362 | denc_speed.append(t4) 363 | 364 | com_speed.append(t2) 365 | dcom_speed.append(t3) 366 | 367 | total_com_speed.append(t1 + t2) 368 | total_dcom_speed.append(t3 + t4) 369 | 370 | # guarantee lossless behavior 371 | assert np.array_equal(np.ndarray.flatten(data), np.ndarray.flatten(decoded_data)) 372 | 373 | print 'Ran iteration ' + str(n + 1) + ' of ' + str(N) + ' on ' + enc.name() + ' + ' + com.name() + ' in %0.2f seconds' % (time.time() - start_time) 374 | sys.stdout.flush() 375 | 376 | com_MB = Util.get_size(compressed_data) / float(1000**2) 377 | dec_MB = Util.get_size(data) / float(1000**2) 378 | 379 | # Higher is better 380 | ratio = dec_MB / com_MB 381 | 382 | # turn the speeds in MB / s 383 | for n in range(N): 384 | if enc_speed[n] == 0: 385 | enc_speed[n] = 0.01 386 | if denc_speed[n] == 0: 387 | denc_speed[n] = 0.01 
388 | if com_speed[n] == 0: 389 | com_speed[n] = 0.01 390 | if dcom_speed[n] == 0: 391 | dcom_speed[n] = 0.01 392 | if total_com_speed[n] == 0: 393 | total_com_speed[n] = 0.01 394 | if total_dcom_speed[n] == 0: 395 | total_dcom_speed[n] = 0.01 396 | 397 | enc_speed[n] = dec_MB / enc_speed[n] 398 | denc_speed[n] = dec_MB / denc_speed[n] 399 | 400 | com_speed[n] = dec_MB / com_speed[n] 401 | dcom_speed[n] = dec_MB / dcom_speed[n] 402 | 403 | total_com_speed[n] = dec_MB / total_com_speed[n] 404 | total_dcom_speed[n] = dec_MB / total_dcom_speed[n] 405 | 406 | # get stddev for speeds 407 | com_speed_std = np.std(com_speed) 408 | dcom_speed_std = np.std(dcom_speed) 409 | 410 | enc_speed_std = np.std(enc_speed) 411 | denc_speed_std = np.std(denc_speed) 412 | 413 | total_com_speed_std = np.std(total_com_speed) 414 | total_dcom_speed_std = np.std(total_dcom_speed) 415 | 416 | # get means for speeds 417 | enc_speed = np.mean(enc_speed) 418 | denc_speed = np.mean(denc_speed) 419 | 420 | com_speed = np.mean(com_speed) 421 | dcom_speed = np.mean(dcom_speed) 422 | 423 | total_com_speed = np.mean(total_com_speed) 424 | total_dcom_speed = np.mean(total_dcom_speed) 425 | 426 | if verbose: 427 | print '>>>> %s + %s <<<<' % (enc.name(), com.name()) 428 | print 'Compression Method:', com.name() 429 | print 'Encoding Method:', enc.name() 430 | print 'Input Size:', dec_MB, 'MB' 431 | print 'Output Size:', com_MB, 'MB' 432 | print 'Ratio:', ratio 433 | print 'Total Compression Speed [MB/s]:', total_com_speed 434 | print 'Total Decompression Speed [MB/s]:', total_dcom_speed 435 | print 'Compression (Only) Speed [MB/s]:', com_speed 436 | print 'Decompression (Only) Speed [MB/s]:', dcom_speed 437 | print 'Encoding Speed [MB/s]:', enc_speed 438 | print 'Decoding Speed [MB/s]:', denc_speed 439 | print '' 440 | 441 | return { 442 | 'encoding': enc.name(), 443 | 'compression': com.name(), 444 | 'orig_bytes': dec_MB, 445 | 'comp_bytes': com_MB, 446 | 'ratio': ratio, 447 | 'comp_speed': 
com_speed,
            'comp_speed_stddev': com_speed_std,
            'decomp_speed': dcom_speed,
            'decomp_speed_stddev': dcom_speed_std,
            'enc_speed': enc_speed,
            'enc_speed_stddev': enc_speed_std,
            'denc_speed': denc_speed,
            'denc_speed_stddev': denc_speed_std,
            'total_comp_speed': total_com_speed,
            'total_comp_speed_stddev': total_com_speed_std,
            'total_decomp_speed': total_dcom_speed,
            'total_decomp_speed_stddev': total_dcom_speed_std
        }

    @staticmethod
    def plot_all(
        results,
        what,
        x_range=None,
        y_range=None,
        name=None,
        leg=True,
        leg_loc='right',
        no_bw=True,
        input_bytes=-1,
        output='',
        emphasis=-1,
        bar_label=-1,
        no_leg_bars=False,
        log=False,
        digital=True,
        no_errorbars=False,
        title=None
    ):
        # Grouped bar chart of `results[what]` per compression method, with
        # one bar each for no encoding / Neuroglancer / Compresso, saved as
        # '<name>_compression_<what>.pdf' under `output`.
        #
        # results    = dict with 'methods' (list of '<Compressor> <Encoder>'
        #              strings), `what`, and optionally `what + '_std'` lists
        # what       = key into `results` selecting the metric to plot
        # y_range    = upper y-axis limit (lower is fixed at 0)
        # name       = required output file prefix; raises ValueError if None
        # leg        = draw a legend at 'upper <leg_loc>'
        # no_bw      = drop the first (alphabetical) method group from the plot
        # input_bytes= horizontal reference line when what == 'bytes_size'
        # emphasis   = index of the x tick label to render bold/larger
        # bar_label  = index of the bar group to annotate with '1'/'2'/'3'
        # digital    = screen (lighter) vs print (darker) color scheme
        # NOTE(review): `x_range` is accepted but never used in the body.

        if name is None:
            raise ValueError(
                'Holy Moly you haven\'t specified a `name`! Shame on you.'
            )

        if what not in results:
            raise ValueError('Wrong `what` parameter. Not found in `results`.')

        labels = []

        # first token of each 'Compressor Encoder' method string
        for i, method in enumerate(results['methods']):
            labels.append(method.split()[0])

        labels = sorted(list(set(labels)), key=lambda s: s.lower())

        font_base = FontProperties()

        font_bold = font_base.copy()
        font_bold.set_weight('bold')

        # one value (and stddev) per compressor label, for each encoder
        none = [0] * len(labels)
        none_std = [0] * len(labels)
        neuroglancer = [0] * len(labels)
        neuroglancer_std = [0] * len(labels)
        compresso = [0] * len(labels)
        compresso_std = [0] * len(labels)

        for i, label in enumerate(labels):
            none_index = label + ' None'

            if none_index in results['methods']:
                none_index = results['methods'].index(none_index)
            else:
                none_index = -1

            neuroglancer_index = label + ' Neuroglancer'

            if neuroglancer_index in results['methods']:
                neuroglancer_index = results['methods'].index(
                    neuroglancer_index
                )
            else:
                neuroglancer_index = -1

            compresso_index = label + ' Compresso'

            if compresso_index in results['methods']:
                compresso_index = results['methods'].index(compresso_index)
            else:
                compresso_index = -1

            if none_index != -1:
                none[i] = results[what][none_index]
                if what + '_std' in results:
                    none_std[i] = results[what + '_std'][none_index]
                else:
                    none_std[i] = 0

            if neuroglancer_index != -1:
                neuroglancer[i] = results[what][neuroglancer_index]
                if what + '_std' in results:
                    neuroglancer_std[i] = results[what + '_std'][neuroglancer_index]
                else:
                    neuroglancer_std[i] = 0

            if compresso_index != -1:
                compresso[i] = results[what][compresso_index]
                if what + '_std' in results:
                    compresso_std[i] = results[what + '_std'][compresso_index]
                else:
                    compresso_std[i] = 0

        plt.figure(figsize=(10, 10))
        N = len(labels)
        ind = np.arange(N)  # the x locations for the groups
        width = 0.25  # the width of the bars

        font = {
            'family': 'sans-serif',
            'size': 13.5
        }
        plt.rc('font', **font)

        if no_bw:
            # drop the first group (presumably a black/white baseline --
            # TODO confirm against the caller)
            ind = ind[1:]
            none = none[1:]
            none_std = none_std[1:]
            neuroglancer = neuroglancer[1:]
            neuroglancer_std = neuroglancer_std[1:]
            compresso = compresso[1:]
            compresso_std = compresso_std[1:]
            labels = labels[1:]

        lab_none = 'No first stage encoding'
        lab_neuroglancer = 'Neuroglancer'
        lab_compresso = 'Compresso'

        if what == 'bytes':
            lab_none = None
            lab_neuroglancer = None
            lab_compresso = None

        if what.endswith('speed') and log:
            # log10 of the speeds, clamped at 1 so log10 is never negative
            none = np.log10([max(x, 1) for x in none])
            neuroglancer = np.log10([max(x, 1) for x in neuroglancer])
            compresso = np.log10([max(x, 1) for x in compresso])

        capthick = 0 if no_errorbars else 2

        fig, ax = plt.subplots()
        ne = ax.bar(
            ind,
            none,
            width,
            color='#bbbbbb',
            label=lab_none,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=none_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )

        ng = ax.bar(
            ind + width,
            neuroglancer,
            width,
            color='#999999' if digital else '#808080',
            label=lab_neuroglancer,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=neuroglancer_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )
        cp = ax.bar(
            ind + width * 2,
            compresso,
            width,
            color='#dc133b',
            label=lab_compresso,
            edgecolor='#ffffff',
            linewidth=0,
            yerr=compresso_std,
            ecolor=(0, 0, 0, 0.2) if digital else (0, 0, 0, 1),
            error_kw=dict(lw=2, capsize=2, capthick=capthick)
        )

        if what == 'bytes_size':
            # dashed reference line showing the uncompressed input size
            ax.axhline(
                y=input_bytes,
                color='gray',
                label='Input',
                linewidth=2,
                linestyle='--'
            )

        ax.tick_params(
            axis='y',
            color='#cccccc' if digital else '#888888',
            labelcolor='#999999' if digital else '#333333'
        )

        ax.tick_params(
            axis='x',
            color='#cccccc'
        )

        xticks_colors = ['#666666' if digital else '#333333'] * len(compresso)
        # NOTE(review): this aliases font_base (copy() is not called), so
        # set_size(15) also mutates font_base.
        compresso_font = font_base
        compresso_font.set_size(15)
        xticks_fonts = [compresso_font] * len(compresso)

        if emphasis >= 0:
            xticks_colors[emphasis] = '#333333' if digital else '#000000'
            xticks_fonts[emphasis] = font_bold
            xticks_fonts[emphasis].set_size(16)

        for xtick, color, fp in zip(
            ax.get_xticklabels(), xticks_colors, xticks_fonts
        ):
            xtick.set_color(color)
            xtick.set_font_properties(fp)

        for spine in ax.spines.values():
            spine.set_edgecolor('#cccccc' if digital else '#888888')

        ylabel = 'Compression Ratio\n(Original / Compressed)'

        if what.endswith('comp_speed'):
            ylabel = 'Compression Speed\n(MB/s)'

        if what.endswith('dcom_speed'):
            ylabel = 'Decompression Speed\n(MB/s)'

        if what.endswith('bytes_size'):
            ylabel = 'Size\n(MB)'
            ax.set_yscale('log', nonposy='clip')

        plt.ylabel(
            ylabel,
            color='#333333',
            labelpad=10,
            fontsize=16
        )

        if leg:
            # rebinds `leg` from the boolean flag to the Legend object
            leg = plt.legend(
                loc='upper %s' % leg_loc,
                prop={
                    'size': 15
                }
            )
            if what != 'bytes':
                if no_leg_bars:
                    if type(leg.legendHandles[0]) == Rectangle:
                        leg.legendHandles[0].set_width(1)

                    if type(leg.legendHandles[1]) == Rectangle:
                        leg.legendHandles[1].set_width(1)

                    if type(leg.legendHandles[2]) == Rectangle:
                        leg.legendHandles[2].set_width(1)

                # leg.legendHandles[0].set_width(2)
                leg.legendHandles[0].set_color('#bbbbbb')
                # leg.legendHandles[1].set_color('#8c8c8c')
                leg.legendHandles[1].set_color(
                    '#999999' if digital else '#777777'
                )
                leg.legendHandles[2].set_color('#dc133b')
                frm = leg.get_frame()
                frm.set_edgecolor('#ffffff')
                frm.set_facecolor('#ffffff')
                leg_texts = leg.get_texts()
                leg_texts[0].set_color('#999999' if digital else '#808080')
                leg_texts[1].set_color('#666666')
                leg_texts[2].set_color('#dc133b')

        # center the group labels under the three bars
        ax.set_xticks(ind + width * 1.5)
        ax.set_xticklabels(labels, rotation='vertical')

        if y_range is not None:
            plt.ylim(0, y_range)

        if no_bw:
            plt.xlim(0.5, len(none) + 1)
        else:
            plt.xlim(-0.5, len(none) + 0.5)

        plt.tick_params(
            axis='x',
            which='both',
            bottom='off',
            top='off'
        )

        if bar_label >= 0:
            # Plot label above bar to clarify relationship
            height = ne[bar_label].get_height()
            plt.text(
                ne[bar_label].get_x() + ne[bar_label].get_width() / 2.0,
                height,
                '1',
                ha='center',
                va='bottom',
                color='#bbbbbb',
                fontproperties=font_bold
            )

            height = ng[bar_label].get_height()
            plt.text(
                ng[bar_label].get_x() + ng[bar_label].get_width() / 2.0,
                height,
                '2',
                ha='center',
                va='bottom',
                color='#999999',
                fontproperties=font_bold
            )

            height = cp[bar_label].get_height()
            plt.text(
                cp[bar_label].get_x() + cp[bar_label].get_width() / 2.0,
                height,
                '3',
                ha='center',
                va='bottom',
                color='#dc133b',
                fontproperties=font_bold
            )

        if title is not None:
            plt.title(title, fontsize=18)

        ttl = ax.title
        ttl.set_position([.5, 1.05])
        # NOTE(review): bare attribute access, not a call -- this line is a
        # no-op; probably intended to be ttl.set_font_properties(...).
        ttl.set_font_properties

        plt.savefig(
            os.path.join(output, '%s_compression_%s.pdf' % (name, what)),
bbox_inches='tight'
        )
--------------------------------------------------------------------------------
/src/c++/compresso.hxx:
--------------------------------------------------------------------------------
#ifndef __COMPRESSO_H__
#define __COMPRESSO_H__

// NOTE(review): the targets of these #include directives and all template
// parameter lists (e.g. "template <typename Type>") appear to have been
// stripped by angle-bracket-eating text extraction throughout this file --
// restore them from the original header before compiling.
#include
#include
#include
#include
#include
#include
#include




namespace Compresso {
    // function definitions
    template unsigned char *Compress(Type *data, long res[3], long steps[3], long *nentries = NULL);
    template Type *Decompress(unsigned char *compressed_data, long *res = NULL);

    // dimension constants (index into res[]/steps[]: z is slowest axis)
    static const short RN_X = 2;
    static const short RN_Y = 1;
    static const short RN_Z = 0;

    // global variables (set by Compress/Decompress before the helpers run;
    // -1 until then)
    static long row_size = -1;
    static long sheet_size = -1;
    static long grid_size = -1;

    // internal helper function: flatten (ix, iy, iz) into a linear index
    static inline long IndicesToIndex(long ix, long iy, long iz) {
        return iz * sheet_size + iy * row_size + ix;
    };



    ///////////////////////////////////////////////////
    //// UNION-FIND CLASS FOR CONNECTED COMPONENTS ////
    ///////////////////////////////////////////////////

    class UnionFindElement {
    public:
        // constructor: a fresh element is its own root with rank 0
        UnionFindElement(unsigned long label) :
            label(label),
            parent(this),
            rank(0)
        {}

    public:
        // instance variables
        unsigned long label;
        UnionFindElement *parent;
        int rank;
    };

    // find the root of x with path compression
    UnionFindElement *
    Find(UnionFindElement *x)
    {
        if (x->parent != x) x->parent = Find(x->parent);
        return x->parent;
    };

    // merge the sets containing x and y (union by rank)
    void
    Union(UnionFindElement *x, UnionFindElement *y)
    {
        UnionFindElement *xroot = Find(x);
        UnionFindElement *yroot = Find(y);

        // root already the same
        if (xroot == yroot) return;

        // merge the two roots
        if (xroot->rank < yroot->rank) xroot->parent = yroot;
        else if (xroot->rank > yroot->rank) yroot->parent = xroot;
        else {
            yroot->parent = xroot;
            xroot->rank = xroot->rank + 1;
        }
    };



    ///////////////////////////////
    //// COMPRESSION ALGORITHM ////
    ///////////////////////////////

    // mark every voxel whose +x or +y neighbor has a different label
    template bool *
    ExtractBoundaries(Type *data, long res[3])
    {
        // create the boundaries array
        bool *boundaries = new bool[grid_size];
        if (!boundaries) { fprintf(stderr, "Failed to allocate memory for boundaries...\n"); return NULL; }

        // determine which pixels differ from east or south neighbors
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    boundaries[iv] = false;

                    if (ix < res[RN_X] - 1) {
                        if (data[iv] != data[IndicesToIndex(ix + 1, iy, iz)]) boundaries[iv] = true;
                    }
                    if (iy < res[RN_Y] - 1) {
                        if (data[iv] != data[IndicesToIndex(ix, iy + 1, iz)]) boundaries[iv] = true;
                    }
                }
            }
        }

        return boundaries;
    };

    // per-slice two-pass connected components over non-boundary voxels;
    // labels are 1-based, boundary voxels stay 0
    static unsigned long *
    ConnectedComponents(bool *boundaries, long res[3])
    {
        // create the connected components grid
        unsigned long *components = new unsigned long[grid_size];
        if (!components) { fprintf(stderr, "Failed to allocate memory for connected components...\n"); return NULL; }

        // initialize to zero
        for (long iv = 0; iv < grid_size; ++iv)
            components[iv] = 0;

        // run connected components for every slice
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            // create a vector of union find elements
            // (element type stripped by extraction; likely UnionFindElement *)
            std::vector union_find = std::vector();

            // current label in connected component
            int curlab = 1;
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // continue if boundary
                    if (boundaries[iv]) continue;

                    // only consider the pixel to the north and west
                    long north = IndicesToIndex(ix - 1, iy, iz);
                    long west = IndicesToIndex(ix, iy - 1, iz);

                    unsigned long neighbor_labels[2] = { 0, 0 };

                    // get the labels for the relevant neighbor
                    if (ix > 0) neighbor_labels[0] = components[north];
                    if (iy > 0) neighbor_labels[1] = components[west];

                    // if the neighbors are boundary, create new label
                    if (!neighbor_labels[0] && !neighbor_labels[1]) {
                        components[iv] = curlab;

                        // add to union find structure
                        union_find.push_back(new UnionFindElement(0));

                        // update the next label
                        curlab++;
                    }
                    // the two pixels have equal non-trivial values
                    else if (neighbor_labels[0] == neighbor_labels[1])
                        components[iv] = neighbor_labels[0];
                    else {
                        if (!neighbor_labels[0]) components[iv] = neighbor_labels[1];
                        else if (!neighbor_labels[1]) components[iv] = neighbor_labels[0];
                        else {
                            // take the minimum value
                            components[iv] = std::min(neighbor_labels[0], neighbor_labels[1]);

                            // set the equivalence relationship
                            Union(union_find[neighbor_labels[0] - 1], union_find[neighbor_labels[1] - 1]);
                        }
                    }
                }
            }

            // reset the current label to 1
            curlab = 1;

            // second pass: relabel every voxel with its root's final label,
            // assigned in scan order
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    if (boundaries[iv]) continue;

                    // get the parent for this component
                    UnionFindElement *comp = Find(union_find[components[iv] - 1]);
                    if (!comp->label) {
                        comp->label = curlab;
                        curlab++;
                    }

                    components[iv] = comp->label;
                }
            }

            // free memory
            for (unsigned long iv = 0; iv < union_find.size(); ++iv)
                delete union_find[iv];
        }

        // return the connected components array
        return components;
    }

    // record, per slice, the original segment id (+1) for each component
    // in first-encounter scan order
    template void
    IDMapping(unsigned long *components, Type *data, std::vector &ids, long res[3])
    {
        // iterate over every slice
        for (int iz = 0; iz < res[RN_Z]; ++iz) {
            // create a set of components for this slice
            // (value type stripped by extraction)
            std::unordered_set hash_map = std::unordered_set();

            // iterate over the entire slice
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // get the component label
                    unsigned long component_id = components[iv];

                    // if this component does not belong yet, add it
                    if (!hash_map.count(component_id)) {
                        hash_map.insert(component_id);

                        // add the segment id (+1 reserves 0 -- TODO confirm
                        // against the decoder)
                        unsigned long segment_id = (unsigned long)data[iv] + 1;
                        ids.push_back(segment_id);
                    }
                }
            }
        }
    }

    // pack the boundary bitmap into one machine word per steps-sized block
    unsigned long *
    EncodeBoundaries(bool *boundaries, long res[3], long steps[3])
    {
        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            // +0.5 guards against floating-point rounding in ceil
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }
        long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X];

        // create an empty array for the encodings
        unsigned long *boundary_data = new unsigned long[nwindows];
        if (!boundary_data) { fprintf(stderr, "Failed to allocate memory for boundary windows...\n"); return NULL; }
        for (long iv = 0; iv < nwindows; ++iv)
            boundary_data[iv] = 0;

        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // no encoding for non-boundaries
                    if (!boundaries[iv]) continue;

                    // find the block from the index
                    long zblock = iz / steps[RN_Z];
                    long yblock = iy / steps[RN_Y];
                    long xblock = ix / steps[RN_X];

                    // find the offset within the block
                    long zoffset = iz % steps[RN_Z];
                    long yoffset = iy % steps[RN_Y];
                    long xoffset = ix % steps[RN_X];

                    long block = zblock * (nblocks[RN_Y] * nblocks[RN_X]) + yblock * nblocks[RN_X] + xblock;
                    long offset = zoffset * (steps[RN_Y] * steps[RN_X]) + yoffset * steps[RN_X] + xoffset;

                    // set the bit for this voxel within the block's word
                    boundary_data[block] += (1LU << offset);
                }
            }
        }

        // return the encodings
        return boundary_data;
    }

    // replace each window word by its index in the sorted list of distinct
    // window values; `values` receives that sorted dictionary
    void
    ValueMapping(unsigned long *boundary_data, std::vector &values, long nwindows)
    {
        // keep a set of seen window values
        std::unordered_set hash_map = std::unordered_set();

        // go through all of the boundary data to create array of values
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!hash_map.count(boundary_data[iv])) {
                hash_map.insert(boundary_data[iv]);
                values.push_back(boundary_data[iv]);
            }
        }

        // sort the values
        sort(values.begin(), values.end());

        // create mapping from values to indices
        std::unordered_map mapping = std::unordered_map();
        for (unsigned long iv = 0; iv < values.size(); ++iv) {
            mapping[values[iv]] = iv;
        }

        // update boundary data
        for (long iv = 0; iv < nwindows; ++iv) {
            boundary_data[iv] = mapping[boundary_data[iv]];
        }
    }

    // for boundary voxels whose label the decoder cannot infer from its
    // north/west neighbors, emit a hint: 0-5 points at a same-label
    // neighbor, >= 6 stores the raw label + 6
    template void
    EncodeIndeterminateLocations(bool *boundaries, Type *data, std::vector &locations, long res[3])
    {
        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    if (!boundaries[iv]) continue;
                    else if (iy > 0 && !boundaries[IndicesToIndex(ix, iy - 1, iz)]) continue;
                    else if (ix > 0 && !boundaries[IndicesToIndex(ix - 1, iy, iz)]) continue;
                    else {
                        // NOTE(review): the north/south/east/west naming here
                        // does not match the axes used elsewhere in the file;
                        // the index math is what matters.
                        long north = IndicesToIndex(ix - 1, iy, iz);
                        long south = IndicesToIndex(ix + 1, iy, iz);
                        long east = IndicesToIndex(ix, iy - 1, iz);
                        long west = IndicesToIndex(ix, iy + 1, iz);
                        long up = IndicesToIndex(ix, iy, iz + 1);
                        long down = IndicesToIndex(ix, iy, iz - 1);

                        // see if any of the immediate neighbors are candidates
                        if (ix > 0 && !boundaries[north] && data[north] == data[iv]) locations.push_back(0);
                        else if (ix < res[RN_X] - 1 && !boundaries[south] && data[south] == data[iv]) locations.push_back(1);
                        else if (iy > 0 && !boundaries[east] && data[east] == data[iv]) locations.push_back(2);
                        else if (iy < res[RN_Y] - 1 && !boundaries[west] && data[west] == data[iv]) locations.push_back(3);
                        else if (iz > 0 && !boundaries[down] && data[down] == data[iv]) locations.push_back(4);
                        else if (iz < res[RN_Z] - 1 && !boundaries[up] && data[up] == data[iv]) locations.push_back(5);
                        else locations.push_back(((unsigned long)data[IndicesToIndex(ix, iy, iz)]) + 6);
                    }
                }
            }
        }
    }

    // smallest number of bytes that can represent maximum_value
    // NOTE(review): the 1L << 32 and larger shifts require a 64-bit long;
    // undefined behavior on ILP32 targets.
    static unsigned char
    BytesNeeded(unsigned long maximum_value)
    {
        if (maximum_value < 1L << 8) return 1;
        else if (maximum_value < 1L << 16) return 2;
        else if (maximum_value < 1L << 24) return 3;
        else if (maximum_value < 1L << 32) return 4;
        else if (maximum_value < 1L << 40) return 5;
        else if (maximum_value < 1L << 48) return 6;
        else if (maximum_value < 1L << 56) return 7;
        else return 8;
    }

    // append `value` little-endian into the byte stream using nbytes bytes
    static void
    AppendValue(std::vector &data, unsigned long value, unsigned char nbytes)
    {
        for (unsigned char iv = 0; iv < nbytes; ++iv) {
            // get the 8 low order bits
            unsigned char low_order = value % 256;
            // add the one byte to the data
            data.push_back(low_order);
            // update the value by shifting one byte to the left
            value = value >> 8;
        }
    }

    // top-level Compresso encoder: boundary bitmap + per-component ids +
    // window dictionary + indeterminate-location hints, serialized as a
    // little-endian byte stream (header documented inline below).
    // Returns a heap-allocated buffer; *nentries receives its length.
    // NOTE(review): template parameter list stripped by extraction here too.
    template unsigned char *
    Compress(Type *data, long res[3], long steps[3], long *nentries)
    {
        // set the global variables used by IndicesToIndex and the helpers
        row_size = res[RN_X];
        sheet_size = res[RN_X] * res[RN_Y];
        grid_size = res[RN_X] * res[RN_Y] * res[RN_Z];

        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }
        long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X];

        // get the boundary voxels
        std::clock_t start_time = std::clock();
        bool *boundaries = ExtractBoundaries(data, res);
        if (!boundaries) return NULL;
        printf("Extract boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the connected components
        // use unsigned long since there could be more components than Type.MAX
        start_time = std::clock();
        unsigned long *components = ConnectedComponents(boundaries, res);
        if (!components) return NULL;
        printf("Connected components: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the ids
        start_time = std::clock();
        std::vector ids = std::vector();
        IDMapping(components, data, ids, res);
        printf("ID mapping: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // free memory
        delete[] components;

        // encode the boundary data
        start_time = std::clock();
        unsigned long *boundary_data = EncodeBoundaries(boundaries, res, steps);
        if (!boundary_data) return NULL;
        printf("Encode boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // map the window values
        start_time = std::clock();
        std::vector values = std::vector();
        ValueMapping(boundary_data, values, nwindows);
        printf("Map values: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the locations
        start_time = std::clock();
        std::vector locations = std::vector();
        EncodeIndeterminateLocations(boundaries, data, locations, res);
        printf("Encode locations: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC);

        // get the maximum id value
        unsigned long maximum_id = 0;
        for (unsigned long iv = 0; iv < ids.size(); ++iv)
            if (ids[iv] > maximum_id) maximum_id = ids[iv];

        // get the maximum value for the locations array
        unsigned long maximum_location = 0;
        for (unsigned long iv = 0; iv < locations.size(); ++iv)
            if (locations[iv] > maximum_location) maximum_location = locations[iv];

        // get the maximum boundary data window value
        // (2n+1 encoding for literals, 2n for zero runs -- see below)
        unsigned long maximum_boundary_data = 2 * values.size() + 1;
        // find the maximum run of zeros
        unsigned long maximum_zero_run = 0;
        unsigned long current_run = 0;
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!boundary_data[iv]) current_run++;
            else {
                if (current_run > maximum_zero_run) maximum_zero_run = current_run;
                current_run = 0;
            }
        }
        if (current_run > maximum_zero_run) maximum_zero_run = current_run;
        // multiply by two to pad for run length encoding
        maximum_zero_run *= 2;
        if (maximum_zero_run > maximum_boundary_data) maximum_boundary_data = maximum_zero_run;

        // get the number of bits per window as small as possible
        unsigned char bytes_per_window = steps[RN_X] * steps[RN_Y] * steps[RN_Z] / 8;
        unsigned char bytes_per_id = BytesNeeded(maximum_id);
        unsigned char bytes_per_location = BytesNeeded(maximum_location);
        unsigned char bytes_per_data = BytesNeeded(maximum_boundary_data);

        std::vector compressed_data = std::vector();

        // add the header to the decompressed data
        // (nine 8-byte fields, then five 1-byte fields)
        AppendValue(compressed_data, res[RN_Z], 8);
        AppendValue(compressed_data, res[RN_Y], 8);
        AppendValue(compressed_data, res[RN_X], 8);
        AppendValue(compressed_data, steps[RN_Z], 8);
        AppendValue(compressed_data, steps[RN_Y], 8);
        AppendValue(compressed_data, steps[RN_X], 8);
        AppendValue(compressed_data, values.size(), 8);
        AppendValue(compressed_data, ids.size(), 8);
        AppendValue(compressed_data, locations.size(), 8);
        // need one byte to say how large each chunk of data is
        AppendValue(compressed_data, bytes_per_window, 1);
        AppendValue(compressed_data, bytes_per_id, 1);
        AppendValue(compressed_data, bytes_per_location, 1);
        AppendValue(compressed_data, bytes_per_data, 1);
        // only final byte shows the original data Type
        AppendValue(compressed_data, sizeof(Type), 1);

        // add in all window values
        for (unsigned long iv = 0; iv < values.size(); ++iv)
            AppendValue(compressed_data, values[iv], bytes_per_window);
        // add in all ids
        for (unsigned long iv = 0; iv < ids.size(); ++iv)
            AppendValue(compressed_data, ids[iv], bytes_per_id);
        // add in all locations
        for (unsigned long iv = 0; iv < locations.size(); ++iv)
            AppendValue(compressed_data, locations[iv], bytes_per_location);

        // add in all boundary data - apply run length encoding
        // (even value = zero-run length * 2, odd value = literal * 2 + 1)
        unsigned long current_zero_run = 0;
        for (long iv = 0; iv < nwindows; ++iv) {
            if (!boundary_data[iv]) current_zero_run++;
            else {
                if (current_zero_run) AppendValue(compressed_data, 2 * current_zero_run, bytes_per_data);
                AppendValue(compressed_data, 2 * boundary_data[iv] + 1, bytes_per_data);
                current_zero_run = 0;
            }
        }

        // have to add in the last zero run
        if (current_zero_run) AppendValue(compressed_data, 2 * current_zero_run, bytes_per_data);

        // copy the byte vector into a plain heap array owned by the caller
        unsigned char *compressed_pointer = new unsigned char[compressed_data.size()];
        for (unsigned long iv = 0; iv < compressed_data.size(); ++iv)
            compressed_pointer[iv] = compressed_data[iv];

        if (nentries) *nentries = compressed_data.size();

        // free memory
        delete[] boundaries;
        delete[] boundary_data;

        return compressed_pointer;
    };

    // read an nbytes little-endian value from data at offset, advancing
    // offset past it
    static unsigned long
    ExtractValue(unsigned char *data, unsigned long &offset, unsigned char nbytes)
    {
        // set the value to 0
        unsigned long value = 0;
        for (unsigned char iv = 0; iv < nbytes; ++iv) {
            // get the current bit values
            unsigned long byte = (unsigned long)data[offset];
            // shift over the proper amount
            byte = byte << (8 * iv);
            // update the value
            value += byte;
            // update the offset
            offset++;
        }

        return value;
    }

    // inverse of EncodeBoundaries: expand per-block window words (via the
    // `values` dictionary) back into the full boolean boundary grid
    // (definition continues past the end of this excerpt)
    static bool *
    DecodeBoundaries(unsigned long *boundary_data, std::vector &values, long res[3], long steps[3])
    {
        // determine the number of blocks in each direction
        long nblocks[3];
        for (int dim = 0; dim <= 2; ++dim) {
            nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5);
        }

        bool *boundaries = new bool[grid_size];
        for (long iv = 0; iv < grid_size; ++iv)
            boundaries[iv] = false;

        for (long iz = 0; iz < res[RN_Z]; ++iz) {
            for (long iy = 0; iy < res[RN_Y]; ++iy) {
                for (long ix = 0; ix < res[RN_X]; ++ix) {
                    long iv = IndicesToIndex(ix, iy, iz);

                    // find the block from the index
                    long zblock = iz / steps[RN_Z];
                    long yblock = iy / steps[RN_Y];
                    long xblock = ix / steps[RN_X];

                    // find the offset within the block
                    long zoffset = iz % steps[RN_Z];
                    long yoffset = iy % steps[RN_Y];
                    long xoffset = ix % steps[RN_X];

                    long block = zblock * (nblocks[RN_Y] * nblocks[RN_X]) + yblock * nblocks[RN_X] + xblock;
565 | long offset = zoffset * (steps[RN_Y] * steps[RN_X]) + yoffset * steps[RN_X] + xoffset; 566 | 567 | unsigned long value = values[boundary_data[block]]; 568 | if ((value >> offset) % 2) boundaries[iv] = true; 569 | } 570 | } 571 | } 572 | 573 | return boundaries; 574 | } 575 | 576 | template Type * 577 | IDReverseMapping(unsigned long *components, std::vector ids, long res[3]) 578 | { 579 | Type *decompressed_data = new Type[grid_size]; 580 | for (long iv = 0; iv < grid_size; ++iv) 581 | decompressed_data[iv] = 0; 582 | 583 | int ids_index = 0; 584 | for (long iz = 0; iz < res[RN_Z]; ++iz) { 585 | // create mapping (not memory efficient but FAST!!) 586 | // number of components is guaranteed to be less than ids->size() 587 | unsigned long *mapping = new unsigned long[ids.size() + 1]; 588 | for (unsigned long iv = 0; iv < ids.size() + 1; ++iv) 589 | mapping[iv] = 0; 590 | 591 | for (long iy = 0; iy < res[RN_Y]; ++iy) { 592 | for (long ix = 0; ix < res[RN_X]; ++ix) { 593 | long iv = IndicesToIndex(ix, iy, iz); 594 | if (!mapping[components[iv]]) { 595 | mapping[components[iv]] = ids[ids_index]; 596 | ids_index++; 597 | } 598 | 599 | decompressed_data[iv] = (Type)(mapping[components[iv]] - 1); 600 | } 601 | } 602 | } 603 | 604 | return decompressed_data; 605 | } 606 | 607 | template void 608 | DecodeIndeterminateLocations(bool *boundaries, Type *decompressed_data, std::vector locations, long res[3]) 609 | { 610 | long index = 0; 611 | 612 | // go through all voxels 613 | for (long iz = 0; iz < res[RN_Z]; ++iz) { 614 | for (long iy = 0; iy < res[RN_Y]; ++iy) { 615 | for (long ix = 0; ix < res[RN_X]; ++ix) { 616 | long iv = IndicesToIndex(ix, iy, iz); 617 | 618 | // get the north and west neighbors 619 | long north = IndicesToIndex(ix - 1, iy, iz); 620 | long west = IndicesToIndex(ix, iy - 1, iz); 621 | 622 | if (!boundaries[iv]) continue; 623 | else if (ix > 0 && !boundaries[north]) { 624 | decompressed_data[iv] = decompressed_data[north]; 625 | } 626 | else if (iy 
> 0 && !boundaries[west]) { 627 | decompressed_data[iv] = decompressed_data[west]; 628 | } 629 | else { 630 | unsigned long offset = locations[index]; 631 | if (offset == 0) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix - 1, iy, iz)]; 632 | else if (offset == 1) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix + 1, iy, iz)]; 633 | else if (offset == 2) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy - 1, iz)]; 634 | else if (offset == 3) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy + 1, iz)]; 635 | else if (offset == 4) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz - 1)]; 636 | else if (offset == 5) decompressed_data[iv] = decompressed_data[IndicesToIndex(ix, iy, iz + 1)]; 637 | else decompressed_data[iv] = (Type)(offset - 6); 638 | index++; 639 | } 640 | } 641 | } 642 | } 643 | } 644 | 645 | template Type* 646 | Decompress(unsigned char *compressed_data, long *res) 647 | { 648 | // extract all of the header information 649 | if (!res) res = new long[3]; 650 | long steps[3]; 651 | 652 | // the offset for the compressed data 653 | unsigned long offset = 0; 654 | // extract the header 655 | res[RN_Z] = ExtractValue(compressed_data, offset, 8); 656 | res[RN_Y] = ExtractValue(compressed_data, offset, 8); 657 | res[RN_X] = ExtractValue(compressed_data, offset, 8); 658 | steps[RN_Z] = ExtractValue(compressed_data, offset, 8); 659 | steps[RN_Y] = ExtractValue(compressed_data, offset, 8); 660 | steps[RN_X] = ExtractValue(compressed_data, offset, 8); 661 | unsigned long nvalues = ExtractValue(compressed_data, offset, 8); 662 | unsigned long nids = ExtractValue(compressed_data, offset, 8); 663 | unsigned long nlocations = ExtractValue(compressed_data, offset, 8); 664 | unsigned char bytes_per_window = ExtractValue(compressed_data, offset, 1); 665 | unsigned char bytes_per_id = ExtractValue(compressed_data, offset, 1); 666 | unsigned char bytes_per_location = ExtractValue(compressed_data, 
offset, 1); 667 | unsigned char bytes_per_data = ExtractValue(compressed_data, offset, 1); 668 | unsigned char bytes_for_output = ExtractValue(compressed_data, offset, 1); 669 | 670 | // set the global variables 671 | row_size = res[RN_X]; 672 | sheet_size = res[RN_X] * res[RN_Y]; 673 | grid_size = res[RN_X] * res[RN_Y] * res[RN_Z]; 674 | 675 | // determine the number of blocks in each direction 676 | long nblocks[3]; 677 | for (int dim = 0; dim <= 2; ++dim) { 678 | nblocks[dim] = (long) (ceil((double)res[dim] / steps[dim]) + 0.5); 679 | } 680 | long nwindows = nblocks[RN_Z] * nblocks[RN_Y] * nblocks[RN_X]; 681 | 682 | // allocate memory for all arrays 683 | std::vector ids = std::vector(); 684 | std::vector values = std::vector(); 685 | std::vector locations = std::vector(); 686 | unsigned long *boundary_data = new unsigned long[nwindows]; 687 | for (unsigned long iv = 0; iv < nvalues; ++iv) 688 | values.push_back(ExtractValue(compressed_data, offset, bytes_per_window)); 689 | for (unsigned long iv = 0; iv < nids; ++iv) 690 | ids.push_back(ExtractValue(compressed_data, offset, bytes_per_id)); 691 | for (unsigned long iv = 0; iv < nlocations; ++iv) 692 | locations.push_back(ExtractValue(compressed_data, offset, bytes_per_location)); 693 | 694 | // get the boundary data (undo run length encoding) 695 | long iv = 0; 696 | while (iv < nwindows) { 697 | unsigned long window_value = ExtractValue(compressed_data, offset, bytes_per_data); 698 | if (window_value % 2) { 699 | window_value = window_value / 2; 700 | assert (iv < nwindows); 701 | boundary_data[iv] = window_value; 702 | iv++; 703 | } 704 | else { 705 | unsigned long nzeros = window_value / 2; 706 | for (unsigned long iz = 0; iz < nzeros; ++iz, ++iv) { 707 | boundary_data[iv] = 0; 708 | } 709 | } 710 | } 711 | 712 | // get the boundaries from the data 713 | std::clock_t start_time = std::clock(); 714 | bool *boundaries = DecodeBoundaries(boundary_data, values, res, steps); 715 | if (!boundaries) return NULL; 716 
| printf("Decode boundaries: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 717 | 718 | // free memory 719 | delete[] boundary_data; 720 | 721 | // get the connected components 722 | start_time = std::clock(); 723 | unsigned long *components = ConnectedComponents(boundaries, res); 724 | if (!components) return NULL; 725 | printf("Connected components: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 726 | 727 | // decompress the data 728 | start_time = std::clock(); 729 | Type *decompressed_data = IDReverseMapping(components, ids, res); 730 | if (!decompressed_data) return NULL; 731 | printf("Reverse mapping: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 732 | 733 | // free memory 734 | delete[] components; 735 | 736 | // decode the final indeterminate locations 737 | start_time = std::clock(); 738 | DecodeIndeterminateLocations(boundaries, decompressed_data, locations, res); 739 | printf("Decode locations: %lf\n", (double)(std::clock() - start_time) / CLOCKS_PER_SEC); 740 | 741 | // return the decompressed data 742 | return decompressed_data; 743 | } 744 | }; 745 | 746 | #endif --------------------------------------------------------------------------------