├── VERSION
├── requirements.txt
├── requirements-tests.txt
├── doc
    ├── source
    │   ├── development
    │   │   ├── roadmap.rst
    │   │   ├── contributing.rst
    │   │   └── index.rst
    │   ├── release_notes
    │   │   └── index.rst
    │   ├── caterva-logo.png
    │   ├── _static
    │   │   ├── blosc-logo_128.png
    │   │   └── css
    │   │   │   └── custom.css
    │   ├── getting_started
    │   │   ├── index.rst
    │   │   ├── overview.rst
    │   │   ├── installation.rst
    │   │   └── tutorial.md
    │   ├── reference
    │   │   ├── index.rst
    │   │   ├── first_level.rst
    │   │   ├── constructors.rst
    │   │   ├── meta.rst
    │   │   └── ndarray.rst
    │   ├── index.rst
    │   └── conf.py
    ├── requirements.txt
    ├── Makefile
    └── make.bat
├── requirements-build.txt
├── .flake8
├── pyproject.toml
├── .gitmodules
├── CMakeLists.txt
├── code_of_conduct.md
├── .pre-commit-config.yaml
├── MANIFEST.in
├── ROADMAP.rst
├── caterva
    ├── __init__.py
    ├── CMakeLists.txt
    ├── utils.py
    ├── info.py
    ├── meta.py
    ├── ndarray.py
    ├── constructors.py
    └── caterva_ext.pyx
├── examples
    ├── ex_resize.py
    ├── ex_numpy.py
    ├── ex_buffer.py
    ├── ex_empty.py
    ├── ex_getitem.py
    ├── ex_copy.py
    ├── ex_meta.py
    ├── ex_persistency.py
    └── ex_formats.py
├── tests
    ├── test_buffer.py
    ├── test_numpy.py
    ├── test_resize.py
    ├── test_full.py
    ├── test_zeros.py
    ├── test_persistency.py
    ├── test_getitem.py
    ├── test_empty.py
    ├── test_metalayers.py
    └── test_copy.py
├── ANNOUNCE.rst
├── README.md
├── .github
    └── workflows
    │   ├── python-package.yml
    │   └── python-publish.yml
├── LICENSES
    ├── BLOSC.txt
    └── CATERVA.txt
├── LICENSE
├── RELEASING.rst
├── bench
    ├── compare_getitem.py
    ├── compare_loadframe.py
    ├── compare_reduceframe.py
    ├── compare_getslice.py
    └── compare_serialization.py
├── CONTRIBUTING.rst
├── setup.py
├── .gitignore
└── RELEASE_NOTES.rst


/VERSION:
--------------------------------------------------------------------------------
1 | 0.7.4.dev0
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ndindex>=1.4
2 | numpy>=1.20.3
3 | 


--------------------------------------------------------------------------------
/requirements-tests.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pytest
3 | msgpack
4 | 


--------------------------------------------------------------------------------
/doc/source/development/roadmap.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../../ROADMAP.rst
2 | 


--------------------------------------------------------------------------------
/doc/source/release_notes/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../../RELEASE_NOTES.rst
2 | 


--------------------------------------------------------------------------------
/doc/source/development/contributing.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../../CONTRIBUTING.rst
2 | 


--------------------------------------------------------------------------------
/requirements-build.txt:
--------------------------------------------------------------------------------
1 | setuptools
2 | wheel
3 | scikit-build
4 | cmake
5 | ninja
6 | cython
7 | 


--------------------------------------------------------------------------------
/doc/source/caterva-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Blosc/python-caterva/HEAD/doc/source/caterva-logo.png


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 |     __init__.py,
4 |     c-blosc2,
5 |     Caterva
6 | max-line-length = 99
7 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel", "scikit-build", "cmake", "ninja", "cython"]
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caterva/caterva"]
2 | 	path = caterva/caterva
3 | 	url = https://github.com/Blosc/caterva.git
4 | 


--------------------------------------------------------------------------------
/doc/source/_static/blosc-logo_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Blosc/python-caterva/HEAD/doc/source/_static/blosc-logo_128.png


--------------------------------------------------------------------------------
/doc/source/development/index.rst:
--------------------------------------------------------------------------------
1 | Development
2 | ===========
3 | 
4 | .. toctree::
5 |     :maxdepth: 2
6 | 
7 |     contributing
8 |     roadmap
9 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.11.0)
2 | project(caterva)
3 | find_package(PythonExtensions REQUIRED)
4 | find_package(Cython REQUIRED)
5 | 
6 | add_subdirectory(caterva)
7 | 


--------------------------------------------------------------------------------
/doc/source/getting_started/index.rst:
--------------------------------------------------------------------------------
 1 | Getting Started
 2 | ===============
 3 | 
 4 | .. toctree::
 5 |     :maxdepth: 2
 6 | 
 7 |     overview
 8 |     installation
 9 |     tutorial
10 | 


--------------------------------------------------------------------------------
/doc/source/reference/index.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | =============
 3 | 
 4 | .. toctree::
 5 |     :maxdepth: 2
 6 | 
 7 |     first_level
 8 |     constructors
 9 |     ndarray
10 |     meta
11 | 


--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpydoc
 2 | sphinx
 3 | cython
 4 | numpy
 5 | scikit-build
 6 | pytest
 7 | msgpack
 8 | cmake
 9 | pydata-sphinx-theme
10 | sphinx-inline-tabs
11 | sphinx-panels
12 | myst-nb
13 | 


--------------------------------------------------------------------------------
/code_of_conduct.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to.
4 | Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md)
5 | so that you can understand what actions will and will not be tolerated.
6 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v3.2.0
 4 |     hooks:
 5 |     -   id: trailing-whitespace
 6 |     -   id: end-of-file-fixer
 7 |     -   id: check-yaml
 8 |     -   id: check-added-large-files
 9 | 
10 | -   repo: https://gitlab.com/pycqa/flake8
11 |     rev: ''  # pick a git hash / tag to point to
12 |     hooks:
13 |     -   id: flake8
14 |         files: caterva/*
15 | 


--------------------------------------------------------------------------------
/doc/source/getting_started/overview.rst:
--------------------------------------------------------------------------------
1 | What is python-caterva?
2 | =======================
3 | 
4 | Caterva is a container for multidimensional data that is specially designed to read dataset slices in a very efficient way.
5 | It uses the metalayer capabilities present in superchunks/frames in order to store the multidimensionality information.
6 | Python-caterva is the Python wrapper for `Caterva <https://caterva.readthedocs.io/en/latest/index.html>`__.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include MANIFEST.in
 2 | include LICENSE
 3 | include VERSION
 4 | include *.txt *.rst *.md
 5 | exclude RELEASING.rst
 6 | include setup.py
 7 | include *.toml
 8 | 
 9 | recursive-include caterva *.py *.pyx *.pxd *.c *.h *.txt *in *.cmake *.rc
10 | recursive-include tests *.py
11 | recursive-include bench *.py *.txt
12 | recursive-include doc *.rst *.md *.txt *.py *.pdf *.html *.css *.png
13 | recursive-exclude doc/_build *
14 | recursive-include LICENSES *
15 | 


--------------------------------------------------------------------------------
/doc/source/getting_started/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | You can install Caterva wheels via PyPI using Pip or clone the GitHub repository.
 4 | 
 5 | Pip
 6 | +++
 7 | 
 8 | .. code-block::
 9 | 
10 |     python -m pip install caterva
11 | 
12 | 
13 | Source code
14 | +++++++++++
15 | 
16 | .. code-block::
17 | 
18 |     git clone --recurse-submodules https://github.com/Blosc/python-caterva
19 |     cd python-caterva
20 |     python -m pip install .
21 | 


--------------------------------------------------------------------------------
/doc/source/reference/first_level.rst:
--------------------------------------------------------------------------------
 1 | Global variables
 2 | ================
 3 | There are some global variables in Caterva that can be used at any time and make code clearer during compression and decompression.
 4 | 
 5 | .. py:attribute:: caterva.__version__
 6 | 
 7 |     The version of the caterva package.
 8 | 
 9 | .. autoclass:: caterva.Codec
10 |    :members:
11 |    :undoc-members:
12 | 
13 | .. autoclass:: caterva.Filter
14 |    :members:
15 |    :undoc-members:
16 | 


--------------------------------------------------------------------------------
/doc/source/reference/constructors.rst:
--------------------------------------------------------------------------------
 1 | Constructors
 2 | ============
 3 | These functions let users create Caterva arrays either from scratch or from a dataset in another format.
 4 | 
 5 | .. currentmodule:: caterva
 6 | 
 7 | Basics
 8 | ------
 9 | 
10 | .. autosummary::
11 |    :toctree: api/
12 | 
13 |     empty
14 |     copy
15 |     from_buffer
16 |     open
17 |     asarray
18 | 
19 | 
20 | Utils
21 | -----
22 | 
23 | .. autosummary::
24 |    :toctree: api/utils
25 | 
26 |     remove
27 | 


--------------------------------------------------------------------------------
/doc/source/reference/meta.rst:
--------------------------------------------------------------------------------
 1 | Metalayers
 2 | ==========
 3 | Metalayers are small pieces of metadata that describe the properties of the data stored in a container. Caterva implements its own metalayer on top of C-Blosc2 for storing multidimensional information.
 4 | 
 5 | .. currentmodule:: caterva.meta
 6 | 
 7 | .. autoclass:: Meta
 8 |    :exclude-members: get, keys, items, values
 9 | 
10 | .. currentmodule:: caterva.meta.Meta
11 | 
12 | Methods
13 | -------
14 | 
15 | .. autosummary::
16 |     :toctree: api/meta
17 |     :nosignatures:
18 | 
19 |     __getitem__
20 |     __setitem__
21 |     get
22 |     keys
23 |     __iter__
24 |     __contains__
25 | 


--------------------------------------------------------------------------------
/ROADMAP.rst:
--------------------------------------------------------------------------------
 1 | Roadmap
 2 | =======
 3 | 
 4 | This document lists the main goals for the upcoming python-caterva releases.
 5 | 
 6 | 
 7 | Features
 8 | --------
 9 | 
10 | * *Support for variable-length metalayers*.
11 |   This would give users a lot of flexibility to define their own metadata.
12 | 
13 | * *Resize array dimensions*.
14 |   This feature would allow Caterva to increase or decrease the size of any dimension of an array.
15 | 
16 | 
17 | Interoperability
18 | ----------------
19 | 
20 | * *Third-party integration*. Caterva needs better integration with libraries like:
21 | 
22 |     * xarray (labeled arrays)
23 |     * dask (computation)
24 |     * napari (visualization)
25 | 


--------------------------------------------------------------------------------
/caterva/__init__.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | 
10 | from .version import __version__
11 | 
12 | from . import caterva_ext as ext
13 | 
14 | # Public API for container module
15 | from .constructors import (empty, zeros, full, from_buffer, open, asarray, copy)
16 | 
17 | from .ndarray import NDArray
18 | 
19 | from .utils import Codec, Filter, remove
20 | 


--------------------------------------------------------------------------------
/examples/ex_resize.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | np.random.seed(123)
13 | 
14 | shape = (8, 8)
15 | chunks = (4, 4)
16 | blocks = (2, 2)
17 | 
18 | fill_value = b"1"
19 | a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)
20 | 
21 | a.resize((10, 10))
22 | 
23 | print(a[:])
24 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/doc/source/reference/ndarray.rst:
--------------------------------------------------------------------------------
 1 | NDArray
 2 | =======
 3 | 
 4 | The multidimensional data array class. This class provides a set of useful attributes and methods that make it possible not only to define an array correctly, but also to handle it in a simple way, including extracting multidimensional slices from it.
 5 | 
 6 | .. currentmodule:: caterva.NDArray
 7 | 
 8 | Attributes
 9 | ----------
10 | 
11 | .. autosummary::
12 |     :toctree: api/ndarray
13 | 
14 |     itemsize
15 |     ndim
16 |     shape
17 |     chunks
18 |     blocks
19 |     meta
20 | 
21 | Methods
22 | -------
23 | 
24 | .. autosummary::
25 |     :toctree: api/ndarray
26 |     :nosignatures:
27 | 
28 |     __getitem__
29 |     __setitem__
30 |     slice
31 |     resize
32 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/examples/ex_numpy.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | 
13 | shape = (1234, 23)
14 | chunks = (253, 23)
15 | blocks = (10, 23)
16 | 
17 | dtype = bool
18 | 
19 | # Create a buffer
20 | nparray = np.random.choice(a=[True, False], size=np.prod(shape)).reshape(shape)
21 | 
22 | # Create a caterva array from a numpy array
23 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks)
24 | b = a.copy()
25 | 
26 | # Convert a caterva array to a numpy array
27 | nparray2 = np.asarray(b).view(dtype)
28 | 
29 | np.testing.assert_almost_equal(nparray, nparray2)
30 | 


--------------------------------------------------------------------------------
/examples/ex_buffer.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | np.random.seed(123)
13 | 
14 | shape = (50, 50)
15 | chunks = (49, 49)
16 | blocks = (48, 48)
17 | 
18 | itemsize = 8
19 | 
20 | # Create a buffer
21 | buffer = bytes(np.random.normal(0, 1, np.prod(shape)) * itemsize)
22 | 
23 | # Create a caterva array from a buffer
24 | 
25 | a = cat.from_buffer(buffer, shape, chunks=chunks, blocks=blocks, itemsize=itemsize)
26 | print(a.filters)
27 | print(a.codec)
28 | print(a.cratio)
29 | 
30 | # Convert a caterva array to a buffer
31 | buffer2 = a.to_buffer()
32 | assert buffer == buffer2
33 | 


--------------------------------------------------------------------------------
/caterva/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(STATIC_LIB ON CACHE BOOL "Build a static version of the blosc library.")
 2 | set(SHARED_LIB ON CACHE BOOL "Build a shared library version of the blosc
 3 |     library.")
 4 | set(CATERVA_BUILD_TESTS OFF CACHE BOOL "Build Caterva tests")
 5 | set(CATERVA_BUILD_EXAMPLES OFF CACHE BOOL "Build Caterva examples")
 6 | set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 7 | add_subdirectory(caterva)
 8 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/caterva/caterva")
 9 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/caterva/contribs/c-blosc2/include")
10 | 
11 | 
12 | add_cython_target(caterva_ext caterva_ext.pyx)
13 | add_library(caterva_ext MODULE ${caterva_ext})
14 | 
15 | target_link_libraries(caterva_ext caterva_static)
16 | 
17 | python_extension_module(caterva_ext)
18 | 
19 | add_custom_command(
20 |     TARGET caterva_ext POST_BUILD
21 |     COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:caterva_ext> ${CMAKE_SOURCE_DIR}/caterva
22 | )
23 | 
24 | install(TARGETS caterva_ext LIBRARY DESTINATION caterva)
25 | 


--------------------------------------------------------------------------------
/tests/test_buffer.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize",
15 |                          [
16 |                              ([450], [128], [25], 8),
17 |                              ([20, 134, 13], [3, 13, 5], [3, 10, 5], 4),
18 |                          ])
19 | def test_buffer(shape, chunks, blocks, itemsize):
20 |     size = int(np.prod(shape))
21 |     buffer = bytes(size * itemsize)
22 |     a = cat.from_buffer(buffer, shape, itemsize, chunks=chunks, blocks=blocks)
23 |     buffer2 = a.to_buffer()
24 |     assert buffer == buffer2
25 | 


--------------------------------------------------------------------------------
/tests/test_numpy.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, chunks, blocks, dtype",
15 |                          [
16 |                              ([931], [223], [45], np.int32),
17 |                              ([134, 121, 78], [12, 13, 18], [4, 4, 9], np.float64),
18 |                          ])
19 | def test_numpy(shape, chunks, blocks, dtype):
20 |     size = int(np.prod(shape))
21 |     nparray = np.arange(size, dtype=dtype).reshape(shape)
22 |     a = cat.asarray(nparray, chunks=chunks, blocks=blocks)
23 |     nparray2 = np.asarray(a[:]).view(dtype)
24 |     np.testing.assert_almost_equal(nparray, nparray2)
25 | 


--------------------------------------------------------------------------------
/examples/ex_empty.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | np.random.seed(123)
13 | 
14 | 
15 | shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads, filters = (
16 |     (400, 399, 401),
17 |     (20, 10, 130),
18 |     (6, 6, 26),
19 |     3,
20 |     cat.Codec.BLOSCLZ,
21 |     5,
22 |     False,
23 |     2,
24 |     [cat.Filter.DELTA, cat.Filter.TRUNC_PREC]
25 | )
26 | 
27 | a = cat.empty(shape, chunks=chunks,
28 |               blocks=blocks,
29 |               itemsize=itemsize,
30 |               codec=codec,
31 |               clevel=clevel,
32 |               use_dict=use_dict,
33 |               nthreads=nthreads,
34 |               filters=filters)
35 | 
36 | print("HOLA")
37 | 


--------------------------------------------------------------------------------
/examples/ex_getitem.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | 
13 | shape = (10, 10)
14 | chunks = (5, 7)
15 | blocks = (2, 2)
16 | 
17 | slices = (slice(2, 5), slice(4, 8))
18 | 
19 | dtype = np.int32
20 | itemsize = np.dtype(dtype).itemsize
21 | 
22 | # Create a numpy array
23 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
24 | 
25 | # Create a caterva array from a numpy array
26 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks)
27 | 
28 | # Get a slice
29 | buffer = np.asarray(a[slices]).view(dtype)
30 | buffer2 = nparray[slices]
31 | 
32 | np.testing.assert_almost_equal(buffer, buffer2)
33 | 
34 | a[slices] = np.ones((5, 5), dtype=dtype)
35 | 
36 | print(np.asarray(a[...]).view(dtype))
37 | 


--------------------------------------------------------------------------------
/caterva/utils.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | from enum import Enum
10 | import os
11 | import shutil
12 | 
13 | 
14 | class Codec(Enum):
15 |     """
16 |     Compression codec identifiers (note that the numbering skips the value 3).
17 |     """
18 |     BLOSCLZ = 0  # NOTE(review): values presumably mirror the C-Blosc2 codec codes -- confirm
19 |     LZ4 = 1
20 |     LZ4HC = 2
21 |     ZLIB = 4
22 |     ZSTD = 5
23 | 
24 | 
25 | class Filter(Enum):
26 |     """
27 |     Filter identifiers that can be passed in a ``filters`` list when creating an array.
28 |     """
29 |     NOFILTER = 0
30 |     SHUFFLE = 1
31 |     BITSHUFFLE = 2
32 |     DELTA = 3
33 |     TRUNC_PREC = 4
34 | 
35 | 
36 | def remove(urlpath):
37 |     """
38 |     Remove a caterva file.
39 | 
40 |     Parameters
41 |     ----------
42 |     urlpath: String
43 |         The array urlpath.
44 |     """
45 |     if os.path.exists(urlpath):
46 |         if os.path.isdir(urlpath):
47 |             shutil.rmtree(urlpath)
48 |         else:
49 |             os.remove(urlpath)
50 | 


--------------------------------------------------------------------------------
/tests/test_resize.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import numpy as np
10 | import caterva as cat
11 | import pytest
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, new_shape, chunks, blocks, fill_value",
15 |                          [
16 |                              ((100, 1230), (200, 1230), (200, 100), (55, 3), b"0123"),
17 |                              ((23, 34), (23, 120), (20, 20), (10, 10), b"sun"),
18 |                              ((80, 51, 60), (80, 100, 100), (20, 10, 33), (6, 6, 26), b"qwerty")
19 |                          ])
20 | def test_resize(shape, new_shape, chunks, blocks, fill_value):
21 |     a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)
22 | 
23 |     a.resize(new_shape)
24 |     assert a.shape == new_shape
25 | 
26 |     slices = tuple(slice(s) for s in shape)
27 |     for i in np.nditer(np.array(a[slices])):
28 |         assert i == fill_value
29 | 


--------------------------------------------------------------------------------
/examples/ex_copy.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | 
12 | 
13 | shape = (10, 10)
14 | chunks = (10, 10)
15 | blocks = (10, 10)
16 | 
17 | dtype = np.dtype(np.float64)
18 | 
19 | # Create a buffer holding the raw bytes of a float64 range reshaped to ``shape``
20 | buffer = bytes(np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape))
21 | 
22 | # Create a caterva array from the buffer (item size and dtype string both come from ``dtype``)
23 | a = cat.from_buffer(buffer, shape, dtype.itemsize, dtype=str(dtype),
24 |                     chunks=chunks, blocks=blocks)
25 | 
26 | # Copy the array twice: through the module-level function and through the method
27 | b = cat.copy(a)
28 | d = b.copy()
29 | # Convert the copy to numpy, zero one element and wrap the result in a new caterva array
30 | aux = np.asarray(b)
31 | aux[1, 2] = 0
32 | aux2 = cat.asarray(aux)
33 | 
34 | print(np.asarray(aux2))
35 | # Second numpy conversion of ``b``; a rectangular region of it is zeroed below
36 | c = np.asarray(b)
37 | 
38 | c[3:5, 2:7] = 0
39 | print(c)
40 | # NOTE(review): presumably shows that ``c`` stays usable once ``b`` is deleted -- confirm
41 | del b
42 | 
43 | print(c)
44 | 
45 | # ``a`` and ``d`` (a copy of a copy of ``a``) must serialize to identical bytes
46 | buffer1 = a.to_buffer()
47 | buffer2 = d.to_buffer()
48 | 
49 | assert buffer1 == buffer2
50 | 


--------------------------------------------------------------------------------
/tests/test_full.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import numpy as np
10 | import caterva as cat
11 | import pytest
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, chunks, blocks, fill_value, cname, clevel, use_dict, nthreads",
15 |                          [
16 |                              ((100, 1230), (200, 100), (55, 3), b"0123", cat.Codec.LZ4HC, 4, 0, 1),
17 |                              ((23, 34), (20, 20), (10, 10), b"sun", cat.Codec.LZ4HC, 8, 0, 2),
18 |                              ((80, 51, 60), (20, 10, 33), (6, 6, 26), b"qwerty", cat.Codec.ZLIB, 5, 1, 2)
19 |                          ])
20 | def test_full(shape, chunks, blocks, fill_value, cname, clevel, use_dict, nthreads):
21 |     a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks, cname=cname, clevel=clevel,
22 |                  use_dict=use_dict, nthreads=nthreads)
23 | 
24 |     for i in np.nditer(np.array(a[:])):
25 |         assert i == fill_value
26 | 


--------------------------------------------------------------------------------
/examples/ex_meta.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | import os
12 | 
13 | 
14 | shape = (128, 128)
15 | chunks = (32, 32)
16 | blocks = (8, 8)
17 | 
18 | urlpath = "ex_meta.cat"
19 | if os.path.exists(urlpath):
20 |     # Remove a file left over from a previous run
21 |     os.remove(urlpath)
22 | 
23 | dtype = np.dtype(np.complex128)
24 | itemsize = dtype.itemsize
25 | 
26 | # Create a numpy array holding 0 .. prod(shape) - 1, reshaped to `shape`
27 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
28 | 
29 | meta = {  # metalayers to attach at creation: name -> bytes payload
30 |     "m1": b"1111",
31 |     "m2": b"2222",
32 | }
33 | # Create a caterva array stored at `urlpath` from the numpy array's raw bytes
34 | a = cat.from_buffer(bytes(nparray), nparray.shape, chunks=chunks, blocks=blocks,
35 |                     urlpath=urlpath, itemsize=itemsize, meta=meta)
36 | 
37 | # Read the caterva array back from disk
38 | b = cat.open(urlpath)
39 | 
40 | # Read metalayers: by name with a fallback default, and by direct indexing
41 | m1 = b.meta.get("m5", b"0000")  # "m5" was never stored above; presumably yields the default b"0000"
42 | m2 = b.meta["m2"]
43 | 
44 | # Remove file on disk
45 | os.remove(urlpath)
46 | 


--------------------------------------------------------------------------------
/tests/test_zeros.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import numpy as np
10 | import caterva as cat
11 | import pytest
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize, cname, clevel, use_dict, nthreads",
15 |                          [
16 |                              ((100, 1230), (200, 100), (55, 3), 4, cat.Codec.ZSTD, 4, 0, 1),
17 |                              ((23, 34), (10, 10), (10, 10), 8, cat.Codec.BLOSCLZ, 8, 0, 2),
18 |                              ((80, 51, 60), (20, 10, 33), (6, 6, 26), 3, cat.Codec.LZ4, 5, 1, 2)
19 |                          ])
20 | def test_zeros(shape, chunks, blocks, itemsize, cname, clevel, use_dict, nthreads):
21 |     a = cat.zeros(shape, chunks=chunks,
22 |                   blocks=blocks,
23 |                   itemsize=itemsize,
24 |                   cname=cname,
25 |                   clevel=clevel,
26 |                   use_dict=use_dict,
27 |                   nthreads=nthreads)
28 | 
29 |     for i in np.nditer(np.array(a[:])):
30 |         assert i == bytes(itemsize)
31 | 


--------------------------------------------------------------------------------
/ANNOUNCE.rst:
--------------------------------------------------------------------------------
 1 | # Announcing python-caterva 0.7.3
 2 | 
 3 | 
 4 | ## What is new?
 5 | 
 6 | In this release, support for Python 3.7 has been dropped and support for
 7 | Python 3.10 and 3.11 has been added.
 8 | 
 9 | For more info, you can have a look at the release notes in:
10 | 
11 | https://github.com/Blosc/python-caterva/releases
12 | 
13 | More docs and examples are available in the documentation site:
14 | 
15 | https://python-caterva.readthedocs.io
16 | 
17 | 
18 | ## What is it?
19 | 
20 | Caterva is an open source C library and a format for storing large
21 | multidimensional, chunked, compressed datasets. Data can be stored either
22 | in-memory or on-disk, but the API to handle both versions is the same.
23 | Compression is handled transparently for the user by adopting the Blosc2 library.
24 | 
25 | python-caterva is a pythonic wrapper for the Caterva library.
26 | 
27 | 
28 | ## Sources repository
29 | 
30 | The sources and documentation are managed through github services at:
31 | 
32 | http://github.com/Blosc/python-caterva
33 | 
34 | Caterva is distributed using the BSD license, see
35 | [LICENSE](https://github.com/Blosc/python-caterva/blob/master/LICENSE) for details.
36 | 
37 | 
38 | ## Mailing list
39 | 
40 | There is an official Blosc mailing list where discussions about Caterva are welcome:
41 | 
42 | blosc@googlegroups.com
43 | 
44 | http://groups.google.es/group/blosc
45 | 
46 | 
47 | Enjoy Data!
48 | - The Blosc Development Team
49 | 


--------------------------------------------------------------------------------
/examples/ex_persistency.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | import os
12 | import shutil
13 | 
14 | 
15 | shape = (128, 128)
16 | chunks = (32, 32)
17 | blocks = (8, 8)
18 | 
19 | urlpath = "ex_persistency.cat"
20 | 
21 | if os.path.exists(urlpath):  # start clean if a previous run left data behind
22 |     cat.remove(urlpath)
23 | 
24 | dtype = np.dtype(np.complex128)
25 | itemsize = dtype.itemsize
26 | 
27 | # Create a numpy array holding 0 .. prod(shape) - 1, reshaped to `shape`
28 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
29 | 
30 | # Create a caterva array stored at `urlpath` from the numpy array's raw bytes
31 | a = cat.from_buffer(bytes(nparray), nparray.shape, itemsize, chunks=chunks, blocks=blocks,
32 |                     urlpath=urlpath, contiguous=False)
33 | 
34 | # Read a caterva array from disk
35 | b = cat.open(urlpath)
36 | 
37 | # Rebuild a second caterva array (no urlpath given) from b's buffer, then view it as a numpy array of `dtype`
38 | nparray2 = np.asarray(cat.from_buffer(b.to_buffer(), b.shape, b.itemsize)).view(dtype)
39 | 
40 | np.testing.assert_almost_equal(nparray, nparray2)  # the round trip must preserve the data
41 | 
42 | # Remove file on disk
43 | if os.path.exists(urlpath):
44 |     cat.remove(urlpath)
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **Important:** All the features of Python-Caterva have been included in the [Python-Blosc2 NDArray object](https://www.blosc.org/python-blosc2/python-blosc2.html). As a result, this project is now obsolete.
 2 | 
 3 | [![Python package](https://github.com/Blosc/python-caterva/actions/workflows/python-package.yml/badge.svg?branch=master)](https://github.com/Blosc/python-caterva/actions/workflows/python-package.yml)
 4 | [![Documentation Status](https://readthedocs.org/projects/python-caterva/badge/?version=latest)](https://python-caterva.readthedocs.io/en/latest/?badge=latest)
 5 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md)
 6 | 
 7 | # python-caterva
 8 | 
 9 | Python wrapper for [Caterva](https://caterva.readthedocs.io).
10 | 
11 | ## Install
12 | 
13 | ```sh
14 | pip install caterva
15 | ```
16 | 
17 | ## Development Workflow
18 | 
19 | ### Clone repo and submodules
20 | 
21 | ```sh
22 | git clone --recurse-submodules https://github.com/Blosc/python-caterva
23 | ```
24 | 
25 | ### Install requirements
26 | 
27 | ```sh
28 | python -m pip install -r requirements-build.txt
29 | python -m pip install -r requirements.txt
30 | python -m pip install -r requirements-tests.txt
31 | ```
32 | 
33 | ### Compile
34 | 
35 | ```sh
36 | python setup.py build_ext --build-type=RelWithDebInfo
37 | ```
38 | 
39 | ### Run tests
40 | 
41 | ```sh
42 | PYTHONPATH=. pytest
43 | ```
44 | 
45 | ### Installing
46 | 
47 | ```sh
48 | python -m pip install .
49 | ```
50 | 


--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python package
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ master ]
 9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python-version: ['3.8', '3.9', '3.10', '3.11']
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v3
23 |       with:
24 |         submodules: 'recursive'
25 |     - name: Set up Python ${{ matrix.python-version }}
26 |       uses: actions/setup-python@v4
27 |       with:
28 |         python-version: ${{ matrix.python-version }}
29 |     - name: Install the package
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         python -m pip install build
33 |         python -m build --wheel
34 |         python -m pip install dist/*
35 | #    - name: Lint with flake8
36 | #      run: |
37 | #        # stop the build if there are Python syntax errors or undefined names
38 | #        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
39 | #        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
40 | #        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
41 |     - name: Test with pytest
42 |       run: |
43 |         python -m pip install -r requirements-tests.txt
44 |         pytest
45 | 


--------------------------------------------------------------------------------
/tests/test_persistency.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | import os
13 | 
14 | 
15 | @pytest.mark.parametrize("contiguous",
16 |                          [
17 |                              True,
18 |                              False,
19 |                          ])
20 | @pytest.mark.parametrize("shape, chunks, blocks, urlpath, dtype",
21 |                          [
22 |                              ([634], [156], [33], "test00.cat", np.float64),
23 |                              ([20, 134, 13], [7, 22, 5], [3, 5, 3], "test01.cat", np.int32),
24 |                              ([12, 13, 14, 15, 16], [4, 6, 4, 7, 5], [2, 4, 2, 3, 3], "test02.cat", np.float32)
25 |                          ])
26 | def test_persistency(shape, chunks, blocks, urlpath, contiguous, dtype):  # write to urlpath, reopen, compare
27 |     if os.path.exists(urlpath):  # drop leftovers from an earlier (possibly failed) run
28 |         cat.remove(urlpath)
29 | 
30 |     size = int(np.prod(shape))
31 |     nparray = np.arange(size, dtype=dtype).reshape(shape)
32 |     _ = cat.asarray(nparray, chunks=chunks, blocks=blocks,  # return value unused; the array is reopened from urlpath below
33 |                     urlpath=urlpath, contiguous=contiguous)
34 |     b = cat.open(urlpath)
35 | 
36 |     bc = b[:]  # full slice of the reopened array
37 | 
38 |     nparray2 = np.asarray(bc).view(dtype)  # reinterpret the sliced data as `dtype`
39 |     np.testing.assert_almost_equal(nparray, nparray2)
40 | 
41 |     cat.remove(urlpath)  # not reached when the assertion above fails
42 | 


--------------------------------------------------------------------------------
/caterva/info.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | from textwrap import TextWrapper
10 | 
11 | 
12 | def info_text_report(items: list) -> str:
13 |     keys = [k for k, v in items]
14 |     max_key_len = max(len(k) for k in keys)
15 |     report = ""
16 |     for k, v in items:
17 |         wrapper = TextWrapper(
18 |             width=80,
19 |             initial_indent=k.ljust(max_key_len) + " : ",
20 |             subsequent_indent=" " * max_key_len + " : ",
21 |         )
22 |         text = wrapper.fill(str(v))
23 |         report += text + "\n"
24 |     return report
25 | 
26 | 
27 | def info_html_report(items: list) -> str:
28 |     report = '<table class="iarray-info">'
29 |     report += "<tbody>"
30 |     for k, v in items:
31 |         report += (
32 |             "<tr>"
33 |             '<th style="text-align: left">%s</th>'
34 |             '<td style="text-align: left">%s</td>'
35 |             "</tr>" % (k, v)
36 |         )
37 |     report += "</tbody>"
38 |     report += "</table>"
39 |     return report
40 | 
41 | 
42 | class InfoReporter(object):
43 |     def __init__(self, obj):
44 |         self.obj = obj
45 | 
46 |     def __repr__(self):
47 |         items = self.obj.info_items
48 |         return info_text_report(items)
49 | 
50 |     def _repr_html_(self):
51 |         items = self.obj.info_items
52 |         return info_html_report(items)
53 | 


--------------------------------------------------------------------------------
/LICENSES/BLOSC.txt:
--------------------------------------------------------------------------------
 1 | BSD License
 2 | 
 3 | For Blosc - A blocking, shuffling and lossless compression library
 4 | 
 5 | Copyright (C) 2009-2018 Francesc Alted <francesc@blosc.org>
 6 | Copyright (C) 2019-present Blosc Development team <blosc@blosc.org>
 7 | 
 8 | Redistribution and use in source and binary forms, with or without modification,
 9 | are permitted provided that the following conditions are met:
10 | 
11 |  * Redistributions of source code must retain the above copyright notice, this
12 |    list of conditions and the following disclaimer.
13 | 
14 |  * Redistributions in binary form must reproduce the above copyright notice,
15 |    this list of conditions and the following disclaimer in the documentation
16 |    and/or other materials provided with the distribution.
17 | 
18 |  * Neither the name Francesc Alted nor the names of its contributors may be used
19 |    to endorse or promote products derived from this software without specific
20 |    prior written permission.
21 | 
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
26 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
29 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD License
 2 | 
 3 | For Caterva - A multidimensional data container on top of Blosc2.
 4 | 
 5 | Copyright (C) 2018 Francesc Alted <francesc@blosc.org>
 6 | Copyright (C) 2018 Aleix Alcacer <aleix@blosc.org>
 7 | Copyright (C) 2019-present Blosc Development team <blosc@blosc.org>
 8 | 
 9 | Redistribution and use in source and binary forms, with or without modification,
10 | are permitted provided that the following conditions are met:
11 | 
12 |  * Redistributions of source code must retain the above copyright notice, this
13 |    list of conditions and the following disclaimer.
14 | 
15 |  * Redistributions in binary form must reproduce the above copyright notice,
16 |    this list of conditions and the following disclaimer in the documentation
17 |    and/or other materials provided with the distribution.
18 | 
19 |  * Neither the names of the Blosc Development team nor the names of its
20 |    contributors may be used to endorse or promote products derived from this
21 |    software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
27 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/LICENSES/CATERVA.txt:
--------------------------------------------------------------------------------
 1 | BSD License
 2 | 
 3 | For Caterva - A multidimensional data container on top of Blosc2.
 4 | 
 5 | Copyright (C) 2018 Francesc Alted <francesc@blosc.org>
 6 | Copyright (C) 2018 Aleix Alcacer <aleix@blosc.org>
 7 | Copyright (C) 2019-present Blosc Development team <blosc@blosc.org>
 8 | 
 9 | Redistribution and use in source and binary forms, with or without modification,
10 | are permitted provided that the following conditions are met:
11 | 
12 |  * Redistributions of source code must retain the above copyright notice, this
13 |    list of conditions and the following disclaimer.
14 | 
15 |  * Redistributions in binary form must reproduce the above copyright notice,
16 |    this list of conditions and the following disclaimer in the documentation
17 |    and/or other materials provided with the distribution.
18 | 
19 |  * Neither the names of the Blosc Development team nor the names of its
20 |    contributors may be used to endorse or promote products derived from this
21 |    software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
27 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/tests/test_getitem.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | 
13 | 
14 | argnames = "shape, chunks, blocks, slices, dtype"  # parameter names shared by both tests in this file
15 | argvalues = [
16 |     ([456], [258], [73], slice(0, 1), np.int32),  # 1-D array, a single slice
17 |     ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7),  # 3-D array, two slices plus an integer index
18 |      np.float64),
19 |     ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)),  # 5-D array, slices around an Ellipsis
20 |      np.float32)
21 | ]
22 | 
23 | 
24 | @pytest.mark.parametrize(argnames, argvalues)
25 | def test_getitem(shape, chunks, blocks, slices, dtype):
26 |     size = int(np.prod(shape))
27 |     nparray = np.arange(size, dtype=dtype).reshape(shape)
28 |     a = cat.from_buffer(bytes(nparray), nparray.shape, nparray.itemsize,
29 |                         chunks=chunks, blocks=blocks)
30 |     nparray_slice = nparray[slices]
31 |     buffer_slice = np.asarray(a[slices])
32 |     a_slice = np.frombuffer(buffer_slice, dtype=dtype).reshape(nparray_slice.shape)
33 |     np.testing.assert_almost_equal(a_slice, nparray_slice)
34 | 
35 | 
36 | @pytest.mark.parametrize(argnames, argvalues)
37 | def test_getitem_numpy(shape, chunks, blocks, slices, dtype):
38 |     size = int(np.prod(shape))
39 |     nparray = np.arange(size, dtype=dtype).reshape(shape)
40 |     a = cat.asarray(nparray, chunks=chunks, blocks=blocks)
41 |     nparray_slice = nparray[slices]
42 |     a_slice = np.asarray(a[slices]).view(dtype)
43 | 
44 |     np.testing.assert_almost_equal(a_slice, nparray_slice)
45 | 


--------------------------------------------------------------------------------
/examples/ex_formats.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import numpy as np
10 | import caterva as cat
11 | from time import time
12 | import os
13 | 
14 | urlpath_sparse = "ex_formats_sparse.caterva"
15 | # urlpath_sparse = None  (alternative setting: no path for the first array)
16 | urlpath_contiguous = "ex_formats_contiguous.caterva"
17 | # urlpath_contiguous = None  (alternative setting: no path for the second array)
18 | 
19 | if urlpath_sparse and os.path.exists(urlpath_sparse):  # remove data left by a previous run
20 |     cat.remove(urlpath_sparse)
21 | 
22 | if urlpath_contiguous and os.path.exists(urlpath_contiguous):  # remove data left by a previous run
23 |     cat.remove(urlpath_contiguous)
24 | 
25 | shape = (1000 * 1000,)
26 | chunks = (100,)
27 | blocks = (100,)
28 | dtype = np.dtype(np.float64)
29 | itemsize = dtype.itemsize
30 | 
31 | t0 = time()  # time creation plus chunk-by-chunk filling with contiguous=False
32 | a = cat.empty(shape, 8, chunks=chunks, blocks=blocks, urlpath=urlpath_sparse,  # NOTE(review): literal 8 is float64's itemsize; `itemsize` above holds the same value
33 |              contiguous=False)
34 | for nchunk in range(a.nchunks):
35 |     a[nchunk * chunks[0]: (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype)  # fill one chunk-sized span with 0 .. chunks[0] - 1
36 | t1 = time()
37 | 
38 | print(f"Time: {(t1 - t0):.4f} s")
39 | print(a.nchunks)
40 | an = np.array(a[:]).view(dtype)  # whole array reinterpreted as `dtype`
41 | 
42 | 
43 | t0 = time()  # same measurement with contiguous=True
44 | b = cat.empty(shape, itemsize=itemsize, chunks=chunks, blocks=blocks, urlpath=urlpath_contiguous, contiguous=True)
45 | 
46 | print(b.nchunks)
47 | for nchunk in range(shape[0] // chunks[0]):  # chunk count derived from shape and chunks rather than b.nchunks
48 |     b[nchunk * chunks[0]: (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype)
49 | t1 = time()
50 | 
51 | print(f"Time: {(t1 - t0):.4f} s")
52 | print(b.nchunks)
53 | bn = np.array(b[:]).view(dtype)
54 | 
55 | np.testing.assert_allclose(an, bn)  # both arrays must hold the same data
56 | 


--------------------------------------------------------------------------------
/RELEASING.rst:
--------------------------------------------------------------------------------
 1 | Python-caterva release procedure
 2 | ================================
 3 | 
 4 | Preliminaries
 5 | -------------
 6 | 
 7 | * Make sure that the current master branch is passing the tests on Github Actions.
 8 | 
 9 | * Make sure that `RELEASE_NOTES.rst` and `ANNOUNCE.rst` are up to date with the latest news
10 |   in the release.
11 | 
12 | * Check that `VERSION` file contains the correct number.
13 | 
14 | * Check any copyright listings and update them if necessary. You can use ``grep
15 |   -i copyright`` to figure out where they might be.
16 | 
17 | * Commit the changes::
18 | 
19 |     git commit -a -m "Getting ready for release X.Y.Z"
20 |     git push
21 | 
22 | * Check that the documentation is correctly created in https://python-caterva.readthedocs.io.
23 | 
24 | 
25 | Tagging
26 | -------
27 | 
28 | * Create an annotated tag ``vX.Y.Z`` from ``master``.  Use the following message::
29 | 
30 |     git tag -a vX.Y.Z -m "Tagging version X.Y.Z"
31 | 
32 | * Push the tag to the github repo::
33 | 
34 |     git push
35 |     git push --tags
36 | 
37 | Once the tag is up, update the release notes at: https://github.com/Blosc/python-caterva/releases
38 | 
39 | * Check that the wheels are uploaded correctly to PyPI.
40 | 
41 | Announcing
42 | ----------
43 | 
44 | * Send an announcement to the Blosc list.  Use the ``ANNOUNCE.rst`` file as skeleton
45 |   (or possibly as the definitive version).
46 | 
47 | * Announce on Twitter via the @Blosc2 account and rejoice.
48 | 
49 | 
50 | Post-release actions
51 | --------------------
52 | 
53 | * Create new headers for adding new features in ``RELEASE_NOTES.rst``
54 |   and add this place-holder:
55 | 
56 |   XXX version-specific blurb XXX
57 | 
58 | * Edit ``VERSION`` in master to increment the version to the next
59 |   minor one (i.e. X.Y.Z --> X.Y.(Z+1).dev0).
60 | 
61 | * Commit your changes with::
62 | 
63 |     git commit -a -m "Post X.Y.Z release actions done"
64 |     git push
65 | 
66 | 
67 | That's all folks!
68 | 


--------------------------------------------------------------------------------
/tests/test_empty.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | 
12 | 
13 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads, filters",
14 |                          [
15 |                              ((100, 1230), (200, 100), (55, 3), 4, cat.Codec.LZ4, 4, 0, 1, [cat.Filter.SHUFFLE]),
16 |                              ((234, 125), (90, 90), (20, 10), 8, cat.Codec.LZ4HC, 8, 0, 2,
17 |                               [cat.Filter.DELTA, cat.Filter.BITSHUFFLE]),
18 |                              ((400, 399, 401), (20, 10, 130), (6, 6, 26), 3, cat.Codec.BLOSCLZ, 5, 1, 2,
19 |                               [cat.Filter.DELTA, cat.Filter.TRUNC_PREC])
20 |                          ])
21 | def test_empty(shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads,
22 |                filters):  # checks that an array from cat.empty() reports back the requested settings
23 |     a = cat.empty(shape, chunks=chunks,
24 |                   blocks=blocks,
25 |                   itemsize=itemsize,
26 |                   codec=codec,
27 |                   clevel=clevel,
28 |                   use_dict=use_dict,
29 |                   nthreads=nthreads,
30 |                   filters=filters)
31 |     if chunks is not None:  # always true for the parameter sets above, which all pass chunks
32 |         assert a.chunks == chunks
33 |         assert a.blocks == blocks
34 |     assert a.shape == shape
35 |     assert a.itemsize == itemsize
36 |     assert a.codec == (codec if chunks is not None else None)
37 |     assert a.clevel == (clevel if chunks is not None else 1)
38 |     if chunks is not None:
39 |         assert a.filters[-len(filters):] == filters  # the requested filters must be the last entries of a.filters
40 |     else:  # NOTE(review): not exercised by the current parameters
41 |         assert a.filters is None
42 | 


--------------------------------------------------------------------------------
/tests/test_metalayers.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | import os
13 | from msgpack import packb
14 | 
15 | 
16 | @pytest.mark.parametrize("contiguous",
17 |                          [
18 |                              True,
19 |                              False,
20 |                          ])
21 | @pytest.mark.parametrize("shape, chunks, blocks, urlpath, dtype",
22 |                          [
23 |                              ([556], [221], [33], "testmeta00.cat", np.float64),
24 |                              ([20, 134, 13], [12, 66, 8], [3, 13, 5], "testmeta01.cat", np.int32),
25 |                              ([12, 13, 14, 15, 16], [8, 9, 4, 12, 9], [2, 6, 4, 5, 4], "testmeta02.cat", np.float32)
26 |                          ])
27 | def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype):  # attach metalayers at creation, read back, overwrite one
28 |     if os.path.exists(urlpath):  # drop leftovers from an earlier (possibly failed) run
29 |         cat.remove(urlpath)
30 | 
31 |     numpy_meta = packb({b"dtype": str(np.dtype(dtype))})  # msgpack-encoded payloads
32 |     test_meta = packb({b"lorem": 1234})
33 | 
34 |     # Create an empty caterva array (on disk) carrying both metalayers
35 |     itemsize = np.dtype(dtype).itemsize
36 |     a = cat.empty(shape, itemsize, chunks=chunks, blocks=blocks,
37 |                   urlpath=urlpath, contiguous=contiguous,
38 |                   meta={"numpy": numpy_meta,
39 |                         "test": test_meta})
40 | 
41 |     assert ("numpy" in a.meta)
42 |     assert ("error" not in a.meta)  # a name that was never stored must not be reported
43 |     assert (a.meta["numpy"] == numpy_meta)
44 |     assert ("test" in a.meta)
45 |     assert (a.meta["test"] == test_meta)
46 | 
47 |     test_meta = packb({b"lorem": 4231})  # replacement payload for the "test" metalayer
48 |     a.meta["test"] = test_meta
49 |     assert (a.meta["test"] == test_meta)
50 | 
51 |     # Remove file on disk
52 |     cat.remove(urlpath)
53 | 


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | Python-caterva documentation
 2 | ============================
 3 | 
 4 | Python-caterva is a Python wrapper of `Caterva <https://caterva.readthedocs.io/en/latest/>`_, an open source C library specially
 5 | designed
 6 | to deal with large multidimensional, chunked, compressed datasets.
 7 | 
 8 | .. panels::
 9 |     :card: + intro-card text-center
10 |     :column: col-lg-6 col-md-12 col-sm-12 col-xs-12 d-flex
11 | 
12 |     ---
13 | 
14 |     Getting Started
15 |     ^^^^^^^^^^^^^^^
16 | 
17 |     New to *python-caterva*? Check out the getting started guides. They contain an
18 |     introduction to *python-caterva* main concepts and an installation tutorial.
19 | 
20 |     +++
21 | 
22 |     .. link-button:: getting_started/index
23 |             :type: ref
24 |             :text: To the getting started guides
25 |             :classes: btn-light
26 | 
27 |     ---
28 | 
29 |     API Reference
30 |     ^^^^^^^^^^^^^
31 | 
32 |     The reference guide contains a detailed description of the *python-caterva* API.
33 |     The reference describes how the functions work and which parameters can
34 |     be used.
35 | 
36 |     +++
37 | 
38 |     .. link-button:: reference/index
39 |             :type: ref
40 |             :text: To the reference guide
41 |             :classes: btn-light
42 | 
43 | 
44 |     ---
45 | 
46 |     Development
47 |     ^^^^^^^^^^^
48 | 
49 |     Saw a typo in the documentation? Want to improve
50 |     existing functionalities? The contributing guidelines will guide
51 |     you through the process of improving *python-caterva*.
52 | 
53 |     +++
54 | 
55 |     .. link-button:: development/index
56 |             :type: ref
57 |             :text: To the development guide
58 |             :classes: btn-light
59 | 
60 |     ---
61 | 
62 |     Release Notes
63 |     ^^^^^^^^^^^^^
64 | 
65 |     Want to see what's new in the latest release? Check out the release notes to find out!
66 | 
67 |     +++
68 | 
69 |     .. link-button:: release_notes/index
70 |             :type: ref
71 |             :text: To the release notes
72 |             :classes: btn-light
73 | 
74 | 
75 | ..  toctree::
76 |     :maxdepth: 1
77 |     :hidden:
78 | 
79 |     Getting Started <getting_started/index>
80 |     API Reference <reference/index>
81 |     Development <development/index>
82 |     Release Notes <release_notes/index>
83 | 


--------------------------------------------------------------------------------
/tests/test_copy.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import pytest
11 | import numpy as np
12 | 
13 | 
14 | @pytest.mark.parametrize("shape, chunks1, blocks1, chunks2, blocks2, itemsize",
15 |                          [
16 |                              ([521], [212], [33], [121], [18], 8),
17 |                              ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], 4),
18 |                              ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2],
19 |                               [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], 8)
20 |                          ])
21 | def test_copy(shape, chunks1, blocks1, chunks2, blocks2, itemsize):
22 |     size = int(np.prod(shape))
23 |     buffer = bytes(size * itemsize)
24 |     a = cat.from_buffer(buffer, shape, itemsize, chunks=chunks1, blocks=blocks1,
25 |                         complevel=2)
26 |     b = a.copy(chunks=chunks2, blocks=blocks2,
27 |                itemsize=itemsize, complevel=5, filters=[cat.Filter.BITSHUFFLE])
28 |     buffer2 = b.to_buffer()
29 |     assert buffer == buffer2
30 | 
31 | 
32 | @pytest.mark.parametrize("shape, chunks1, blocks1, chunks2, blocks2, dtype",
33 |                          [
34 |                              ([521], [212], [33], [121], [18], "i8"),
35 |                              ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], "f4"),
36 |                              ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2],
37 |                               [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], "f8")
38 |                          ])
39 | def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype):
40 |     size = int(np.prod(shape))
41 |     nparray = np.arange(size, dtype=dtype).reshape(shape)
42 |     a = cat.asarray(nparray, chunks=chunks1, blocks=blocks1)
43 |     b = a.copy(chunks=chunks2, blocks=blocks2, complevel=5, filters=[cat.Filter.BITSHUFFLE])
44 |     if chunks2:
45 |         b = b[...]
46 |     nparray2 = np.asarray(b).view(dtype)
47 |     np.testing.assert_almost_equal(nparray, nparray2)
48 | 


--------------------------------------------------------------------------------
/bench/compare_getitem.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | import caterva as cat
10 | import numpy as np
11 | import os
12 | import sys
13 | from time import time
14 | 
15 | 
16 | # Dimensions, type and persistency properties for the arrays
17 | shape = (1000 * 1000,)
18 | chunkshape = (100,)
19 | blockshape = (25,)
20 | 
21 | dtype = np.float64
22 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False
23 | 
24 | if persistent:
25 |     filename = "bench_getitem.cat"
26 |     if os.path.exists(filename):
27 |         # Remove file on disk
28 |         os.remove(filename)
29 | else:
30 |     filename = None
31 | 
32 | itemsize = np.dtype(dtype).itemsize
33 | 
34 | # Create an empty caterva array
35 | a = cat.empty(shape, itemsize, dtype=str(np.dtype(dtype)), chunkshape=chunkshape, blockshape=blockshape,
36 |               filename=filename, compcode=0)
37 | 
38 | # Fill an empty caterva array using a block iterator
39 | t0 = time()
40 | count = 0
41 | for block, info in a.iter_write():
42 |     nparray = np.arange(count, count + info.nitems, dtype=dtype).reshape(info.shape)
43 |     block[:] = bytes(nparray)
44 |     count += info.nitems
45 | t1 = time()
46 | print("Time for filling: %.3fs" % (t1 - t0))
47 | 
48 | # Check that the retrieved items are correct
49 | t0 = time()
50 | for block, info in a.iter_read(chunkshape):
51 |     pass
52 | t1 = time()
53 | print("Time for reading with iterator: %.3fs" % (t1 - t0))
54 | 
55 | # Asserting results
56 | count = 0
57 | for block, info in a.iter_read(chunkshape):
58 |     nparray = np.arange(count, count + info.nitems, dtype=dtype).reshape(info.shape)
59 |     np.testing.assert_allclose(block, nparray)
60 |     count += info.nitems
61 | 
62 | # Use getitem
63 | t0 = time()
64 | for i in range(shape[0] // chunkshape[0]):
65 |     _ = a[i * 100: (i+1) * 100]
66 | t1 = time()
67 | print("Time for reading with getitem: %.3fs" % (t1 - t0))
68 | 
69 | count = 0
70 | for i in range(shape[0] // chunkshape[0]):
71 |     nparray = np.arange(count, count + chunkshape[0], dtype=dtype).reshape(chunkshape)
72 |     np.testing.assert_allclose(a[i * chunkshape[0]: (i+1) * chunkshape[0]], nparray)
73 |     count += chunkshape[0]
74 | 
75 | 
76 | if persistent:
77 |     os.remove(filename)
78 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
 1 | Contributing to python-caterva
 2 | ==============================
 3 | 
 4 | python-caterva is a community maintained project. We want to make contributing to
 5 | this project as easy and transparent as possible.
 6 | 
 7 | 
 8 | Asking for help
 9 | ---------------
10 | 
11 | If you have a question about how to use python-caterva, please post your question on
12 | StackOverflow using the `“caterva” tag <https://stackoverflow.com/questions/tagged/caterva>`_.
13 | 
14 | 
15 | 
16 | Bug reports
17 | -----------
18 | 
19 | We use `GitHub issues <https://github.com/Blosc/python-caterva/issues>`_ to track
20 | public bugs. Please ensure your description is clear and has sufficient
21 | instructions to be able to reproduce the issue. The ideal report should
22 | contain the following:
23 | 
24 | 1. Summarize the problem: Include details about your goal, describe expected
25 | and actual results and include any error messages.
26 | 
27 | 2. Describe what you’ve tried: Show what you’ve tried, tell us what you
28 | found and why it didn’t meet your needs.
29 | 
30 | 3. Minimum reproducible example: Share the minimum amount of code needed to
31 | reproduce your issue. You can format the code nicely using markdown::
32 | 
33 |     ```python
34 |     import caterva as cat
35 | 
36 |     ...
37 |     ```
38 | 
39 | 
40 | 4. Determine the environment: Indicate the python-caterva version and the operating
41 | system the code is running on.
42 | 
43 | Contributing to code
44 | --------------------
45 | 
46 | We actively welcome your code contributions. By contributing to python-caterva, you
47 | agree that your contributions will be licensed under the `<LICENSE>`_ file of
48 | the project.
49 | 
50 | Fork the repo
51 | +++++++++++++
52 | 
53 | Make a fork of the python-caterva repository and clone it::
54 | 
55 |     git clone https://github.com/<your-github-username>/python-caterva
56 | 
57 | 
58 | Create your branch
59 | ++++++++++++++++++++
60 | 
61 | Before you do any new work or submit a pull request, please open an `issue on
62 | GitHub <https://github.com/Blosc/python-caterva/issues>`_ to report the bug or
63 | propose the feature you’d like to add.
64 | 
65 | Then create a new, separate branch for each piece of work you want to do.
66 | 
67 | 
68 | Update docstrings
69 | +++++++++++++++++
70 | 
71 | If you've changed APIs, update the involved docstrings using the `doxygen
72 | format <https://www.doxygen.nl/manual/docblocks.html#cppblock>`_.
73 | 
74 | 
75 | Run the test suite
76 | ++++++++++++++++++
77 | 
78 | If you have added code that needs to be tested, add the necessary tests and
79 | verify that all tests pass successfully.
80 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | from __future__ import print_function
10 | 
11 | import os
12 | import sys
13 | import io
14 | 
15 | from skbuild import setup
16 | from textwrap import dedent
17 | 
18 | 
19 | with io.open('README.md', encoding='utf-8') as f:
20 |     long_description = f.read()
21 | 
22 | 
23 | def exit_with_error(message):
24 |     print('ERROR: %s' % message)
25 |     sys.exit(1)
26 | 
27 | 
28 | # Check for Python
29 | if sys.version_info[0] == 3:
30 |     if sys.version_info[1] < 6:
31 |         exit_with_error("You need Python 3.6 or greater to install Caterva!")
32 | else:
33 |     exit_with_error("You need Python 3.6 or greater to install Caterva!")
34 | 
35 | 
36 | # Read the long_description from README.md
37 | with open('README.md') as f:
38 |     long_description = f.read()
39 | 
40 | # Blosc version
41 | VERSION = open('VERSION').read().strip()
42 | # Create the version.py file
43 | open('caterva/version.py', 'w').write('__version__ = "%s"\n' % VERSION)
44 | 
45 | 
46 | classifiers = dedent("""\
47 | Development Status :: 3 - Alpha
48 | Intended Audience :: Developers
49 | Intended Audience :: Information Technology
50 | Intended Audience :: Science/Research
51 | License :: OSI Approved :: BSD License
52 | Programming Language :: Python
53 | Topic :: Software Development :: Libraries :: Python Modules
54 | Operating System :: Microsoft :: Windows
55 | Operating System :: Unix
56 | Programming Language :: Python :: 3
57 | Programming Language :: Python :: 3.6
58 | Programming Language :: Python :: 3.7
59 | Programming Language :: Python :: 3.8
60 | """)
61 | 
62 | 
63 | setup(
64 |     name="caterva",
65 |     version=VERSION,
66 |     description='Caterva for Python (multidimensional compressed data containers).',
67 |     long_description=long_description,
68 |     long_description_content_type='text/markdown',
69 |     classifiers=[c for c in classifiers.split("\n") if c],
70 |     author='Blosc Development Team',
71 |     author_email='blosc@blosc.org',
72 |     maintainer='Blosc Development Team',
73 |     maintainer_email='blosc@blosc.org',
74 |     url='https://github.com/Blosc/python-caterva',
75 |     license='https://opensource.org/licenses/BSD-3-Clause',
76 |     platforms=['any'],
77 |     packages=['caterva'],
78 |     package_dir={'caterva': 'caterva'},
79 |     install_requires=['ndindex', 'msgpack'],
80 | )
81 | 


--------------------------------------------------------------------------------
/caterva/meta.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | from . import caterva_ext as ext
10 | from collections.abc import Mapping
11 | 
12 | 
13 | class Meta(Mapping):
14 |     """
15 |     Class providing access to user meta on a :py:class:`NDArray`.
16 |     It will be available via the `.meta` property of an array.
17 |     """
18 |     def get(self, key, default=None):
19 |         """Return the value for `key` if `key` is in the dictionary, else `default`.
20 |         If `default` is not given, it defaults to ``None``."""
21 |         return self[key] if key in self else default
22 | 
23 |     def __del__(self):
24 |         pass
25 | 
26 |     def __init__(self, ndarray):
27 |         self.arr = ndarray
28 | 
29 |     def __contains__(self, key):
30 |         """Check if the `key` metalayer exists or not."""
31 |         return ext.meta__contains__(self.arr, key)
32 | 
33 |     def __delitem__(self, key):
34 |         return None
35 | 
36 |     def __setitem__(self, key, value):
37 |         """Update the `key` metalayer with `value`.
38 | 
39 |         Parameters
40 |         ----------
41 |         key: str
42 |             The name of the metalayer to update.
43 |         value: bytes
44 |             The buffer containing the new content for the metalayer.
45 | 
46 |             ..warning: Note that the *length* of the metalayer cannot not change,
47 |             else an exception will be raised.
48 |         """
49 |         return ext.meta__setitem__(self.arr, key, value)
50 | 
51 |     def __getitem__(self, item):
52 |         """Return the `item` metalayer.
53 | 
54 |         Parameters
55 |         ----------
56 |         item: str
57 |             The name of the metalayer to return.
58 | 
59 |         Returns
60 |         -------
61 |         bytes
62 |             The buffer containing the metalayer info (typically in msgpack
63 |             format).
64 |         """
65 |         return ext.meta__getitem__(self.arr, item)
66 | 
67 |     def keys(self):
68 |         """Return the metalayers keys."""
69 |         return ext.meta_keys(self.arr)
70 | 
71 |     def values(self):
72 |         raise NotImplementedError("Values can not be accessed")
73 | 
74 |     def items(self):
75 |         raise NotImplementedError("Items can not be accessed")
76 | 
77 |     def __iter__(self):
78 |         """Iter over the keys of the metalayers."""
79 |         return iter(self.keys())
80 | 
81 |     def __len__(self):
82 |         """Return the number of metalayers."""
83 |         return ext.meta__len__(self.arr)
84 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # pytype static type analyzer
135 | .pytype/
136 | 
137 | # Cython debug symbols
138 | cython_debug/
139 | 
140 | # scikit-build
141 | _skbuild/
142 | 
143 | # PyCharm
144 | .idea/
145 | 
146 | # MacOS
147 | .DS_Store
148 | 
149 | # Docs
150 | doc/source/reference/api
151 | 


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
 1 | #######################################################################
 2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under a BSD-style license (found in the
 6 | # LICENSE file in the root directory of this source tree)
 7 | #######################################################################
 8 | 
 9 | # Configuration file for the Sphinx documentation builder.
10 | #
11 | # This file only contains a selection of the most common options. For a full
12 | # list see the documentation:
13 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
14 | 
15 | # -- Path setup --------------------------------------------------------------
16 | 
17 | # If extensions (or modules to document with autodoc) are in another directory,
18 | # add these directories to sys.path here. If the directory is relative to the
19 | # documentation root, use os.path.abspath to make it absolute, like shown here.
20 | #
21 | import os
22 | import sys
23 | sys.path.insert(0, os.path.abspath('../..'))
24 | 
25 | 
26 | # -- Project information -----------------------------------------------------
27 | 
28 | project = 'caterva'
29 | copyright = '2021, The Blosc Developers'
30 | author = 'The Blosc Developers'
31 | 
32 | import caterva
33 | release = caterva.__version__
34 | 
35 | 
36 | # -- General configuration ---------------------------------------------------
37 | 
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 |     'sphinx.ext.autodoc',
43 |     'sphinx.ext.autosummary',
44 |     'numpydoc',
45 |     'myst_nb',
46 |     'sphinx_panels'
47 | ]
48 | 
49 | 
50 | # Add any paths that contain templates here, relative to this directory.
51 | templates_path = ['_templates']
52 | 
53 | # List of patterns, relative to source directory, that match files and
54 | # directories to ignore when looking for source files.
55 | # This pattern also affects html_static_path and html_extra_path.
56 | exclude_patterns = []
57 | 
58 | 
59 | # -- Options for HTML output -------------------------------------------------
60 | 
61 | # The theme to use for HTML and HTML Help pages.  See the documentation for
62 | # a list of builtin themes.
63 | #
64 | html_theme = 'pydata_sphinx_theme'
65 | html_logo = "caterva-logo.png"
66 | 
67 | pygments_style = 'sphinx'
68 | 
69 | # Add any paths that contain custom static files (such as style sheets) here,
70 | # relative to this directory. They are copied after the builtin static files,
71 | # so a file named "default.css" will overwrite the builtin "default.css".
72 | html_static_path = ['_static']
73 | 
74 | html_theme_options = {
75 |   "navigation_depth": 1,
76 | }
77 | 
78 | html_css_files = [
79 |     'css/custom.css',
80 |     "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css",
81 | ]
82 | 
83 | 
84 | myst_enable_extensions = [
85 |     "amsmath",
86 |     "colon_fence",
87 |     "deflist",
88 |     "dollarmath",
89 |     "html_image",
90 | ]
91 | myst_url_schemes = ("http", "https", "mailto")
92 | myst_update_mathjax = False
93 | 
94 | panels_add_bootstrap_css = False
95 | 
96 | autosummary_generate = True
97 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
  1 | name: Python publish
  2 | on:
  3 |   push:
  4 |     branches:
  5 |       - master
  6 |     tags:
  7 |       - '*'
  8 | 
  9 | jobs:
 10 |   build_wheels:
 11 |     name: Build wheels on ${{ matrix.name }}
 12 |     runs-on: ${{ matrix.os }}
 13 |     strategy:
 14 |       matrix:
 15 |         include:
 16 |           - name: Ubuntu
 17 |             os: ubuntu-latest
 18 |             cibw_build: 'cp38-* cp39-* cp310-* cp311-*'
 19 |             cibw_skip: '*-manylinux*_i686'
 20 | 
 21 |           - name: MacOS
 22 |             os: macos-latest
 23 |             cibw_build: 'cp38-* cp39-* cp310-* cp311-*'
 24 | 
 25 |           - name: Windows x86
 26 |             os: windows-latest
 27 |             arch: x86
 28 |             cibw_build: 'cp38-win32 cp39-win32 cp310-win32 cp311-win32'
 29 | 
 30 |           - name: Windows amd64
 31 |             os: windows-latest
 32 |             arch: amd64
 33 |             cibw_build: >-
 34 |               cp38-win_amd64 cp39-win_amd64 cp310-win_amd64 cp311-win_amd64
 35 | 
 36 |     steps:
 37 |       - name: Checkout repo
 38 |         uses: actions/checkout@v3
 39 |         with:
 40 |           submodules: 'recursive'
 41 | 
 42 |       - name: Set up Python
 43 |         uses: actions/setup-python@v4
 44 |         with:
 45 |           python-version: '3.8'
 46 | 
 47 |       - name: Install Ninja
 48 |         uses: seanmiddleditch/gha-setup-ninja@master
 49 | 
 50 |       - name: Install MSVC
 51 |         if: ${{runner.os == 'Windows'}}
 52 |         uses: ilammy/msvc-dev-cmd@v1
 53 |         with:
 54 |           arch: ${{ matrix.arch }}
 55 | 
 56 |       - name: Build wheels
 57 |         uses: pypa/cibuildwheel@v2.11.0
 58 |         with:
 59 |           output-dir: wheelhouse
 60 |         env:
 61 |           CIBW_BUILD: ${{ matrix.cibw_build }}
 62 |           CIBW_SKIP: ${{ matrix.cibw_skip }}
 63 |           CIBW_BEFORE_BUILD: python -m pip install -r requirements.txt
 64 |           CIBW_BEFORE_TEST: python -m pip install -r requirements-tests.txt
 65 |           CIBW_TEST_COMMAND: python -m pytest {project}/tests
 66 |           CIBW_BUILD_VERBOSITY: 1
 67 | 
 68 |       - name: Upload wheels
 69 |         uses: actions/upload-artifact@v3
 70 |         with:
 71 |           path: ./wheelhouse/*.whl
 72 | 
 73 | 
 74 |   build_sdist:
 75 |     name: Build sdist
 76 |     runs-on: ubuntu-latest
 77 | 
 78 |     steps:
 79 |       - uses: actions/checkout@v3
 80 |         with:
 81 |           submodules: 'recursive'
 82 | 
 83 |       - uses: actions/setup-python@v4
 84 |         name: Setup Python
 85 |         with:
 86 |           python-version: '3.8'
 87 | 
 88 |       - name: Install dependencies
 89 |         run: |
 90 |           python -m pip install --upgrade pip
 91 |           python -m pip install build
 92 | 
 93 |       - name: Build sdist
 94 |         run: |
 95 |           python -m build --sdist
 96 | 
 97 |       - name: Upload sdist package
 98 |         uses: actions/upload-artifact@v3
 99 |         with:
100 |           path: dist/*.tar.gz
101 | 
102 |   upload_pypi:
103 |     needs: [ build_wheels, build_sdist ]  # publish only after every wheel and the sdist are built
104 |     runs-on: ubuntu-latest
105 |     if: startsWith(github.event.ref, 'refs/tags')  # pushes to master build but do not publish
106 |     steps:
107 |       - uses: actions/download-artifact@v3
108 |         with:
109 |           name: artifact  # the upload steps above set no name, so this is the default one
110 |           path: dist
111 | 
112 |       - uses: pypa/gh-action-pypi-publish@release/v1  # the action's `master` ref has been sunset
113 |         with:
114 |           user: __token__
115 |           password: ${{ secrets.blosc_pypi_secret }}
116 | 


--------------------------------------------------------------------------------
/RELEASE_NOTES.rst:
--------------------------------------------------------------------------------
  1 | Release notes
  2 | =============
  3 | 
  4 | Changes from 0.7.3 to 0.7.4
  5 | ---------------------------
  6 | 
  7 | XXX version-specific blurb XXX
  8 | 
  9 | 
 10 | Changes from 0.7.2 to 0.7.3
 11 | ---------------------------
 12 | 
 13 | * Adapt to the latest version of Caterva 0.5.0
 14 | * Drop support for Python 3.7 and add for Python 3.10 and 3.11
 15 | 
 16 | Changes from 0.7.1 to 0.7.2
 17 | ---------------------------
 18 | 
 19 | * Implement a resize method
 20 | 
 21 | Changes from 0.7.0 to 0.7.1
 22 | ---------------------------
 23 | 
 24 | * Fix to apply filtersmeta from kwargs.
 25 | * Fix metalayer creation in the ext file.
 26 | * Update the docstrings.
 27 | 
 28 | Changes from 0.6.0 to 0.7.0
 29 | ---------------------------
 30 | 
 31 | * Remove plainbuffer support.
 32 | * Improve documentation.
 33 | 
 34 | Changes from 0.5.3 to 0.6.0
 35 | ---------------------------
 36 | 
 37 | * Provide wheels on PyPI.
 38 | * Update caterva submodule to 0.5.0.
 39 | 
 40 | Changes from 0.5.1 to 0.5.3
 41 | ---------------------------
 42 | 
 43 | * Fix dependencies installation issue.
 44 | 
 45 | Changes from 0.5.0 to 0.5.1
 46 | ---------------------------
 47 | 
 48 | * Update `setup.py` and add `pyproject.toml`.
 49 | 
 50 | Changes from 0.4.2 to 0.5.0
 51 | ---------------------------
 52 | 
 53 | * Big c-core refactoring improving the slicing performance.
 54 | * Implement `__setitem__` method for arrays to allow updating the values of the arrays.
 55 | * Use Blosc special-constructors to initialize the arrays.
 56 | * Improve the buffer and array protocols.
 57 | * Remove the data type support in order to simplify the library.
 58 | 
 59 | Changes from 0.4.1 to 0.4.2
 60 | ---------------------------
 61 | 
 62 | * Add files in `MANIFEST.in`.
 63 | 
 64 | Changes from 0.4.0 to 0.4.1
 65 | ---------------------------
 66 | 
 67 | * Fix invalid values for classifiers defined in `setup.py`.
 68 | 
 69 | Changes from 0.3.0 to 0.4.0
 70 | ---------------------------
 71 | 
 72 | * Compile the package using scikit-build.
 73 | 
 74 | * Introduce a second level of multidimensional chunking.
 75 | 
 76 | * Complete API renaming.
 77 | 
 78 | * Support the buffer protocol and the numpy array protocol.
 79 | 
 80 | * Generalize the slicing.
 81 | 
 82 | * Make python-caterva independent of numpy.
 83 | 
 84 | 
 85 | Changes from 0.2.3 to 0.3.0
 86 | ---------------------------
 87 | 
 88 | * Set the development status to alpha.
 89 | 
 90 | * Add instructions about installing python-caterva from pip.
 91 | 
 92 | * `getitem` and `setitem` are now special methods in `ext.Container`.
 93 | 
 94 | * Add new class from numpy arrays `NPArray`.
 95 | 
 96 | * Support for serializing/deserializing Containers to/from serialized frames (bytes).
 97 | 
 98 | * The `pshape` is calculated automatically if it is `None`.
 99 | 
100 | * Add a `.sframe` attribute for the serialized frame.
101 | 
102 | * Big refactor for more consistent inheritance among classes.
103 | 
104 | * The `from_numpy()` function always returns a `NPArray` now.
105 | 
106 | 
107 | Changes from 0.2.2 to 0.2.3
108 | ---------------------------
109 | 
110 | * Rename `MANINFEST.in` to `MANIFEST.in`.
111 | 
112 | * Fix the list of available cnames.
113 | 
114 | 
115 | Changes from 0.2.1 to 0.2.2
116 | ---------------------------
117 | 
118 | * Added a `MANIFEST.in` for including all C-Blosc2 and Caterva sources in package.
119 | 
120 | 
121 | Changes from 0.1.1 to 0.2.1
122 | ---------------------------
123 | 
124 | * Docstrings have been added. In addition, the documentation can be found at:
125 |   https://python-caterva.readthedocs.io/
126 | 
127 | * Add a `copy` parameter to `from_file()`.
128 | 
129 | * `complib` has been renamed to `cname` for compatibility with blosc-powered packages.
130 | 
131 | * The use of an itemsize that is not a power of 2 is allowed now.
132 | 


--------------------------------------------------------------------------------
/bench/compare_loadframe.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | # Benchmark for comparing loading on-disk frames for
 10 | # multidimensional arrays using different methods:
 11 | # * Opening an on-disk frame without copying
 12 | # * Loading the frame in-memory
 13 | 
 14 | import caterva as cat
 15 | import numpy as np
 16 | import os
 17 | from time import time
 18 | 
# Dimensions, type and persistency properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 100, 50)
blockshape = (10, 50, 25)

dtype = np.float64

# Compression properties
cname = "zstd"
clevel = 6
filter = cat.SHUFFLE
nthreads = 2

# Start from a clean slate: remove output files left over from a previous run
fname_npy = "compare_loadframe.npy"
if os.path.exists(fname_npy):
    os.remove(fname_npy)
fname_cat = "compare_loadframe.cat"
if os.path.exists(fname_cat):
    os.remove(fname_cat)

# Create content for populating arrays
t0 = time()
content = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape)
# content = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
t1 = time()
print("Time for filling array (numpy): %.3fs" % (t1 - t0))

# Time how long numpy takes to write the uncompressed array to disk
t0 = time()
np.save(fname_npy, content)
t1 = time()
print("Time for storing array on-disk (numpy): %.3fs" % (t1 - t0))

# Create and fill a caterva array using a block iterator
t0 = time()
a = cat.empty(shape, chunkshape=chunkshape, blockshape=blockshape, itemsize=content.itemsize,
              filename=fname_cat,
              cname=cname, clevel=clevel, filters=[filter],
              nthreads=nthreads)
for block, info in a.iter_write():
    # Copy the matching region of `content` into the block as raw bytes
    nparray = content[info.slice]
    block[:] = bytes(nparray)
# Read the compression ratio before dropping the array; `del a` is inside the timed region
acratio = a.cratio
del a
t1 = time()
print("Time for storing array on-disk (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

print()

# Setup the coordinates for random planes
# (3 random indices along the second axis, shared by all read benchmarks)
planes_idx = np.random.randint(0, shape[1], 3)
 69 | 
 70 | def bench_read_numpy(fname, planes_idx, copy):
 71 |     t0 = time()
 72 |     mmap_mode = None if copy else 'r'
 73 |     a = np.load(fname, mmap_mode=mmap_mode)
 74 |     t1 = time()
 75 |     print("Time for opening the on-disk frame (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))
 76 | 
 77 |     t0 = time()
 78 |     for i in planes_idx:
 79 |         block = a[:, i, :]
 80 |         if not copy:
 81 |             # Do an actual read for memory mapped files
 82 |             # Do an actual read for memory mapped files
 83 |             block = block.copy()
 84 |     del a
 85 |     t1 = time()
 86 |     print("Time for reading with getitem (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))
 87 | 
 88 | def bench_read_caterva(fname, planes_idx, copy):
 89 |     t0 = time()
 90 |     a = cat.open(fname, copy=copy)
 91 |     t1 = time()
 92 |     print("Time for opening the on-disk frame (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))
 93 | 
 94 |     t0 = time()
 95 |     for i in planes_idx:
 96 |         rbytes = a[:, i, :]
 97 |         block = np.frombuffer(rbytes, dtype=dtype).reshape((shape[0], shape[2]))
 98 |     del a
 99 |     t1 = time()
100 |     print("Time for reading with getitem (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))
101 | 
# Run each backend twice: first without copying, then with a full in-memory copy
for copy_flag in (False, True):
    bench_read_numpy(fname_npy, planes_idx, copy=copy_flag)
print()
for copy_flag in (False, True):
    bench_read_caterva(fname_cat, planes_idx, copy=copy_flag)

# Clean up the files produced by this benchmark
for leftover in (fname_npy, fname_cat):
    os.remove(leftover)
110 | 


--------------------------------------------------------------------------------
/bench/compare_reduceframe.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | # Benchmark for comparing reducing on-disk frames for
 10 | # multidimensional arrays using different methods:
 11 | # * Opening an on-disk frame without copying
 12 | # * Loading the frame in-memory
 13 | 
 14 | import caterva as cat
 15 | import numpy as np
 16 | import os
 17 | from time import time
 18 | import platform
 19 | 
# Platform flags; `macosx` gates the `/usr/sbin/purge` calls in the benchmarks below
macosx = 'Darwin' in platform.platform()
linux = 'Linux' in platform.platform()

# Dimensions, type and persistency properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 100, 50)
blockshape = (10, 50, 25)

dtype = np.float64

# Compression properties
cname = "lz4"
clevel = 5
filter = cat.SHUFFLE
nthreads = 4

# Start from a clean slate: remove output files left over from a previous run
fname_npy = "compare_reduceframe.npy"
if os.path.exists(fname_npy):
    os.remove(fname_npy)
fname_cat = "compare_reduceframe.cat"
if os.path.exists(fname_cat):
    os.remove(fname_cat)

# Create content for populating arrays
t0 = time()
content = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape)
# content = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
t1 = time()
print("Time for filling array (numpy): %.3fs" % (t1 - t0))

# Time how long numpy takes to write the uncompressed array to disk
t0 = time()
np.save(fname_npy, content)
t1 = time()
print("Time for storing array on-disk (numpy): %.3fs" % (t1 - t0))

# Create and fill a caterva array using a block iterator
t0 = time()
a = cat.empty(shape, chunkshape=chunkshape, blockshape=blockshape, itemsize=content.itemsize,
              filename=fname_cat,
              cname=cname, clevel=clevel, filters=[filter],
              nthreads=nthreads)

for block, info in a.iter_write():
    # Copy the matching region of `content` into the block as raw bytes
    nparray = content[info.slice]
    block[:] = bytes(nparray)
# Read the compression ratio before dropping the array; `del a` is inside the timed region
acratio = a.cratio
del a
t1 = time()
print("Time for storing array on-disk (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

print()
 71 | 
 72 | def bench_read_numpy(fname, copy):
 73 |     if macosx: os.system("/usr/sbin/purge")
 74 |     t0 = time()
 75 |     mmap_mode = None if copy else 'r'
 76 |     a = np.load(fname, mmap_mode=mmap_mode)
 77 |     t1 = time()
 78 |     print("Time for opening the on-disk frame (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))
 79 | 
 80 |     if macosx: os.system("/usr/sbin/purge")
 81 |     t0 = time()
 82 |     acc = a.sum()
 83 |     del a
 84 |     t1 = time()
 85 |     print("Time for reducing with (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))
 86 |     return acc
 87 | 
 88 | def bench_read_caterva(fname, copy):
 89 |     if macosx: os.system("/usr/sbin/purge")
 90 |     t0 = time()
 91 |     a = cat.open(fname, copy=copy)
 92 |     t1 = time()
 93 |     print("Time for opening the on-disk frame (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))
 94 | 
 95 |     if macosx: os.system("/usr/sbin/purge")
 96 |     t0 = time()
 97 |     acc = 0
 98 |     for (block, info) in a.iter_read():
 99 |         block = np.frombuffer(block, dtype=dtype).reshape(info.shape)
100 |         acc += np.sum(block)
101 |     del a
102 |     t1 = time()
103 |     print("Time for reducing with (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))
104 |     return acc
105 | 
# Run the numpy reductions (memory-mapped, then fully loaded) and check they agree
acc_npy1 = bench_read_numpy(fname_npy, copy=False)
acc_npy2 = bench_read_numpy(fname_npy, copy=True)
np.testing.assert_allclose(acc_npy1, acc_npy2)
print()
# Run the caterva reductions and validate each one against the numpy result
acc_cat1 = bench_read_caterva(fname_cat, copy=False)
np.testing.assert_allclose(acc_cat1, acc_npy1)
acc_cat2 = bench_read_caterva(fname_cat, copy=True)
# Check the copy=True result itself (previously acc_cat1 was re-checked here,
# leaving acc_cat2 unverified)
np.testing.assert_allclose(acc_cat2, acc_npy2)

# Clean up the files produced by this benchmark
os.remove(fname_npy)
os.remove(fname_cat)
117 | 


--------------------------------------------------------------------------------
/doc/source/getting_started/tutorial.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | jupytext:
  3 |   formats: md:myst
  4 |   text_representation:
  5 |     extension: .md
  6 |     format_name: myst
  7 |     format_version: 0.13
  8 |     jupytext_version: 1.11.2
  9 | kernelspec:
 10 |   display_name: Python 3
 11 |   language: python
 12 |   name: python3
 13 | ---
 14 | 
 15 | # Tutorial
 16 | Caterva functions let users perform different operations on Caterva arrays, such as setting, copying or slicing them.
 17 | In this section, we are going to see how to create and manipulate a Caterva array in a simple way.
 18 | 
 19 | ```{code-cell} ipython3
 20 | import caterva as cat
 21 | 
 22 | cat.__version__
 23 | ```
 24 | 
 25 | ## Creating an array
 26 | First, we create an array, with zero being used as the default value for uninitialized portions of the array.
 27 | 
 28 | ```{code-cell} ipython3
 29 | c = cat.zeros((10000, 10000), itemsize=4, chunks=(1000, 1000), blocks=(100, 100))
 30 | 
 31 | c
 32 | ```
 33 | 
 34 | ## Reading and writing data
 35 | We can access and edit Caterva arrays using NumPy.
 36 | 
 37 | ```{code-cell} ipython3
 38 | import struct
 39 | import numpy as np
 40 | 
 41 | dtype = np.int32
 42 | 
 43 | c[0, :] = np.arange(10000, dtype=dtype)
 44 | c[:, 0] = np.arange(10000, dtype=dtype)
 45 | ```
 46 | 
 47 | ```{code-cell} ipython3
 48 | c[0, 0]
 49 | ```
 50 | 
 51 | ```{code-cell} ipython3
 52 | np.array(c[0, 0]).view(dtype)
 53 | ```
 54 | 
 55 | ```{code-cell} ipython3
 56 | np.array(c[0, -1]).view(dtype)
 57 | ```
 58 | 
 59 | ```{code-cell} ipython3
 60 | np.array(c[0, :]).view(dtype)
 61 | ```
 62 | 
 63 | ```{code-cell} ipython3
 64 | np.array(c[:, 0]).view(dtype)
 65 | ```
 66 | 
 67 | ```{code-cell} ipython3
 68 | np.array(c[:]).view(dtype)
 69 | ```
 70 | 
 71 | ## Persistent data
 72 | When we create a Caterva array, we can specify where it will be stored.
 73 | Then, we can access this array whenever we want, and it will still contain all the data because it is stored persistently.
 74 | 
 75 | ```{code-cell} ipython3
 76 | c1 = cat.full((1000, 1000), fill_value=b"pepe", chunks=(100, 100), blocks=(50, 50),
 77 |              urlpath="cat_tutorial.caterva")
 78 | ```
 79 | 
 80 | ```{code-cell} ipython3
 81 | c2 = cat.open("cat_tutorial.caterva")
 82 | 
 83 | c2.info
 84 | ```
 85 | 
 86 | ```{code-cell} ipython3
 87 | np.array(c2[0, 20:30]).view("S4")
 88 | ```
 89 | 
 90 | ```{code-cell} ipython3
 91 | import os
 92 | if os.path.exists("cat_tutorial.caterva"):
 93 |   cat.remove("cat_tutorial.caterva")
 94 | ```
 95 | 
 96 | ## Compression params
 97 | Here we can see how, when making a copy of a Caterva array, we can easily change its compression parameters.
 98 | 
 99 | ```{code-cell} ipython3
100 | b = np.arange(1000000).tobytes()
101 | 
102 | c1 = cat.from_buffer(b, shape=(1000, 1000), itemsize=8, chunks=(500, 10), blocks=(50, 10))
103 | 
104 | c1.info
105 | ```
106 | 
107 | ```{code-cell} ipython3
108 | c2 = c1.copy(chunks=(500, 10), blocks=(50, 10),
109 |              codec=cat.Codec.ZSTD, clevel=9, filters=[cat.Filter.BITSHUFFLE])
110 | 
111 | c2.info
112 | ```
113 | 
114 | ## Metalayers
115 | Metalayers are small pieces of metadata that describe the properties of the data stored in a container.
116 | The metalayers of a Caterva array are also easy to access and edit by users.
117 | 
118 | ```{code-cell} ipython3
119 | from msgpack import packb, unpackb
120 | ```
121 | 
122 | ```{code-cell} ipython3
123 | meta = {
124 |     "dtype": packb("i8"),
125 |     "coords": packb([5.14, 23.])
126 | }
127 | ```
128 | 
129 | ```{code-cell} ipython3
130 | c = cat.zeros((1000, 1000), 5, chunks=(100, 100), blocks=(50, 50), meta=meta)
131 | ```
132 | 
133 | ```{code-cell} ipython3
134 | len(c.meta)
135 | ```
136 | 
137 | ```{code-cell} ipython3
138 | c.meta.keys()
139 | ```
140 | 
141 | ```{code-cell} ipython3
142 | for key in c.meta:
143 |     print(f"{key} -> {unpackb(c.meta[key])}")
144 | ```
145 | 
146 | ```{code-cell} ipython3
147 | c.meta["coords"] = packb([0., 23.])
148 | ```
149 | 
150 | ```{code-cell} ipython3
151 | for key in c.meta:
152 |     print(f"{key} -> {unpackb(c.meta[key])}")
153 | ```
154 | 
155 | ## Small tutorial
156 | This example shows how easy it is to create a Caterva array from an image and how users can manipulate it using Caterva and Image functions.
157 | 
158 | ```{code-cell} ipython3
159 | from PIL import Image
160 | ```
161 | 
162 | ```{code-cell} ipython3
163 | im = Image.open("../_static/blosc-logo_128.png")
164 | 
165 | im
166 | ```
167 | 
168 | ```{code-cell} ipython3
169 | meta = {"dtype": b"|u1"}
170 | 
171 | c = cat.asarray(np.array(im), chunks=(50, 50, 4), blocks=(10, 10, 4), meta=meta)
172 | 
173 | c.info
174 | ```
175 | 
176 | ```{code-cell} ipython3
177 | im2 = c[15:55, 10:35]  # Letter B
178 | 
179 | Image.fromarray(np.array(im2).view(c.meta["dtype"]))
180 | ```
181 | 
182 | ```{code-cell} ipython3
183 | 
184 | ```
185 | 


--------------------------------------------------------------------------------
/bench/compare_getslice.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | # Benchmark for comparing speeds of getitem of hyperplanes on a
 10 | # multidimensional array and using different backends:
 11 | # Caterva, Zarr and HDF5
 12 | # In brief, each approach has its own strengths and weaknesses.
 13 | #
 14 | # Usage: pass any argument for testing the persistent backends.
 15 | # Else, only in-memory containers will be tested.
 16 | 
 17 | import caterva as cat
 18 | import zarr
 19 | import numcodecs
 20 | import tables
 21 | import numpy as np
 22 | import os
 23 | import sys
 24 | import shutil
 25 | from time import time
 26 | 
# Any non-empty first command-line argument enables the persistent backends
# (bool() of a non-empty string is True, so even "0" or "False" enables them)
persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False
if persistent:
    print("Testing the persistent backends...")
else:
    print("Testing the in-memory backends...")

# Dimensions and type properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 500, 50)
blockshape = (10, 100, 25)
# This config generates containers of more than 2 GB in size
# shape = (250, 4000, 350)
# pshape = (200, 100, 100)
dtype = np.float64

# Compression properties
cname = "zstd"
clevel = 6
filter = cat.Filter.SHUFFLE
zfilter = numcodecs.Blosc.SHUFFLE
nthreads = 1
# Number of elements in one block; passed as `blocksize` to numcodecs.Blosc below.
# NOTE(review): if numcodecs expects `blocksize` in bytes, this should probably be
# multiplied by the itemsize -- confirm against the numcodecs documentation.
blocksize = int(np.prod(blockshape))

# File names are only set for caterva/zarr in persistent mode; HDF5 always gets a name
fname_cat = None
fname_zarr = None
fname_h5 = "whatever.h5"
if persistent:
    # Remove outputs left over from a previous run
    fname_cat = "compare_getslice.cat"
    if os.path.exists(fname_cat):
        os.remove(fname_cat)
    fname_zarr = "compare_getslice.zarr"
    if os.path.exists(fname_zarr):
        shutil.rmtree(fname_zarr)
    fname_h5 = "compare_getslice.h5"
    if os.path.exists(fname_h5):
        os.remove(fname_h5)

# Create content for populating arrays
content = np.random.normal(0, 1, int(np.prod(shape))).reshape(shape)

# Create a caterva array and fill it with a single whole-array assignment
# (the printed label still says "iter")
t0 = time()
a = cat.empty(shape, content.itemsize, chunkshape=chunkshape, blockshape=blockshape,
              dtype=str(content.dtype), urlpath=fname_cat,
              cname=cname, clevel=clevel, filters=[filter], nthreads=nthreads)
a[:] = content
acratio = a.cratio
if persistent:
    # The array is reopened from disk in the read benchmark below
    del a
t1 = time()
print("Time for filling array (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

# Create and fill a zarr array
t0 = time()
compressor = numcodecs.Blosc(cname=cname, clevel=clevel, shuffle=zfilter, blocksize=blocksize)
numcodecs.blosc.set_nthreads(nthreads)
if persistent:
    z = zarr.open(fname_zarr, mode='w', shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
else:
    z = zarr.empty(shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
z[:] = content
zratio = z.nbytes / z.nbytes_stored
if persistent:
    # The array is reopened from disk in the read benchmark below
    del z
t1 = time()
print("Time for filling array (zarr): %.3fs ; CRatio: %.1fx" % ((t1 - t0), zratio))

# Create and fill a hdf5 array
t0 = time()
filters = tables.Filters(complevel=clevel, complib="blosc:%s" % cname, shuffle=True)
tables.set_blosc_max_threads(nthreads)
if persistent:
    h5f = tables.open_file(fname_h5, 'w')
else:
    # In-memory mode: presumably nothing is written to `fname_h5` with the core
    # driver and backing store disabled -- confirm against the PyTables docs
    h5f = tables.open_file(fname_h5, 'w', driver='H5FD_CORE', driver_core_backing_store=0)
h5ca = h5f.create_carray(h5f.root, 'carray', filters=filters, chunkshape=chunkshape, obj=content)
h5f.flush()
h5ratio = h5ca.size_in_memory / h5ca.size_on_disk
if persistent:
    # The file is reopened in read mode in the read benchmark below
    h5f.close()
t1 = time()
print("Time for filling array (hdf5): %.3fs ; CRatio: %.1fx" % ((t1 - t0), h5ratio))

# Setup the coordinates for random planes
# (100 random indices along the second axis, shared by all three backends)
planes_idx = np.random.randint(0, shape[1], 100)

# Time getitem with caterva
t0 = time()
if persistent:
    a = cat.open(fname_cat)  # reopen
for i in planes_idx:
    rbytes = a[:, i, :]
del a
t1 = time()
print("Time for reading with getitem (caterva): %.3fs" % (t1 - t0))

# Time getitem with zarr
t0 = time()
if persistent:
    z = zarr.open(fname_zarr, mode='r')
for i in planes_idx:
    block = z[:, i, :]
del z
t1 = time()
print("Time for reading with getitem (zarr): %.3fs" % (t1 - t0))

# Time getitem with hdf5
t0 = time()
if persistent:
    h5f = tables.open_file(fname_h5, 'r', filters=filters)
h5ca = h5f.root.carray
for i in planes_idx:
    block = h5ca[:, i, :]
h5f.close()
t1 = time()
print("Time for reading with getitem (hdf5): %.3fs" % (t1 - t0))


# Clean up the on-disk containers created in persistent mode
if persistent:
    os.remove(fname_cat)
    shutil.rmtree(fname_zarr)
    os.remove(fname_h5)
149 | 


--------------------------------------------------------------------------------
/bench/compare_serialization.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | # Benchmark for comparing serializing/deserializing frames for
 10 | # multidimensional arrays using different methods:
 11 | # * to_sframe() / from_sframe()
 12 | # * Numpy copy
 13 | # * PyArrow
 14 | # * Pickle v4
 15 | # * Pickle v5 (in the future)
 16 | 
 17 | import caterva as cat
 18 | import numpy as np
 19 | from time import time
 20 | import pyarrow as pa
 21 | 
 22 | import pickle
 23 | 
check_roundtrip = False  # set this to True to check for roundtrip validity

# Dimensions, type and persistency properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 500, 100)
blockshape = (10, 50, 50)
dtype = "f8"

# Compression properties
cname = "lz4"
clevel = 3
# cname = "zstd"
# clevel = 1
filter = cat.SHUFFLE
nthreads = 4

# Build the reference numpy array that every method will serialize
t0 = time()
arr = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape)
# arr = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
t1 = time()
print("Time for filling array (numpy): %.3fs" % (t1 - t0))

# Baseline: a plain in-memory numpy copy
t0 = time()
arr2 = arr.copy()
t1 = time()
print("Time for copying array in-memory (numpy): %.3fs" % (t1 - t0))

# Create and fill a caterva array using a block iterator and an in-memory frame
t0 = time()
carr = cat.empty(shape, np.dtype(dtype).itemsize, dtype=dtype, chunkshape=chunkshape, blockshape=blockshape,
                 enforceframe=True,
                 cname=cname, clevel=clevel, filters=[filter],
                 cnthreads=nthreads, dnthreads=nthreads)
for block, info in carr.iter_write():
    # Copy the matching region of `arr` into the block as raw bytes
    nparray = arr[info.slice]
    block[:] = bytes(nparray)
acratio = carr.cratio
t1 = time()
print("Time for creating an array in-memory (numpy -> caterva, copy): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

print()

# --- Serialization timings ---

# caterva: access the `.sframe` attribute (reported as the no-copy path)
t0 = time()
sframe_nocopy = carr.sframe
t1 = time()
print("Time for serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0))

# caterva: `to_sframe()` (reported as the copy path)
t0 = time()
sframe_copy = carr.to_sframe()
t1 = time()
print("Time for serializing array in-memory (caterva, copy): %.3fs" % (t1 - t0))

# pyarrow: serialize to components (reported as the no-copy path)
t0 = time()
serialized = pa.serialize(arr)
pyarrow_nocopy = serialized.to_components()
t1 = time()
print("Time for serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0))

# pyarrow: serialize all the way to Python bytes (reported as the copy path)
t0 = time()
pyarrow_copy = pa.serialize(arr).to_buffer().to_pybytes()
t1 = time()
print("Time for serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0))

# pickle with protocol 4
t0 = time()
frame_pickle = pickle.dumps(arr, protocol=4)
t1 = time()
print("Time for serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0))

# --- De-serialization timings (each optionally followed by a roundtrip check) ---

t0 = time()
carr2 = cat.from_sframe(sframe_nocopy, copy=False)
t1 = time()
print("Time for de-serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0))

if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(carr2, arr)
    print("ok!")

t0 = time()
arr2 = pa.deserialize_components(pyarrow_nocopy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0))

if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

t0 = time()
arr2 = pa.deserialize(pyarrow_copy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0))

if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

t0 = time()
arr2 = pickle.loads(frame_pickle)
t1 = time()
print("Time for de-serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0))

if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

print()
# Full roundtrip back to numpy: rebuild the caterva array from the copied
# serialized frame, copy it, and convert the copy to a numpy array
t0 = time()
for i in range(1):
    carr3 = cat.from_sframe(sframe_copy)
    arr2 = np.asarray(carr3.copy())
t1 = time()
print("Time for re-creating array in-memory (caterva -> numpy, copy): %.3fs" % (t1 - t0))

if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

print()
# Estimated transmission times. Each link speed is modelled as a byte rate of
# 10, 100 and 1000 MiB/s respectively; the compressed size is arrsize / acratio.
arrsize = arr.size * arr.itemsize
time_100Mbps = arrsize / (10 * 2 ** 20)
print("Time to transmit array at 100 Mbps (no compression):\t%6.3fs" % time_100Mbps)
ctime_100Mbps = (arrsize / acratio) / (10 * 2**20)
print("Time to transmit array at 100 Mbps (compression):\t%6.3fs" % ctime_100Mbps)
time_1Gbps = arrsize / (100 * 2 ** 20)
print("Time to transmit array at 1 Gbps (no compression):\t%6.3fs" % time_1Gbps)
ctime_1Gbps = (arrsize / acratio) / (100 * 2**20)
print("Time to transmit array at 1 Gbps (compression):\t\t%6.3fs" % ctime_1Gbps)
time_10Gbps = arrsize / (1000 * 2 ** 20)
print("Time to transmit array at 10 Gbps (no compression):\t%6.3fs" % time_10Gbps)
ctime_10Gbps = (arrsize / acratio) / (1000 * 2**20)
print("Time to transmit array at 10 Gbps (compression):\t%6.3fs" % ctime_10Gbps)
159 | 


--------------------------------------------------------------------------------
/caterva/ndarray.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | from . import caterva_ext as ext
 10 | import ndindex
 11 | import numpy as np
 12 | from .info import InfoReporter
 13 | import os
 14 | from .meta import Meta
 15 | 
 16 | 
 17 | def process_key(key, shape):
 18 |     key = ndindex.ndindex(key).expand(shape).raw
 19 |     mask = tuple(True if isinstance(k, int) else False for k in key)
 20 |     key = tuple(k if isinstance(k, slice) else slice(k, k+1, None) for k in key)
 21 |     return key, mask
 22 | 
 23 | 
 24 | def prod(list):
 25 |     prod = 1
 26 |     for li in list:
 27 |         prod *= li
 28 |     return prod
 29 | 
 30 | 
 31 | def get_caterva_start_stop(ndim, key, shape):
 32 |     start = tuple(s.start if s.start is not None else 0 for s in key)
 33 |     stop = tuple(s.stop if s.stop is not None else sh for s, sh in zip(key, shape))
 34 | 
 35 |     size = prod([stop[i] - start[i] for i in range(ndim)])
 36 | 
 37 |     return start, stop, size
 38 | 
 39 | 
 40 | def parse_kwargs(**kwargs):
 41 |     if kwargs.get("urlpath"):
 42 |         if os.path.exists(kwargs["urlpath"]):
 43 |             raise FileExistsError(f"Can not create the file {kwargs['urlpath']}."
 44 |                                   f"It already exists!")
 45 | 
 46 | 
 47 | class NDArray(ext.NDArray):
 48 |     def __init__(self, **kwargs):
 49 |         parse_kwargs(**kwargs)
 50 |         self.kwargs = kwargs
 51 |         super(NDArray, self).__init__(**self.kwargs)
 52 | 
 53 |     @classmethod
 54 |     def cast(cls, cont):
 55 |         cont.__class__ = cls
 56 |         assert isinstance(cont, NDArray)
 57 |         return cont
 58 | 
 59 |     @property
 60 |     def meta(self):
 61 |         return Meta(self)
 62 | 
 63 |     @property
 64 |     def info(self):
 65 |         """
 66 |         Print information about this array.
 67 |         """
 68 |         return InfoReporter(self)
 69 | 
 70 |     @property
 71 |     def info_items(self):
 72 |         items = []
 73 |         items += [("Type", f"{self.__class__.__name__}")]
 74 |         items += [("Itemsize", self.itemsize)]
 75 |         items += [("Shape", self.shape)]
 76 |         items += [("Chunks", self.chunks)]
 77 |         items += [("Blocks", self.blocks)]
 78 |         items += [("Comp. codec", self.codec.name)]
 79 |         items += [("Comp. level", self.clevel)]
 80 |         filters = [f.name for f in self.filters if f.name != "NOFILTER"]
 81 |         items += [("Comp. filters", f"[{', '.join(map(str, filters))}]")]
 82 |         items += [("Comp. ratio", f"{self.cratio:.2f}")]
 83 |         return items
 84 | 
 85 |     def __setitem__(self, key, value):
 86 |         key, mask = process_key(key, self.shape)
 87 |         start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape)
 88 |         key = (start, stop)
 89 |         return ext.set_slice(self, key, value)
 90 | 
 91 |     def __getitem__(self, key):
 92 |         """ Get a (multidimensional) slice as specified in key.
 93 | 
 94 |         Parameters
 95 |         ----------
 96 |         key: int, slice or sequence of slices
 97 |             The index for the slices to be updated. Note that step parameter is not honored yet
 98 |             in slices.
 99 | 
100 |         Returns
101 |         -------
102 |         out: NDArray
103 |             An array, stored in a non-compressed buffer, with the requested data.
104 |         """
105 |         key, mask = process_key(key, self.shape)
106 |         start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape)
107 |         key = (start, stop)
108 |         shape = [sp - st for st, sp in zip(start, stop)]
109 |         arr = np.zeros(shape, dtype=f"S{self.itemsize}")
110 |         return ext.get_slice_numpy(arr, self, key, mask)
111 | 
112 |     def resize(self, newshape):
113 |         """Change the shape of the array by growing one or more dimensions.
114 | 
115 |         Parameters
116 |         ----------
117 |         newshape : tuple or list
118 |             The new shape of the array. It should have the same dimensions
119 |             as `self`.
120 | 
121 |         Notes
122 |         -----
123 |         The array values corresponding to the added positions are not initialized.
124 |         Thus, the user is in charge of initializing them.
125 |         """
126 |         return ext.resize(self, newshape)
127 | 
128 |     def slice(self, key, **kwargs):
129 |         """ Get a (multidimensional) slice as specified in key. Generalizes :py:meth:`__getitem__`.
130 | 
131 |         Parameters
132 |         ----------
133 |         key: int, slice or sequence of slices
134 |             The index for the slices to be updated. Note that step parameter is not honored yet in
135 |             slices.
136 | 
137 |         Other Parameters
138 |         ----------------
139 |         kwargs: dict, optional
140 |             Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.
141 | 
142 |         Returns
143 |         -------
144 |         out: NDArray
145 |             An array with the requested data.
146 |         """
147 |         arr = NDArray(**kwargs)
148 |         kwargs = arr.kwargs
149 |         key, mask = process_key(key, self.shape)
150 |         start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape)
151 |         key = (start, stop)
152 |         return ext.get_slice(arr, self, key, mask, **kwargs)
153 | 
154 |     def squeeze(self):
155 |         """Remove the 1's in array's shape."""
156 |         super(NDArray, self).squeeze(**self.kwargs)
157 | 
158 |     def to_buffer(self):
159 |         """Returns a buffer with the data contents.
160 | 
161 |         Returns
162 |         -------
163 |         bytes
164 |             The buffer containing the data of the whole array.
165 |         """
166 |         return super(NDArray, self).to_buffer(**self.kwargs)
167 | 
168 |     def copy(self, **kwargs):
169 |         """Copy into a new array.
170 | 
171 |         Other Parameters
172 |         ----------------
173 |         kwargs: dict, optional
174 |             Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.
175 | 
176 |         Returns
177 |         -------
178 |         NDArray
179 |             An array containing the copy.
180 |         """
181 |         arr = NDArray(**kwargs)
182 |         return ext.copy(arr, self, **kwargs)
183 | 


--------------------------------------------------------------------------------
/doc/source/_static/css/custom.css:
--------------------------------------------------------------------------------
:root {
    /* Every declaration in this rule is a custom property (CSS variable).
       Nothing is styled here directly; the values only take effect where a
       rule reads them through var(). */

    /*****************************************************************************
    * Theme config
    **/
    --pst-header-height: 60px;

    /*****************************************************************************
    * Font size
    **/
    --pst-font-size-base: 15px; /* base font size - applied at body / html level */

    /* heading font sizes */
    --pst-font-size-h1: 36px;
    --pst-font-size-h2: 32px;
    --pst-font-size-h3: 26px;
    --pst-font-size-h4: 21px;
    --pst-font-size-h5: 18px;
    --pst-font-size-h6: 16px;

    /* smaller than heading font sizes */
    --pst-font-size-milli: 12px;

    --pst-sidebar-font-size: .9em;
    --pst-sidebar-caption-font-size: .9em;

    /*****************************************************************************
    * Font family
    **/
    /* These are adapted from https://systemfontstack.com/ */
    --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, "Helvetica Neue",
    Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol;
    --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco,
    Liberation Mono, Lucida Console, monospace;

    /* The base, heading and monospace families all resolve to the two system
       stacks declared just above. */
    --pst-font-family-base: var(--pst-font-family-base-system);
    --pst-font-family-heading: var(--pst-font-family-base);
    --pst-font-family-monospace: var(--pst-font-family-monospace-system);

    /*****************************************************************************
    * Color
    *
    * Colors are stored as bare "red, green, blue" triplets, not as complete
    * color values, so a consumer has to wrap them itself, e.g.
    * rgb(var(--pst-color-primary)) as done for the --tabs--label-text
    * variables at the end of this rule.
    **/
    --pst-color-primary: 15, 109, 126;
    --pst-color-primary-dark: 2, 89, 100;
    --pst-color-secondary: 212, 168, 12;
    --pst-color-success: 40, 167, 69;
    --pst-color-info: 0, 123, 255;  /* previous value: 23, 162, 184 */
    --pst-color-warning: 255, 193, 7;
    --pst-color-danger: 220, 53, 69;
    --pst-color-text-base: 51, 51, 51;

    /* Element colors: mostly aliases of the palette entries above. */
    --pst-color-h1: var(--pst-color-primary-dark);
    --pst-color-h2: var(--pst-color-primary-dark);
    --pst-color-h3: var(--pst-color-text-base);
    --pst-color-h4: var(--pst-color-text-base);
    --pst-color-h5: var(--pst-color-text-base);
    --pst-color-h6: var(--pst-color-text-base);
    --pst-color-paragraph: var(--pst-color-text-base);
    --pst-color-link: var(--pst-color-primary);
    --pst-color-link-hover: var(--pst-color-secondary);
    --pst-color-headerlink: var(--pst-color-primary);
    --pst-color-headerlink-hover: 255, 255, 255;
    --pst-color-preformatted-text: 34, 34, 34;
    --pst-color-preformatted-background: 250, 250, 250;
    --pst-color-inline-code: var(--pst-color-primary);

    /* Navigation colors: every hover/active state shares the single
       "active navigation" color, which is the secondary palette color. */
    --pst-color-active-navigation: var(--pst-color-secondary);
    --pst-color-navbar-link: 77, 77, 77;
    --pst-color-navbar-link-hover: var(--pst-color-active-navigation);
    --pst-color-navbar-link-active: var(--pst-color-active-navigation);
    --pst-color-sidebar-link: 77, 77, 77;
    --pst-color-sidebar-link-hover: var(--pst-color-active-navigation);
    --pst-color-sidebar-link-active: var(--pst-color-active-navigation);
    --pst-color-sidebar-expander-background-hover: 244, 244, 244;
    --pst-color-sidebar-caption: 77, 77, 77;
    --pst-color-toc-link: 119, 117, 122;
    --pst-color-toc-link-hover: var(--pst-color-active-navigation);
    --pst-color-toc-link-active: var(--pst-color-active-navigation);

    /*****************************************************************************
    * Icon
    **/

    /* font awesome icons */
    --pst-icon-check-circle: '\f058';
    --pst-icon-info-circle: '\f05a';
    --pst-icon-exclamation-triangle: '\f071';
    --pst-icon-exclamation-circle: '\f06a';
    --pst-icon-times-circle: '\f057';
    --pst-icon-lightbulb: '\f0eb';

    /*****************************************************************************
    * Admonitions
    **/

    /* One color per admonition type, picked from the info / warning /
       danger / success palette entries. */
    --pst-color-admonition-default: var(--pst-color-info);
    --pst-color-admonition-note: var(--pst-color-info);
    --pst-color-admonition-attention: var(--pst-color-warning);
    --pst-color-admonition-caution: var(--pst-color-warning);
    --pst-color-admonition-warning: var(--pst-color-warning);
    --pst-color-admonition-danger: var(--pst-color-danger);
    --pst-color-admonition-error: var(--pst-color-danger);
    --pst-color-admonition-hint: var(--pst-color-success);
    --pst-color-admonition-tip: var(--pst-color-success);
    --pst-color-admonition-important: var(--pst-color-success);

    /* One icon per admonition type, picked from the icon variables above. */
    --pst-icon-admonition-default: var(--pst-icon-info-circle);
    --pst-icon-admonition-note: var(--pst-icon-info-circle);
    --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle);
    --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle);
    --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle);
    --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle);
    --pst-icon-admonition-error: var(--pst-icon-times-circle);
    --pst-icon-admonition-hint: var(--pst-icon-lightbulb);
    --pst-icon-admonition-tip: var(--pst-icon-lightbulb);
    --pst-icon-admonition-important: var(--pst-icon-exclamation-circle);

    /* Tab label variables: text uses the primary color and switches to the
       secondary color when active or hovered; backgrounds and the border are
       transparent. */
    --tabs--label-text: rgb(var(--pst-color-primary));
    --tabs--label-text--active: rgb(var(--pst-color-secondary));
    --tabs--label-text--hover: rgb(var(--pst-color-secondary));
    --tabs--label-background--active: transparent;
    --tabs--label-background--hover: transparent;
    --tabs--border: transparent;
}
126 | 
127 | /* Main index page overview cards */
128 | 
.intro-card {
    /* Card container: white background, square corners, inner padding and a
       10px vertical gap between stacked cards. */
    background: #fff;
    border-radius: 0;
    padding: 30px 10px 20px 10px;
    margin: 10px 0px;
}
135 | 
.intro-card p.card-text {
    /* Remove the paragraph margin from text placed inside a card. */
    margin: 0px;
}
139 | 
.intro-card .card-img-top {
    /* Fixed-height card image with a 10px margin on every side. */
    margin: 10px;
    height: 52px;
}
144 | 
.intro-card .card-header {
    /* Borderless white header; bold text sized with the h5 font-size
       variable. */
    border: none;
    background-color: white;
    font-size: var(--pst-font-size-h5);
    font-weight: bold;
    padding: 2.5rem 0rem 0.5rem 0rem;
}
152 | 
.intro-card .card-header .card-text {
    /* --pst-color-primary is read as a bare "r, g, b" triplet, hence the
       rgb() wrapper. */
    color: rgb(var(--pst-color-primary));
}
156 | 
.intro-card .card-footer {
    /* Borderless white footer. */
    border: none;
    background-color:white;
}
161 | 
.intro-card .card-footer p.card-text{
    /* Cap the footer text width and center the block horizontally with
       automatic side margins. */
    max-width: 220px;
    margin-left: auto;
    margin-right: auto;
}
167 | 


--------------------------------------------------------------------------------
/caterva/constructors.py:
--------------------------------------------------------------------------------
  1 | #######################################################################
  2 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under a BSD-style license (found in the
  6 | # LICENSE file in the root directory of this source tree)
  7 | #######################################################################
  8 | 
  9 | from . import caterva_ext as ext
 10 | from .ndarray import NDArray
 11 | 
 12 | 
 13 | def empty(shape, itemsize, **kwargs):
 14 |     """Create an empty array.
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     shape: tuple or list
 19 |         The shape for the final array.
 20 |     itemsize: int
 21 |         The size, in bytes, of each element.
 22 | 
 23 |     Other Parameters
 24 |     ----------------
 25 |     kwargs: dict, optional
 26 |         Keyword arguments supported:
 27 | 
 28 |             chunks: iterable object or None
 29 |                 The chunk shape.  If `None`, the array is stored using a non-compressed buffer.
 30 |                 (Default `None`)
 31 |             blocks: iterable object or None
 32 |                 The block shape.  If `None`, the array is stored using a non-compressed buffer.
 33 |                 (Default `None`)
 34 |             urlpath: str or None
 35 |                 The name of the file to store data.  If `None`, data is stored in-memory.
 36 |                 (Default `None`)
 37 |             contiguous: bool or None
 38 |                 Whether the data is stored contiguously or sparsely (one chunk per file).
 39 |                 If `None`, data is stored sparsely.
 40 |             memframe: bool
 41 |                 If True, the array is backed by a frame in-memory.  Else, by a super-chunk.
 42 |                 (Default: `False`)
 43 |             meta: dict or None
 44 |                 A dictionary with different metalayers.  One entry per metalayer:
 45 | 
 46 |                     key: bytes or str
 47 |                         The name of the metalayer.
 48 |                     value: object
 49 |                         The metalayer object that will be (de-)serialized using msgpack.
 50 | 
 51 |             codec: :py:class:`Codec`
 52 |                 The name for the compressor codec.  (Default: :py:attr:`Codec.LZ4`)
 53 |             clevel: int (0 to 9)
 54 |                 The compression level.  0 means no compression, and 9 maximum compression.
 55 |                 (Default: 5)
 56 |             filters: list
 57 |                 The filter pipeline.  (Default: [:py:attr:`Filter.SHUFFLE`])
 58 |             filtersmeta: list
 59 |                 The meta info for each filter in pipeline. (Default: [0])
 60 |             nthreads: int
 61 |                 The number of threads.  (Default: 1)
 62 |             usedict: bool
 63 |                 If a dictionary should be used during compression.  (Default: False)
 64 | 
 65 |     Returns
 66 |     -------
 67 |     out: NDArray
 68 |         A `NDArray` is returned.
 69 |     """
 70 |     arr = NDArray(**kwargs)
 71 |     kwargs = arr.kwargs
 72 |     ext.empty(arr, shape, itemsize, **kwargs)
 73 |     return arr
 74 | 
 75 | 
 76 | def zeros(shape, itemsize, **kwargs):
 77 |     """Create an array, with zero being used as the default value
 78 |     for uninitialized portions of the array.
 79 | 
 80 |     Parameters
 81 |     ----------
 82 |     shape: tuple or list
 83 |         The shape for the final array.
 84 |     itemsize: int
 85 |         The size, in bytes, of each element.
 86 | 
 87 |     Other Parameters
 88 |     ----------------
 89 |     kwargs: dict, optional
 90 |         Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.
 91 | 
 92 |     Returns
 93 |     -------
 94 |     out: NDArray
 95 |         A `NDArray` is returned.
 96 |     """
 97 |     arr = NDArray(**kwargs)
 98 |     kwargs = arr.kwargs
 99 |     ext.zeros(arr, shape, itemsize, **kwargs)
100 |     return arr
101 | 
102 | 
def full(shape, fill_value, **kwargs):
    """Create an array that uses `fill_value` as the default value for the
    uninitialized portions of the array.

    Parameters
    ----------
    shape: tuple or list
        The shape for the final array.
    fill_value: bytes
        Default value to use for uninitialized portions of the array.

    Other Parameters
    ----------------
    kwargs: dict, optional
        Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.

    Returns
    -------
    out: NDArray
        A `NDArray` is returned.
    """
    new_array = NDArray(**kwargs)
    # The extension receives the keyword arguments as stored on the new array.
    ext.full(new_array, shape, fill_value, **new_array.kwargs)
    return new_array
127 | 
128 | 
def from_buffer(buffer, shape, itemsize, **kwargs):
    """Create an array populated with the contents of a buffer.

    Parameters
    ----------
    buffer: bytes
        The buffer of the data to populate the container.
    shape: tuple or list
        The shape for the final container.
    itemsize: int
        The size, in bytes, of each element.

    Other Parameters
    ----------------
    kwargs: dict, optional
        Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.

    Returns
    -------
    out: NDArray
        A `NDArray` is returned.
    """
    new_array = NDArray(**kwargs)
    # The extension receives the keyword arguments as stored on the new array.
    ext.from_buffer(new_array, buffer, shape, itemsize, **new_array.kwargs)
    return new_array
156 | 
157 | 
def copy(array, **kwargs):
    """Create a copy of an array.

    Parameters
    ----------
    array: NDArray
        The array to be copied.

    Other Parameters
    ----------------
    kwargs: dict, optional
        Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.

    Returns
    -------
    out: NDArray
        A `NDArray` with a copy of the data.
    """
    duplicate = NDArray(**kwargs)
    # The extension receives the keyword arguments as stored on the new array.
    ext.copy(duplicate, array, **duplicate.kwargs)
    return duplicate
182 | 
183 | 
def open(urlpath):
    """Open a container stored in `urlpath`.

    .. warning:: Only one handler is supported per file.

    Parameters
    ----------
    urlpath: str
        The file having a Blosc2 frame format with a Caterva metalayer on it.

    Returns
    -------
    out: NDArray
        A `NDArray` is returned.
    """
    # No keyword arguments here: the container is created bare and the
    # extension attaches the contents of the file to it.
    container = NDArray()
    ext.from_file(container, urlpath)
    return container
204 | 
205 | 
def asarray(ndarray, **kwargs):
    """Convert the input to an array.

    Parameters
    ----------
    ndarray: array_like
        An array supporting the python buffer protocol and the numpy array interface.

    Other Parameters
    ----------------
    kwargs: dict, optional
        Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor.

    Returns
    -------
    out: NDArray
        A Caterva array interpretation of `ndarray`.
    """
    converted = NDArray(**kwargs)
    # The extension receives the keyword arguments as stored on the new array.
    ext.asarray(converted, ndarray, **converted.kwargs)
    return converted
230 | 


--------------------------------------------------------------------------------
/caterva/caterva_ext.pyx:
--------------------------------------------------------------------------------
  1 | # Hey Cython, this is Python 3!
  2 | # cython: language_level=3
  3 | 
  4 | #######################################################################
  5 | # Copyright (C) 2019-present, Blosc Development team <blosc@blosc.org>
  6 | # All rights reserved.
  7 | #
  8 | # This source code is licensed under a BSD-style license (found in the
  9 | # LICENSE file in the root directory of this source tree)
 10 | #######################################################################
 11 | from libc.stdlib cimport malloc, free
 12 | from libcpp cimport bool
 13 | from cpython.pycapsule cimport PyCapsule_New
 14 | from libc.stdint cimport uintptr_t
 15 | from libc.string cimport strdup, memcpy
 16 | from cpython cimport (
 17 |     PyObject_GetBuffer, PyBuffer_Release,
 18 |     PyBUF_SIMPLE, Py_buffer,
 19 |     PyBytes_FromStringAndSize
 20 | )
 21 | from .utils import Codec, Filter
 22 | import os.path
 23 | 
cdef extern from "<stdint.h>":
    # Fixed-width integer types used by the declarations and the code of this
    # module.
    # NOTE(review): int64_t is declared as `signed long` whereas uint64_t is
    # declared as `unsigned long long` -- confirm the asymmetry is intended.
    ctypedef   signed char  int8_t
    ctypedef   signed short int16_t
    ctypedef   signed int   int32_t
    ctypedef   signed long  int64_t
    ctypedef unsigned char  uint8_t
    ctypedef unsigned short uint16_t
    ctypedef unsigned int   uint32_t
    ctypedef unsigned long long uint64_t
 33 | 
 34 | 
cdef extern from "blosc2.h":
    # Declarations taken from the Blosc2 header that this module relies on.
    ctypedef enum:
        # Constants whose values come from the header.  Judging by their names
        # the first group are filter ids and the second one codec ids.
        BLOSC_NOFILTER
        BLOSC_SHUFFLE
        BLOSC_BITSHUFFLE
        BLOSC_DELTA
        BLOSC_TRUNC_PREC
        BLOSC_BLOSCLZ
        BLOSC_LZ4
        BLOSC_LZ4HC
        BLOSC_ZLIB
        BLOSC_ZSTD
        # Limits: BLOSC2_MAX_FILTERS sizes the filter arrays declared in this
        # module, the metalayer limits size the pointer arrays of
        # blosc2_schunk and BLOSC2_MAX_OVERHEAD enters NDArray.cratio.
        BLOSC2_MAX_FILTERS
        BLOSC2_MAX_METALAYERS
        BLOSC2_MAX_VLMETALAYERS
        BLOSC2_MAX_OVERHEAD
        # Split modes; Context.__init__ stores BLOSC_AUTO_SPLIT in the
        # configuration.
        BLOSC_ALWAYS_SPLIT = 1,
        BLOSC_NEVER_SPLIT = 2,
        BLOSC_AUTO_SPLIT = 3,
        BLOSC_FORWARD_COMPAT_SPLIT = 4,

    # Types declared without fields: they are only handled through pointers.
    ctypedef int *blosc2_prefilter_fn
    ctypedef struct blosc2_prefilter_params
    ctypedef struct blosc2_storage
    ctypedef struct blosc2_btune
    ctypedef struct blosc2_context
    ctypedef struct blosc2_frame
    # A named blob together with its length in bytes.
    ctypedef struct blosc2_metalayer:
        char *name;
        uint8_t *content;
        int32_t content_len;

    # The super-chunk.  NDArray reads `clevel`, `compcode`, `filters` and
    # `cbytes` from it through `caterva_array_t.sc`.
    ctypedef struct blosc2_schunk:
        uint8_t version;
        uint8_t compcode;
        uint8_t compcode_meta;
        uint8_t clevel;
        int32_t typesize;
        int32_t blocksize;
        int32_t chunksize;
        uint8_t filters[BLOSC2_MAX_FILTERS];
        uint8_t filters_meta[BLOSC2_MAX_FILTERS];
        int64_t nchunks;
        int64_t nbytes;
        int64_t cbytes;
        uint8_t** data;
        size_t data_len;
        blosc2_storage *storage;
        blosc2_frame *frame;
        blosc2_context *cctx;
        blosc2_context *dctx;
        blosc2_metalayer *metalayers[BLOSC2_MAX_METALAYERS];
        int16_t nmetalayers;
        blosc2_metalayer *vlmetalayers[BLOSC2_MAX_VLMETALAYERS];
        blosc2_btune *udbtune;

    # Metalayer functions that operate on a super-chunk.
    int blosc2_meta_exists(blosc2_schunk *schunk, const char *name)
    int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content,
                                 int32_t content_len)
    int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content,
                                    int32_t content_len)
    int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t ** content,
                    int32_t *content_len)
 98 | 
 99 | 
100 | 
cdef extern from "caterva.h":
    # Declarations taken from the Caterva header that this module relies on.
    ctypedef enum:
        # Sizes of the fixed-length arrays in the structs below.
        CATERVA_MAX_DIM
        CATERVA_MAX_METALAYERS

    # Configuration filled in field by field by Context.__init__ and then
    # handed to caterva_ctx_new().
    ctypedef struct caterva_config_t:
        void *(*alloc)(size_t)
        void (*free)(void *)
        uint8_t compcodec
        uint8_t compmeta
        uint8_t complevel
        int32_t splitmode
        int usedict
        int16_t nthreads
        uint8_t filters[BLOSC2_MAX_FILTERS]
        uint8_t filtersmeta[BLOSC2_MAX_FILTERS]
        blosc2_prefilter_fn prefilter
        blosc2_prefilter_params *pparams
        blosc2_btune *udbtune;

    # The context; Context wraps a pointer to one of these.
    ctypedef struct caterva_ctx_t:
        caterva_config_t *cfg

    # A named blob of `size` bytes; filled from the `meta` keyword argument.
    ctypedef struct caterva_metalayer_t:
        char *name
        uint8_t *sdata
        int32_t size

    # Storage description filled in by create_caterva_storage().
    ctypedef struct caterva_storage_t:
        int32_t chunkshape[CATERVA_MAX_DIM]
        int32_t blockshape[CATERVA_MAX_DIM]
        bool contiguous
        char* urlpath
        caterva_metalayer_t metalayers[CATERVA_MAX_METALAYERS]
        int32_t nmetalayers

    # Shape description filled in by create_caterva_params().
    ctypedef struct caterva_params_t:
        int64_t shape[CATERVA_MAX_DIM]
        int8_t ndim
        uint8_t itemsize


    cdef struct chunk_cache_s:
        uint8_t *data
        int64_t nchunk

    # The array itself.  The NDArray properties read `shape`, `chunkshape`,
    # `blockshape`, `nitems`, `chunknitems`, `extnitems`, `blocknitems`,
    # `ndim`, `itemsize` and `sc` from it.
    ctypedef struct caterva_array_t:
        blosc2_schunk *sc;
        uint8_t *buf;
        int64_t shape[CATERVA_MAX_DIM];
        int32_t chunkshape[CATERVA_MAX_DIM];
        int64_t extshape[CATERVA_MAX_DIM];
        int32_t blockshape[CATERVA_MAX_DIM];
        int64_t extchunkshape[CATERVA_MAX_DIM];
        int64_t nitems;
        int32_t chunknitems;
        int64_t extnitems;
        int32_t blocknitems;
        int64_t extchunknitems;
        int8_t ndim;
        uint8_t itemsize;
        int64_t nchunks;
        chunk_cache_s chunk_cache;

    # Library functions.  All of them return an `int`.
    # NOTE(review): presumably a status code -- confirm its meaning against
    # caterva.h before relying on it.
    int caterva_ctx_new(caterva_config_t *cfg, caterva_ctx_t **ctx);
    int caterva_ctx_free(caterva_ctx_t **ctx);
    int caterva_empty(caterva_ctx_t *ctx, caterva_params_t *params,
                      caterva_storage_t *storage, caterva_array_t ** array);
    int caterva_zeros(caterva_ctx_t *ctx, caterva_params_t *params,
                      caterva_storage_t *storage, caterva_array_t ** array);
    int caterva_full(caterva_ctx_t *ctx, caterva_params_t *params,
                     caterva_storage_t *storage, void *fill_value, caterva_array_t ** array);
    int caterva_free(caterva_ctx_t *ctx, caterva_array_t ** array);
    int caterva_from_schunk(caterva_ctx_t *ctx, blosc2_schunk *schunk,
                            caterva_array_t **array);
    int caterva_from_serial_schunk(caterva_ctx_t *ctx, uint8_t *serial_schunk, int64_t len,
                                   caterva_array_t ** array);
    int caterva_open(caterva_ctx_t *ctx, const char *urlpath, caterva_array_t ** array);
    int caterva_from_buffer(caterva_ctx_t *ctx, void *buffer, int64_t buffersize,
                            caterva_params_t *params, caterva_storage_t *storage,
                            caterva_array_t ** array);
    int caterva_to_buffer(caterva_ctx_t *ctx, caterva_array_t *array, void *buffer,
                          int64_t buffersize);
    int caterva_get_slice(caterva_ctx_t *ctx, caterva_array_t *src, int64_t *start,
                          int64_t *stop, caterva_storage_t *storage, caterva_array_t ** array);
    int caterva_squeeze_index(caterva_ctx_t *ctx, caterva_array_t *array,
                              bool *index);
    int caterva_squeeze(caterva_ctx_t *ctx, caterva_array_t *array);
    int caterva_get_slice_buffer(caterva_ctx_t *ctx, caterva_array_t *array,
                                 int64_t *start, int64_t *stop,
                                 void *buffer, int64_t *buffershape, int64_t buffersize);
    int caterva_set_slice_buffer(caterva_ctx_t *ctx,
                                 void *buffer, int64_t *buffershape, int64_t buffersize,
                                 int64_t *start, int64_t *stop, caterva_array_t *array);
    int caterva_copy(caterva_ctx_t *ctx, caterva_array_t *src, caterva_storage_t *storage,
                     caterva_array_t ** array);
    int caterva_resize(caterva_ctx_t *ctx, caterva_array_t *array, int64_t *new_shape,
                       int64_t *start);
199 | 
# Fallback values that Context.__init__ uses for every compression-related
# keyword argument the caller does not supply.
config_dflts = dict(
    codec=Codec.LZ4,
    clevel=5,
    usedict=False,
    nthreads=1,
    filters=[Filter.SHUFFLE],
    filtersmeta=[0],  # no actual meta info for SHUFFLE, but anyway...
)
209 | 
210 | 
cdef class Context:
    """Owner of a ``caterva_ctx_t`` configured from keyword arguments.

    The keyword arguments that are read are ``codec``, ``clevel``,
    ``usedict``, ``nthreads``, ``filters`` and ``filtersmeta``.  Anything that
    is not supplied falls back to ``config_dflts``; any other key is ignored.

    Raises
    ------
    ValueError
        If more than ``BLOSC2_MAX_FILTERS`` filters (or filter meta values)
        are given.
    RuntimeError
        If the C library reports a failure while creating the context.
    """
    cdef caterva_ctx_t *context_
    # NOTE(review): the attributes below are declared but not assigned by any
    # method of this class.
    cdef uint8_t compcode
    cdef uint8_t compmeta
    cdef uint8_t complevel
    cdef int32_t splitmode
    cdef int usedict
    cdef int16_t nthreads
    cdef int32_t blocksize
    cdef uint8_t filters[BLOSC2_MAX_FILTERS]
    cdef uint8_t filtersmeta[BLOSC2_MAX_FILTERS]
    cdef blosc2_prefilter_fn prefilter
    cdef blosc2_prefilter_params* pparams

    def __init__(self, **kwargs):
        cdef caterva_config_t config
        config.free = free
        config.alloc = malloc
        config.compcodec = kwargs.get('codec', config_dflts['codec']).value
        config.compmeta = 0
        config.complevel = kwargs.get('clevel', config_dflts['clevel'])
        config.splitmode = BLOSC_AUTO_SPLIT
        config.usedict =  kwargs.get('usedict', config_dflts['usedict'])
        config.nthreads = kwargs.get('nthreads', config_dflts['nthreads'])
        config.prefilter = NULL
        config.pparams = NULL
        config.udbtune = NULL

        filters = kwargs.get('filters', config_dflts['filters'])
        filtersmeta = kwargs.get('filtersmeta', config_dflts['filtersmeta'])
        # The pipelines are stored right-aligned in fixed-size C arrays, so a
        # longer list would produce negative indexes and write out of bounds.
        if len(filters) > BLOSC2_MAX_FILTERS:
            raise ValueError(
                "at most {} filters are supported, got {}".format(BLOSC2_MAX_FILTERS, len(filters)))
        if len(filtersmeta) > BLOSC2_MAX_FILTERS:
            raise ValueError(
                "at most {} filtersmeta values are supported, got {}".format(
                    BLOSC2_MAX_FILTERS, len(filtersmeta)))

        for i in range(BLOSC2_MAX_FILTERS):
            config.filters[i] = 0
            config.filtersmeta[i] = 0

        # Right-align both lists: the unused leading slots keep the value 0.
        offset = BLOSC2_MAX_FILTERS - len(filters)
        for i in range(len(filters)):
            config.filters[offset + i] = filters[i].value

        offset = BLOSC2_MAX_FILTERS - len(filtersmeta)
        for i in range(len(filtersmeta)):
            config.filtersmeta[offset + i] = filtersmeta[i]

        # A non-zero return code is treated as a failure.
        if caterva_ctx_new(&config, &self.context_) != 0:
            self.context_ = NULL
            raise RuntimeError("the caterva context could not be created")

    def __dealloc__(self):
        # `context_` is still NULL when __init__ raised before creating it.
        if self.context_ != NULL:
            caterva_ctx_free(&self.context_)

    def tocapsule(self):
        """Return the raw context pointer wrapped in a capsule named
        ``"caterva_ctx_t*"``.  No destructor is attached, so the capsule does
        not own the context."""
        return PyCapsule_New(self.context_, <char *>"caterva_ctx_t*", NULL)
258 | 
259 | 
cdef create_caterva_params(caterva_params_t *params, shape, itemsize):
    """Fill ``params`` with the number of dimensions, the item size and the shape.

    Raises ``ValueError`` when ``shape`` has more than ``CATERVA_MAX_DIM``
    entries, because ``params.shape`` is a fixed-size array of that length and
    a longer shape would be written past its end.
    """
    ndim = len(shape)
    if ndim > CATERVA_MAX_DIM:
        raise ValueError(
            "at most {} dimensions are supported, got {}".format(CATERVA_MAX_DIM, ndim))
    params.ndim = ndim
    params.itemsize = itemsize
    for i in range(ndim):
        params.shape[i] = shape[i]
265 | 
266 | 
cdef create_caterva_storage(caterva_storage_t *storage, kwargs):
    """Fill ``storage`` from the storage-related entries of ``kwargs``.

    The entries read are ``chunks``, ``blocks``, ``urlpath``, ``contiguous``
    (default ``False``) and ``meta``.

    The url path, the metalayer names and the metalayer contents are stored
    as freshly allocated C copies (``strdup``/``malloc``).  This function has
    no way to release them; they stay allocated unless the caller frees them.

    Raises
    ------
    AttributeError
        If ``chunks`` or ``blocks`` is missing or empty.
    ValueError
        If ``chunks`` has more than ``CATERVA_MAX_DIM`` entries or ``meta``
        has more than ``CATERVA_MAX_METALAYERS`` entries (the destination
        arrays have exactly that many slots).
    MemoryError
        If a C allocation fails.
    """
    chunks = kwargs.get('chunks', None)
    blocks = kwargs.get('blocks', None)
    urlpath = kwargs.get('urlpath', None)
    contiguous = kwargs.get('contiguous', False)
    meta = kwargs.get('meta', None)

    if not chunks:
        raise AttributeError("chunks must be specified")
    if not blocks:
        raise AttributeError("blocks must be specified")
    if len(chunks) > CATERVA_MAX_DIM:
        raise ValueError(
            "at most {} dimensions are supported, got {} chunk dimensions".format(
                CATERVA_MAX_DIM, len(chunks)))
    if meta is not None and len(meta) > CATERVA_MAX_METALAYERS:
        raise ValueError(
            "at most {} metalayers are supported, got {}".format(
                CATERVA_MAX_METALAYERS, len(meta)))

    if urlpath is not None:
        urlpath = urlpath.encode("utf-8") if isinstance(urlpath, str) else urlpath
        # Store a C copy: the encoded bytes object is a local that is released
        # when this function returns, so a pointer into it would dangle by the
        # time the caller uses `storage`.
        storage.urlpath = strdup(urlpath)
        if storage.urlpath == NULL:
            raise MemoryError()
    else:
        storage.urlpath = NULL
    storage.contiguous = contiguous
    for i in range(len(chunks)):
        storage.chunkshape[i] = chunks[i]
        storage.blockshape[i] = blocks[i]

    if meta is None:
        storage.nmetalayers = 0
    else:
        storage.nmetalayers = len(meta)
        for i, (name, content) in enumerate(meta.items()):
            name2 = name.encode("utf-8") if isinstance(name, str) else name # do a copy
            size = len(content)
            storage.metalayers[i].name = strdup(name2)
            if storage.metalayers[i].name == NULL:
                raise MemoryError()
            storage.metalayers[i].sdata = <uint8_t *> malloc(size)
            # malloc(0) may legitimately return NULL, hence the size test.
            if storage.metalayers[i].sdata == NULL and size > 0:
                raise MemoryError()
            memcpy(storage.metalayers[i].sdata, <uint8_t *> content, size)
            storage.metalayers[i].size = size
299 | 
300 | 
cdef class NDArray:
    """Extension type that holds a pointer to a ``caterva_array_t``.

    ``__init__`` only stores the keyword arguments and sets the pointer to
    NULL; the functions of this module attach an actual array afterwards.
    The properties dereference the pointer without checking it, so they must
    only be used once an array has been attached.
    """
    cdef caterva_array_t *array
    cdef kwargs

    @property
    def shape(self):
        """The shape of this container."""
        return tuple([self.array.shape[i] for i in range(self.array.ndim)])

    @property
    def chunks(self):
        """The chunk shape of this container."""
        return tuple([self.array.chunkshape[i] for i in range(self.array.ndim)])

    @property
    def blocks(self):
        """The block shape of this container."""
        return tuple([self.array.blockshape[i] for i in range(self.array.ndim)])

    @property
    def cratio(self):
        """The compression ratio for this container."""
        # Size in bytes divided by the compressed bytes plus a per-chunk
        # allowance of BLOSC2_MAX_OVERHEAD.
        return self.size / (self.array.sc.cbytes + BLOSC2_MAX_OVERHEAD * self.nchunks)

    @property
    def clevel(self):
        """The compression level for this container."""
        return self.array.sc.clevel

    @property
    def codec(self):
        """The compression codec name for this container."""
        return Codec(self.array.sc.compcode)

    @property
    def filters(self):
        """The filters list for this container."""
        return [Filter(self.array.sc.filters[i]) for i in range(BLOSC2_MAX_FILTERS)]

    @property
    def itemsize(self):
        """The itemsize of this container."""
        return self.array.itemsize

    @property
    def chunksize(self):
        """The chunk size (in bytes) for this container."""
        return self.array.chunknitems * self.itemsize

    @property
    def blocksize(self):
        """The block size (in bytes) for this container."""
        return self.array.blocknitems * self.itemsize

    @property
    def size(self):
        """The size (in bytes) for this container."""
        return self.array.nitems * self.itemsize

    @property
    def nchunks(self):
        """The number of chunks in this container."""
        # Integer floor division: a true division would go through a float
        # and lose precision for very large item counts.
        return self.array.extnitems // self.array.chunknitems

    @property
    def ndim(self):
        """The number of dimensions of this container."""
        return self.array.ndim

    @property
    def c_array(self):
        """The address of the underlying ``caterva_array_t`` as an integer."""
        return <uintptr_t> self.array

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.array = NULL

    def squeeze(self, **kwargs):
        """Call ``caterva_squeeze`` on the array, using a context built from
        ``kwargs``.  Nothing is returned."""
        ctx = Context(**kwargs)
        caterva_squeeze(ctx.context_, self.array)

    def to_buffer(self, **kwargs):
        """Return a ``bytes`` object of ``self.size`` bytes that
        ``caterva_to_buffer`` has filled with the array data."""
        ctx = Context(**kwargs)
        buffersize = self.size
        buffer = bytes(buffersize)
        # The library writes directly into the memory of the new bytes object.
        caterva_to_buffer(ctx.context_, self.array, <void *> <char *> buffer, buffersize)
        return buffer

    def __dealloc__(self):
        if self.array != NULL:
            # During finalization the Python-object attributes of an extension
            # type may already have been reset to None.  Fall back to the
            # default context settings in that case instead of failing and
            # leaving the array allocated.
            kwargs = self.kwargs if self.kwargs is not None else {}
            ctx = Context(**kwargs)
            caterva_free(ctx.context_, &self.array)
393 | 
394 | 
def get_slice_numpy(arr, NDArray src, key, mask, **kwargs):
    """Read the slice ``key`` of ``src`` into the memory of ``arr``.

    Parameters
    ----------
    arr: object
        Destination exposing the buffer protocol.  It must provide a
        ``squeeze()`` method, whose result is what this function returns.
    src: NDArray
        The array to read from.
    key: tuple
        A ``(start, stop)`` pair of per-dimension index sequences.
    mask: object
        Not used by this function.
    kwargs: dict
        Forwarded to :class:`Context`.

    Returns
    -------
    The result of ``arr.squeeze()``.

    Raises
    ------
    ValueError
        If the buffer of ``arr`` is smaller than the requested slice.
    """
    ctx = Context(**kwargs)
    start, stop = key

    cdef int64_t[CATERVA_MAX_DIM] start_, stop_
    cdef int64_t[CATERVA_MAX_DIM] buffershape_
    # Size of the slice in bytes: the item size times every slice extent.
    cdef int64_t buffersize_ = src.itemsize
    for i in range(src.ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]
        buffershape_[i] = stop_[i] - start_[i]
        buffersize_ *= buffershape_[i]

    cdef Py_buffer view
    PyObject_GetBuffer(arr, &view, PyBUF_SIMPLE)
    try:
        # The library is only told the size computed above, so the real
        # capacity of the destination has to be verified here.
        if view.len < buffersize_:
            raise ValueError(
                "destination buffer holds {} bytes but the slice needs {}".format(
                    view.len, buffersize_))
        caterva_get_slice_buffer(ctx.context_, src.array, start_, stop_,
                                 <void *> view.buf, buffershape_, buffersize_)
    finally:
        PyBuffer_Release(&view)

    return arr.squeeze()
420 | 
421 | 
def get_slice(NDArray arr, NDArray src, key, mask, **kwargs):
    """Build a new array from the region ``key`` of ``src`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    src: NDArray
        Array to read from.
    key: tuple
        ``(start, stop)`` pair of per-dimension index sequences.
    mask: sequence
        One boolean-like entry per dimension of ``src``, handed to
        ``caterva_squeeze_index``.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.

    Returns
    -------
    NDArray
        ``arr``, now pointing at the sliced array.
    """
    cdef int64_t[CATERVA_MAX_DIM] lower_, upper_
    cdef bool squeeze_mask_[CATERVA_MAX_DIM]
    cdef caterva_storage_t c_storage
    cdef caterva_array_t *sliced

    context = Context(**kwargs)
    ndim = src.ndim
    start, stop = key

    for axis in range(ndim):
        lower_[axis] = start[axis]
        upper_[axis] = stop[axis]

    create_caterva_storage(&c_storage, kwargs)
    caterva_get_slice(context.context_, src.array, lower_, upper_, &c_storage, &sliced)

    # The mask is only converted once the slice exists, as before.
    for axis in range(ndim):
        squeeze_mask_[axis] = mask[axis]

    caterva_squeeze_index(context.context_, sliced, squeeze_mask_)
    arr.array = sliced
    return arr
446 | 
def set_slice(NDArray dst, key, ndarray):
    """Overwrite the region ``key`` of ``dst`` with the bytes exported by ``ndarray``.

    Parameters
    ----------
    dst: NDArray
        Array to modify; its stored ``kwargs`` are used to build the context.
    key: tuple
        ``(start, stop)`` pair of per-dimension index sequences; the first
        ``dst.ndim`` entries of each are used.
    ndarray: object
        Source object exposing the buffer protocol (typically a NumPy array).

    Returns
    -------
    NDArray
        ``dst``.
    """
    # A stack-allocated view needs no malloc/free pair, so nothing can leak.
    cdef Py_buffer view
    cdef int64_t[CATERVA_MAX_DIM] buffershape_, start_, stop_

    ctx = Context(**dst.kwargs)
    ndim = dst.ndim
    start, stop = key

    # Convert the bounds before acquiring the buffer so that a bad `key`
    # cannot leave an unreleased view behind.
    for i in range(ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]
        buffershape_[i] = stop[i] - start[i]

    PyObject_GetBuffer(ndarray, &view, PyBUF_SIMPLE)
    try:
        caterva_set_slice_buffer(ctx.context_, view.buf, buffershape_, view.len,
                                 start_, stop_, dst.array)
    finally:
        PyBuffer_Release(&view)
    return dst
464 | 
465 | 
def empty(NDArray arr, shape, itemsize, **kwargs):
    """Create an array with ``caterva_empty`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    shape: sequence
        Shape handed to ``create_caterva_params``.
    itemsize: int
        Item size handed to ``create_caterva_params``.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.
    """
    cdef caterva_params_t c_params
    cdef caterva_storage_t c_storage
    cdef caterva_array_t *handle

    context = Context(**kwargs)
    create_caterva_params(&c_params, shape, itemsize)
    create_caterva_storage(&c_storage, kwargs)
    caterva_empty(context.context_, &c_params, &c_storage, &handle)
    arr.array = handle
478 | 
479 | 
def zeros(NDArray arr, shape, itemsize, **kwargs):
    """Create an array with ``caterva_zeros`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    shape: sequence
        Shape handed to ``create_caterva_params``.
    itemsize: int
        Item size handed to ``create_caterva_params``.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.
    """
    cdef caterva_params_t c_params
    cdef caterva_storage_t c_storage
    cdef caterva_array_t *handle

    context = Context(**kwargs)
    create_caterva_params(&c_params, shape, itemsize)
    create_caterva_storage(&c_storage, kwargs)
    caterva_zeros(context.context_, &c_params, &c_storage, &handle)
    arr.array = handle
492 | 
493 | 
def full(NDArray arr, shape, value, **kwargs):
    """Create an array with ``caterva_full`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    shape: sequence
        Shape handed to ``create_caterva_params``.
    value: bytes
        Fill value; ``len(value)`` is used as the item size and its raw
        bytes are passed to ``caterva_full``.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.
    """
    cdef caterva_params_t c_params
    cdef caterva_storage_t c_storage
    cdef uint8_t *fill_ptr
    cdef caterva_array_t *handle

    context = Context(**kwargs)
    create_caterva_params(&c_params, shape, len(value))
    create_caterva_storage(&c_storage, kwargs)
    fill_ptr = <uint8_t *> value
    caterva_full(context.context_, &c_params, &c_storage, fill_ptr, &handle)
    arr.array = handle
506 | 
507 | 
def copy(NDArray arr, NDArray src, **kwargs):
    """Copy ``src`` with ``caterva_copy`` and attach the result to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    src: NDArray
        Array to copy.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup of the copy.

    Returns
    -------
    NDArray
        ``arr``, now pointing at the copy.
    """
    cdef caterva_storage_t c_storage
    cdef caterva_array_t *duplicate

    context = Context(**kwargs)
    create_caterva_storage(&c_storage, kwargs)
    caterva_copy(context.context_, src.array, &c_storage, &duplicate)
    arr.array = duplicate
    return arr
517 | 
def resize(NDArray arr, new_shape):
    """Resize ``arr`` in place to ``new_shape`` using ``caterva_resize``.

    Parameters
    ----------
    arr: NDArray
        Array to resize; its stored ``kwargs`` are used to build the context.
    new_shape: sequence of int
        Target extent for every dimension; must have exactly ``arr.ndim``
        entries.

    Returns
    -------
    NDArray
        ``arr``.

    Raises
    ------
    ValueError
        If ``new_shape`` does not have one entry per dimension of ``arr``.
    """
    cdef int64_t[CATERVA_MAX_DIM] new_shape_

    ctx = Context(**arr.kwargs)

    # The C array has a fixed capacity and the library reads `ndim` entries
    # from it: a longer sequence would overflow it, a shorter one would leave
    # uninitialised extents.  Reject both up front.
    new_shape = tuple(new_shape)
    if len(new_shape) != arr.ndim:
        raise ValueError(
            "new_shape must have %d dimensions, got %d." % (arr.ndim, len(new_shape))
        )

    for i, s in enumerate(new_shape):
        new_shape_[i] = s
    caterva_resize(ctx.context_, arr.array, new_shape_, NULL)
    return arr
525 | 
def from_file(NDArray arr, urlpath, **kwargs):
    """Open the array stored at ``urlpath`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the opened array pointer.
    urlpath: str or bytes
        Location of the stored array; ``str`` values are UTF-8 encoded.
    kwargs: dict
        Options forwarded to ``Context``.

    Raises
    ------
    FileNotFoundError
        If ``urlpath`` does not exist.
    RuntimeError
        If ``caterva_open`` does not produce an array.
    """
    # Start from NULL so a failed open can be detected instead of storing an
    # uninitialised pointer that __dealloc__ would later try to free.
    cdef caterva_array_t *array_ = NULL

    ctx = Context(**kwargs)

    urlpath = urlpath.encode("utf-8") if isinstance(urlpath, str) else urlpath
    if not os.path.exists(urlpath):
        raise FileNotFoundError("No such file or directory: %r" % (urlpath,))

    caterva_open(ctx.context_, urlpath, &array_)
    if array_ == NULL:
        raise RuntimeError("Could not open %r as a Caterva array." % (urlpath,))
    arr.array = array_
536 | 
537 | 
def from_buffer(NDArray arr, buf, shape, itemsize, **kwargs):
    """Create an array from the raw bytes in ``buf`` and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    buf: bytes
        Source data; its address and ``len(buf)`` are passed to
        ``caterva_from_buffer``.
    shape: sequence
        Shape handed to ``create_caterva_params``.
    itemsize: int
        Item size handed to ``create_caterva_params``.
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.
    """
    cdef caterva_params_t c_params
    cdef caterva_storage_t c_storage
    cdef caterva_array_t *handle

    context = Context(**kwargs)
    create_caterva_params(&c_params, shape, itemsize)
    create_caterva_storage(&c_storage, kwargs)
    caterva_from_buffer(context.context_, <void*> <char *> buf, len(buf),
                        &c_params, &c_storage, &handle)
    arr.array = handle
550 | 
551 | 
def asarray(NDArray arr, ndarray, **kwargs):
    """Create an array from a NumPy-like object and attach it to ``arr``.

    Parameters
    ----------
    arr: NDArray
        Container that receives the new array pointer.
    ndarray: object
        Source exposing both ``__array_interface__`` (read for its
        ``"shape"`` entry) and the buffer protocol (read for the data and the
        item size).
    kwargs: dict
        Options forwarded to ``Context`` and to the storage setup.
    """
    # A stack-allocated view needs no malloc/free pair, so nothing can leak.
    cdef Py_buffer view
    cdef caterva_params_t params_
    cdef caterva_storage_t storage_
    cdef caterva_array_t *array_

    ctx = Context(**kwargs)

    interface = ndarray.__array_interface__
    shape = interface["shape"]

    PyObject_GetBuffer(ndarray, &view, PyBUF_SIMPLE)
    try:
        create_caterva_params(&params_, shape, view.itemsize)
        create_caterva_storage(&storage_, kwargs)
        caterva_from_buffer(ctx.context_, <void*> <char *> view.buf, view.len,
                            &params_, &storage_, &array_)
    finally:
        # Release the view even if one of the helpers above raised.
        PyBuffer_Release(&view)
    arr.array = array_
572 | 
573 | 
def meta__contains__(self, name):
    """Tell whether the array behind ``self.c_array`` has a metalayer ``name``.

    Parameters
    ----------
    name: str or bytes
        Metalayer name; ``str`` values are UTF-8 encoded.

    Returns
    -------
    bool
        ``True`` when ``blosc2_meta_exists`` returns a non-negative value.
    """
    cdef caterva_array_t *array = <caterva_array_t *><uintptr_t> self.c_array
    if isinstance(name, str):
        name = name.encode("utf-8")
    status = blosc2_meta_exists(array.sc, name)
    return status >= 0
579 | 
def meta__getitem__(self, name):
    """Return the content of the metalayer ``name`` as ``bytes``.

    Parameters
    ----------
    name: str or bytes
        Metalayer name; ``str`` values are UTF-8 encoded.

    Returns
    -------
    bytes
        A copy of the metalayer content.

    Raises
    ------
    KeyError
        If ``blosc2_meta_get`` reports failure (negative return value), which
        is what happens when the metalayer does not exist.
    """
    cdef caterva_array_t *array = <caterva_array_t *><uintptr_t> self.c_array
    cdef uint8_t *content = NULL
    cdef int32_t content_len = 0

    key = name
    name = name.encode("utf-8") if isinstance(name, str) else name
    n = blosc2_meta_get(array.sc, name, &content, &content_len)
    if n < 0:
        # On failure `content` was never filled in; building a bytes object
        # from it would read through an invalid pointer.
        raise KeyError(key)
    # NOTE(review): per the C-Blosc2 docs `content` is a malloc'ed copy owned
    # by the caller; it is copied here but never freed.  Releasing it needs
    # `free` from libc.stdlib -- confirm it is cimported, then free it.
    return PyBytes_FromStringAndSize(<char *> content, content_len)
587 | 
def meta__setitem__(self, name, content):
    """Replace the content of the metalayer ``name``, keeping its length.

    Parameters
    ----------
    name: str or bytes
        Metalayer name; ``str`` values are UTF-8 encoded.
    content: bytes
        New content; must be exactly as long as the current content.

    Returns
    -------
    int
        The value returned by ``blosc2_meta_update``.

    Raises
    ------
    ValueError
        If ``content`` differs in length from the stored content.
    """
    cdef caterva_array_t *array = <caterva_array_t *><uintptr_t> self.c_array
    if isinstance(name, str):
        name = name.encode("utf-8")
    current = meta__getitem__(self, name)
    if len(current) != len(content):
        raise ValueError("The length of the content in a metalayer cannot change.")
    return blosc2_meta_update(array.sc, name, content, len(content))
596 | 
def meta__len__(self):
    """Return ``nmetalayers`` from the super-chunk of the array behind ``self.c_array``."""
    cdef caterva_array_t *handle = <caterva_array_t *><uintptr_t> self.c_array
    count = handle.sc.nmetalayers
    return count
600 | 
def meta_keys(self):
    """List the metalayer names of the array behind ``self.c_array``.

    Returns
    -------
    list of str
        The names, UTF-8 decoded, in the order they are stored in the
        super-chunk's ``metalayers`` table.
    """
    cdef caterva_array_t *handle = <caterva_array_t *><uintptr_t> self.c_array
    total = meta__len__(self)
    return [handle.sc.metalayers[idx].name.decode("utf-8") for idx in range(total)]
608 | 


--------------------------------------------------------------------------------