├── VERSION ├── requirements.txt ├── requirements-tests.txt ├── doc ├── source │ ├── development │ │ ├── roadmap.rst │ │ ├── contributing.rst │ │ └── index.rst │ ├── release_notes │ │ └── index.rst │ ├── caterva-logo.png │ ├── _static │ │ ├── blosc-logo_128.png │ │ └── css │ │ │ └── custom.css │ ├── getting_started │ │ ├── index.rst │ │ ├── overview.rst │ │ ├── installation.rst │ │ └── tutorial.md │ ├── reference │ │ ├── index.rst │ │ ├── first_level.rst │ │ ├── constructors.rst │ │ ├── meta.rst │ │ └── ndarray.rst │ ├── index.rst │ └── conf.py ├── requirements.txt ├── Makefile └── make.bat ├── requirements-build.txt ├── .flake8 ├── pyproject.toml ├── .gitmodules ├── CMakeLists.txt ├── code_of_conduct.md ├── .pre-commit-config.yaml ├── MANIFEST.in ├── ROADMAP.rst ├── caterva ├── __init__.py ├── CMakeLists.txt ├── utils.py ├── info.py ├── meta.py ├── ndarray.py ├── constructors.py └── caterva_ext.pyx ├── examples ├── ex_resize.py ├── ex_numpy.py ├── ex_buffer.py ├── ex_empty.py ├── ex_getitem.py ├── ex_copy.py ├── ex_meta.py ├── ex_persistency.py └── ex_formats.py ├── tests ├── test_buffer.py ├── test_numpy.py ├── test_resize.py ├── test_full.py ├── test_zeros.py ├── test_persistency.py ├── test_getitem.py ├── test_empty.py ├── test_metalayers.py └── test_copy.py ├── ANNOUNCE.rst ├── README.md ├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── LICENSES ├── BLOSC.txt └── CATERVA.txt ├── LICENSE ├── RELEASING.rst ├── bench ├── compare_getitem.py ├── compare_loadframe.py ├── compare_reduceframe.py ├── compare_getslice.py └── compare_serialization.py ├── CONTRIBUTING.rst ├── setup.py ├── .gitignore └── RELEASE_NOTES.rst /VERSION: -------------------------------------------------------------------------------- 1 | 0.7.4.dev0 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ndindex>=1.4 2 | numpy>=1.20.3 3 
| -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pytest 3 | msgpack 4 | -------------------------------------------------------------------------------- /doc/source/development/roadmap.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../../ROADMAP.rst 2 | -------------------------------------------------------------------------------- /doc/source/release_notes/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../../RELEASE_NOTES.rst 2 | -------------------------------------------------------------------------------- /doc/source/development/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | wheel 3 | scikit-build 4 | cmake 5 | ninja 6 | cython 7 | -------------------------------------------------------------------------------- /doc/source/caterva-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-caterva/HEAD/doc/source/caterva-logo.png -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | __init__.py, 4 | c-blosc2, 5 | Caterva 6 | max-line-length = 99 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = 
["setuptools", "wheel", "scikit-build", "cmake", "ninja", "cython"] 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caterva/caterva"] 2 | path = caterva/caterva 3 | url = https://github.com/Blosc/caterva.git 4 | -------------------------------------------------------------------------------- /doc/source/_static/blosc-logo_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-caterva/HEAD/doc/source/_static/blosc-logo_128.png -------------------------------------------------------------------------------- /doc/source/development/index.rst: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | contributing 8 | roadmap 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11.0) 2 | project(caterva) 3 | find_package(PythonExtensions REQUIRED) 4 | find_package(Cython REQUIRED) 5 | 6 | add_subdirectory(caterva) 7 | -------------------------------------------------------------------------------- /doc/source/getting_started/index.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | overview 8 | installation 9 | tutorial 10 | -------------------------------------------------------------------------------- /doc/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | first_level 8 | constructors 9 | ndarray 10 | meta 11 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | sphinx 3 | cython 4 | numpy 5 | scikit-build 6 | pytest 7 | msgpack 8 | cmake 9 | pydata-sphinx-theme 10 | sphinx-inline-tabs 11 | sphinx-panels 12 | myst-nb 13 | -------------------------------------------------------------------------------- /code_of_conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | 10 | - repo: https://gitlab.com/pycqa/flake8 11 | rev: '' # pick a git hash / tag to point to 12 | hooks: 13 | - id: flake8 14 | files: caterva/* 15 | -------------------------------------------------------------------------------- /doc/source/getting_started/overview.rst: -------------------------------------------------------------------------------- 1 | What is python-caterva? 2 | ======================= 3 | 4 | Caterva is a container for multidimensional data that is specially designed to read, in a very efficient way, dataset slices. 
5 | It uses the metalayer capabilities present in superchunks/frames in order to store the multidimensionality information. 6 | Python-caterva is the Python wrapper for `Caterva <https://caterva.readthedocs.io>`__. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSE 3 | include VERSION 4 | include *.txt *.rst *.md 5 | exclude RELEASING.rst 6 | include setup.py 7 | include *.toml 8 | 9 | recursive-include caterva *.py *.pyx *.pxd *.c *.h *.txt *in *.cmake *.rc 10 | recursive-include tests *.py 11 | recursive-include bench *.py *.txt 12 | recursive-include doc *.rst *.md *.txt *.py *.pdf *.html *.css *.png 13 | recursive-exclude doc/_build * 14 | recursive-include LICENSES * 15 | -------------------------------------------------------------------------------- /doc/source/getting_started/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | You can install Caterva wheels via PyPI using Pip or clone the GitHub repository. 4 | 5 | Pip 6 | +++ 7 | 8 | .. code-block:: 9 | 10 | python -m pip install caterva 11 | 12 | 13 | Source code 14 | +++++++++++ 15 | 16 | .. code-block:: 17 | 18 | git clone --recurse-submodules https://github.com/Blosc/python-caterva 19 | cd python-caterva 20 | python -m pip install . 21 | -------------------------------------------------------------------------------- /doc/source/reference/first_level.rst: -------------------------------------------------------------------------------- 1 | Global variables 2 | ================ 3 | There are some global variables in Caterva that can be used anytime and make code clearer during compression and decompression processes. 4 | 5 | .. py:attribute:: caterva.__version__ 6 | 7 | The version of the caterva package. 8 | 9 | .. 
autoclass:: caterva.Codec 10 | :members: 11 | :undoc-members: 12 | 13 | .. autoclass:: caterva.Filter 14 | :members: 15 | :undoc-members: 16 | -------------------------------------------------------------------------------- /doc/source/reference/constructors.rst: -------------------------------------------------------------------------------- 1 | Constructors 2 | ============ 3 | These functions let users create Caterva arrays either from scratch or from a dataset in another format. 4 | 5 | .. currentmodule:: caterva 6 | 7 | Basics 8 | ------ 9 | 10 | .. autosummary:: 11 | :toctree: api/ 12 | 13 | empty 14 | copy 15 | from_buffer 16 | open 17 | asarray 18 | 19 | 20 | Utils 21 | ----- 22 | 23 | .. autosummary:: 24 | :toctree: api/utils 25 | 26 | remove 27 | -------------------------------------------------------------------------------- /doc/source/reference/meta.rst: -------------------------------------------------------------------------------- 1 | Metalayers 2 | ========== 3 | Metalayers are small pieces of metadata that describe the properties of the data stored in a container. Caterva implements its own metalayer on top of C-Blosc2 for storing multidimensional information. 4 | 5 | .. currentmodule:: caterva.meta 6 | 7 | .. autoclass:: Meta 8 | :exclude-members: get, keys, items, values 9 | 10 | .. currentmodule:: caterva.meta.Meta 11 | 12 | Methods 13 | ------- 14 | 15 | .. autosummary:: 16 | :toctree: api/meta 17 | :nosignatures: 18 | 19 | __getitem__ 20 | __setitem__ 21 | get 22 | keys 23 | __iter__ 24 | __contains__ 25 | -------------------------------------------------------------------------------- /ROADMAP.rst: -------------------------------------------------------------------------------- 1 | Roadmap 2 | ======= 3 | 4 | This document lists the main goals for the upcoming python-caterva releases. 5 | 6 | 7 | Features 8 | -------- 9 | 10 | * *Support for variable-length metalayers*. 
11 | This would give users a lot of flexibility to define their own metadata. 12 | 13 | * *Resize array dimensions*. 14 | This feature would allow Caterva to increase or decrease the size of any dimension of the arrays. 15 | 16 | 17 | Interoperability 18 | ---------------- 19 | 20 | * *Third-party integration*. Caterva needs better integration with libraries like: 21 | 22 | * xarray (labeled arrays) 23 | * dask (computation) 24 | * napari (visualization) 25 | -------------------------------------------------------------------------------- /caterva/__init__.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | from .version import __version__ 11 | 12 | from . import caterva_ext as ext 13 | 14 | # Public API for container module 15 | from .constructors import (empty, zeros, full, from_buffer, open, asarray, copy) 16 | 17 | from .ndarray import NDArray 18 | 19 | from .utils import Codec, Filter, remove 20 | -------------------------------------------------------------------------------- /examples/ex_resize.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | np.random.seed(123) 13 | 14 | shape = (8, 8) 15 | chunks = (4, 4) 16 | blocks = (2, 2) 17 | 18 | fill_value = b"1" 19 | a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks) 20 | 21 | a.resize((10, 10)) 22 | 23 | print(a[:]) 24 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/source/reference/ndarray.rst: -------------------------------------------------------------------------------- 1 | NDArray 2 | ======= 3 | 4 | The multidimensional data array class. This class consists of a set of useful parameters and methods that allow not only to define an array correctly, but also to handle it in a simple way, being able to extract multidimensional slices from it. 5 | 6 | .. currentmodule:: caterva.NDArray 7 | 8 | Attributes 9 | ---------- 10 | 11 | .. 
autosummary:: 12 | :toctree: api/ndarray 13 | 14 | itemsize 15 | ndim 16 | shape 17 | chunks 18 | blocks 19 | meta 20 | 21 | Methods 22 | ------- 23 | 24 | .. autosummary:: 25 | :toctree: api/ndarray 26 | :nosignatures: 27 | 28 | __getitem__ 29 | __setitem__ 30 | slice 31 | resize 32 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /examples/ex_numpy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | 13 | shape = (1234, 23) 14 | chunks = (253, 23) 15 | blocks = (10, 23) 16 | 17 | dtype = bool 18 | 19 | # Create a buffer 20 | nparray = np.random.choice(a=[True, False], size=np.prod(shape)).reshape(shape) 21 | 22 | # Create a caterva array from a numpy array 23 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks) 24 | b = a.copy() 25 | 26 | # Convert a caterva array to a numpy array 27 | nparray2 = np.asarray(b).view(dtype) 28 | 29 | np.testing.assert_almost_equal(nparray, nparray2) 30 | -------------------------------------------------------------------------------- /examples/ex_buffer.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | np.random.seed(123) 13 | 14 | shape = (50, 50) 15 | chunks = (49, 49) 16 | blocks = (48, 48) 17 | 18 | itemsize = 8 19 | 20 | # Create a buffer 21 | buffer = bytes(np.random.normal(0, 1, np.prod(shape)) * itemsize) 22 | 23 | # Create a caterva array from a buffer 24 | 25 | a = cat.from_buffer(buffer, shape, chunks=chunks, blocks=blocks, itemsize=itemsize) 26 | print(a.filters) 27 | print(a.codec) 28 | print(a.cratio) 29 | 30 | # Convert a caterva array to a buffer 31 | buffer2 = a.to_buffer() 32 | assert buffer == buffer2 33 | -------------------------------------------------------------------------------- /caterva/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(STATIC_LIB ON CACHE BOOL "Build a static version of the blosc library.") 2 | set(SHARED_LIB ON CACHE BOOL "Build a shared library version of the blosc 3 | library.") 4 | set(CATERVA_BUILD_TESTS OFF CACHE BOOL "Build Caterva tests") 5 | set(CATERVA_BUILD_EXAMPLES OFF CACHE BOOL "Build Caterva examples") 6 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 7 | add_subdirectory(caterva) 8 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/caterva/caterva") 9 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/caterva/contribs/c-blosc2/include") 10 | 11 | 12 | add_cython_target(caterva_ext caterva_ext.pyx) 13 | add_library(caterva_ext MODULE ${caterva_ext}) 14 | 15 | target_link_libraries(caterva_ext caterva_static) 16 | 17 | python_extension_module(caterva_ext) 18 | 19 | add_custom_command( 20 | TARGET caterva_ext POST_BUILD 21 | COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_SOURCE_DIR}/caterva 22 | ) 23 | 24 | install(TARGETS caterva_ext LIBRARY DESTINATION caterva) 25 | 
-------------------------------------------------------------------------------- /tests/test_buffer.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | 13 | 14 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize", 15 | [ 16 | ([450], [128], [25], 8), 17 | ([20, 134, 13], [3, 13, 5], [3, 10, 5], 4), 18 | ]) 19 | def test_buffer(shape, chunks, blocks, itemsize): 20 | size = int(np.prod(shape)) 21 | buffer = bytes(size * itemsize) 22 | a = cat.from_buffer(buffer, shape, itemsize, chunks=chunks, blocks=blocks) 23 | buffer2 = a.to_buffer() 24 | assert buffer == buffer2 25 | -------------------------------------------------------------------------------- /tests/test_numpy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | 13 | 14 | @pytest.mark.parametrize("shape, chunks, blocks, dtype", 15 | [ 16 | ([931], [223], [45], np.int32), 17 | ([134, 121, 78], [12, 13, 18], [4, 4, 9], np.float64), 18 | ]) 19 | def test_numpy(shape, chunks, blocks, dtype): 20 | size = int(np.prod(shape)) 21 | nparray = np.arange(size, dtype=dtype).reshape(shape) 22 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks) 23 | nparray2 = np.asarray(a[:]).view(dtype) 24 | np.testing.assert_almost_equal(nparray, nparray2) 25 | -------------------------------------------------------------------------------- /examples/ex_empty.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | np.random.seed(123) 13 | 14 | 15 | shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads, filters = ( 16 | (400, 399, 401), 17 | (20, 10, 130), 18 | (6, 6, 26), 19 | 3, 20 | cat.Codec.BLOSCLZ, 21 | 5, 22 | False, 23 | 2, 24 | [cat.Filter.DELTA, cat.Filter.TRUNC_PREC] 25 | ) 26 | 27 | a = cat.empty(shape, chunks=chunks, 28 | blocks=blocks, 29 | itemsize=itemsize, 30 | codec=codec, 31 | clevel=clevel, 32 | use_dict=use_dict, 33 | nthreads=nthreads, 34 | filters=filters) 35 | 36 | print("HOLA") 37 | -------------------------------------------------------------------------------- /examples/ex_getitem.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | 13 | shape = (10, 10) 14 | chunks = (5, 7) 15 | blocks = (2, 2) 16 | 17 | slices = (slice(2, 5), slice(4, 8)) 18 | 19 | dtype = np.int32 20 | itemsize = np.dtype(dtype).itemsize 21 | 22 | # Create a numpy array 23 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) 24 | 25 | # Create a caterva array from a numpy array 26 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks) 27 | 28 | # Get a slice 29 | buffer = np.asarray(a[slices]).view(dtype) 30 | buffer2 = nparray[slices] 31 | 32 | np.testing.assert_almost_equal(buffer, buffer2) 33 | 34 | a[slices] = np.ones((5, 5), dtype=dtype) 35 | 36 | print(np.asarray(a[...]).view(dtype)) 37 | -------------------------------------------------------------------------------- /caterva/utils.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from enum import Enum 10 | import os 11 | import shutil 12 | 13 | 14 | class Codec(Enum): 15 | """ 16 | Available codecs. 17 | """ 18 | BLOSCLZ = 0 19 | LZ4 = 1 20 | LZ4HC = 2 21 | ZLIB = 4 22 | ZSTD = 5 23 | 24 | 25 | class Filter(Enum): 26 | """ 27 | Available filters. 28 | """ 29 | NOFILTER = 0 30 | SHUFFLE = 1 31 | BITSHUFFLE = 2 32 | DELTA = 3 33 | TRUNC_PREC = 4 34 | 35 | 36 | def remove(urlpath): 37 | """ 38 | Remove a caterva file. 
39 | 40 | Parameters 41 | ---------- 42 | urlpath: String 43 | The array urlpath. 44 | """ 45 | if os.path.exists(urlpath): 46 | if os.path.isdir(urlpath): 47 | shutil.rmtree(urlpath) 48 | else: 49 | os.remove(urlpath) 50 | -------------------------------------------------------------------------------- /tests/test_resize.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import caterva as cat 11 | import pytest 12 | 13 | 14 | @pytest.mark.parametrize("shape, new_shape, chunks, blocks, fill_value", 15 | [ 16 | ((100, 1230), (200, 1230), (200, 100), (55, 3), b"0123"), 17 | ((23, 34), (23, 120), (20, 20), (10, 10), b"sun"), 18 | ((80, 51, 60), (80, 100, 100), (20, 10, 33), (6, 6, 26), b"qwerty") 19 | ]) 20 | def test_resize(shape, new_shape, chunks, blocks, fill_value): 21 | a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks) 22 | 23 | a.resize(new_shape) 24 | assert a.shape == new_shape 25 | 26 | slices = tuple(slice(s) for s in shape) 27 | for i in np.nditer(np.array(a[slices])): 28 | assert i == fill_value 29 | -------------------------------------------------------------------------------- /examples/ex_copy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | 12 | 13 | shape = (10, 10) 14 | chunks = (10, 10) 15 | blocks = (10, 10) 16 | 17 | dtype = np.dtype(np.float64) 18 | 19 | # Create a buffer 20 | buffer = bytes(np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)) 21 | 22 | # Create a caterva array from a buffer 23 | a = cat.from_buffer(buffer, shape, dtype.itemsize, dtype=str(dtype), 24 | chunks=chunks, blocks=blocks) 25 | 26 | # Get a copy of a caterva array 27 | b = cat.copy(a) 28 | d = b.copy() 29 | 30 | aux = np.asarray(b) 31 | aux[1, 2] = 0 32 | aux2 = cat.asarray(aux) 33 | 34 | print(np.asarray(aux2)) 35 | 36 | c = np.asarray(b) 37 | 38 | c[3:5, 2:7] = 0 39 | print(c) 40 | 41 | del b 42 | 43 | print(c) 44 | 45 | # Convert the copy to a buffer 46 | buffer1 = a.to_buffer() 47 | buffer2 = d.to_buffer() 48 | 49 | assert buffer1 == buffer2 50 | -------------------------------------------------------------------------------- /tests/test_full.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import caterva as cat 11 | import pytest 12 | 13 | 14 | @pytest.mark.parametrize("shape, chunks, blocks, fill_value, cname, clevel, use_dict, nthreads", 15 | [ 16 | ((100, 1230), (200, 100), (55, 3), b"0123", cat.Codec.LZ4HC, 4, 0, 1), 17 | ((23, 34), (20, 20), (10, 10), b"sun", cat.Codec.LZ4HC, 8, 0, 2), 18 | ((80, 51, 60), (20, 10, 33), (6, 6, 26), b"qwerty", cat.Codec.ZLIB, 5, 1, 2) 19 | ]) 20 | def test_full(shape, chunks, blocks, fill_value, cname, clevel, use_dict, nthreads): 21 | a = cat.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks, cname=cname, clevel=clevel, 22 | use_dict=use_dict, nthreads=nthreads) 23 | 24 | for i in np.nditer(np.array(a[:])): 25 | assert i == fill_value 26 | -------------------------------------------------------------------------------- /examples/ex_meta.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | import os 12 | 13 | 14 | shape = (128, 128) 15 | chunks = (32, 32) 16 | blocks = (8, 8) 17 | 18 | urlpath = "ex_meta.cat" 19 | if os.path.exists(urlpath): 20 | # Remove file on disk 21 | os.remove(urlpath) 22 | 23 | dtype = np.dtype(np.complex128) 24 | itemsize = dtype.itemsize 25 | 26 | # Create a numpy array 27 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) 28 | 29 | meta = { 30 | "m1": b"1111", 31 | "m2": b"2222", 32 | } 33 | # Create a caterva array from a numpy array (on disk) 34 | a = cat.from_buffer(bytes(nparray), nparray.shape, chunks=chunks, blocks=blocks, 35 | urlpath=urlpath, itemsize=itemsize, meta=meta) 36 | 37 | # Read a caterva array from disk 38 | b = cat.open(urlpath) 39 | 40 | # Deal with meta 41 | m1 = b.meta.get("m5", b"0000") 42 | m2 = b.meta["m2"] 43 | 44 | # Remove file on disk 45 | os.remove(urlpath) 46 | -------------------------------------------------------------------------------- /tests/test_zeros.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import caterva as cat 11 | import pytest 12 | 13 | 14 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize, cname, clevel, use_dict, nthreads", 15 | [ 16 | ((100, 1230), (200, 100), (55, 3), 4, cat.Codec.ZSTD, 4, 0, 1), 17 | ((23, 34), (10, 10), (10, 10), 8, cat.Codec.BLOSCLZ, 8, 0, 2), 18 | ((80, 51, 60), (20, 10, 33), (6, 6, 26), 3, cat.Codec.LZ4, 5, 1, 2) 19 | ]) 20 | def test_zeros(shape, chunks, blocks, itemsize, cname, clevel, use_dict, nthreads): 21 | a = cat.zeros(shape, chunks=chunks, 22 | blocks=blocks, 23 | itemsize=itemsize, 24 | cname=cname, 25 | clevel=clevel, 26 | use_dict=use_dict, 27 | nthreads=nthreads) 28 | 29 | for i in np.nditer(np.array(a[:])): 30 | assert i == bytes(itemsize) 31 | -------------------------------------------------------------------------------- /ANNOUNCE.rst: -------------------------------------------------------------------------------- 1 | # Announcing python-caterva 0.7.3 2 | 3 | 4 | ## What is new? 5 | 6 | In this release, support for Python 3.7 has been dropped, and support for 3.10 7 | and 3.11 has been added. 8 | 9 | For more info, you can have a look at the release notes in: 10 | 11 | https://github.com/Blosc/python-caterva/releases 12 | 13 | More docs and examples are available in the documentation site: 14 | 15 | https://python-caterva.readthedocs.io 16 | 17 | 18 | ## What is it? 19 | 20 | Caterva is an open source C library and a format that allows storing large 21 | multidimensional, chunked, compressed datasets. Data can be stored either 22 | in-memory or on-disk, but the API to handle both versions is the same. 23 | Compression is handled transparently for the user by adopting the Blosc2 library. 
24 | 25 | python-caterva is a pythonic wrapper for the Caterva library. 
26 | 27 | 28 | ## Sources repository 29 | 30 | The sources and documentation are managed through github services at: 31 | 32 | http://github.com/Blosc/python-caterva 33 | 34 | Caterva is distributed using the BSD license, see 35 | [LICENSE](https://github.com/Blosc/python-caterva/blob/master/LICENSE) for details. 36 | 37 | 38 | ## Mailing list 39 | 40 | There is an official Blosc mailing list where discussions about Caterva are welcome: 41 | 42 | blosc@googlegroups.com 43 | 44 | http://groups.google.es/group/blosc 45 | 46 | 47 | Enjoy Data! 48 | - The Blosc Development Team 49 | -------------------------------------------------------------------------------- /examples/ex_persistency.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | import os 12 | import shutil 13 | 14 | 15 | shape = (128, 128) 16 | chunks = (32, 32) 17 | blocks = (8, 8) 18 | 19 | urlpath = "ex_persistency.cat" 20 | 21 | if os.path.exists(urlpath): 22 | cat.remove(urlpath) 23 | 24 | dtype = np.dtype(np.complex128) 25 | itemsize = dtype.itemsize 26 | 27 | # Create a numpy array 28 | nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) 29 | 30 | # Create a caterva array from a numpy array (on disk) 31 | a = cat.from_buffer(bytes(nparray), nparray.shape, itemsize, chunks=chunks, blocks=blocks, 32 | urlpath=urlpath, contiguous=False) 33 | 34 | # Read a caterva array from disk 35 | b = cat.open(urlpath) 36 | 37 | # Convert a caterva array to a numpy array 38 | nparray2 = np.asarray(cat.from_buffer(b.to_buffer(), b.shape, 
b.itemsize)).view(dtype) 39 | 40 | np.testing.assert_almost_equal(nparray, nparray2) 41 | 42 | # Remove file on disk 43 | if os.path.exists(urlpath): 44 | cat.remove(urlpath) 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Important:** All the features of Python-Caterva have been included in the [Python-Blosc2 NDArray object](https://www.blosc.org/python-blosc2/python-blosc2.html). As a result, this project is now obsolete. 2 | 3 | [![Python package](https://github.com/Blosc/python-caterva/actions/workflows/python-package.yml/badge.svg?branch=master)](https://github.com/Blosc/python-caterva/actions/workflows/python-package.yml) 4 | [![Documentation Status](https://readthedocs.org/projects/python-caterva/badge/?version=latest)](https://python-caterva.readthedocs.io/en/latest/?badge=latest) 5 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](code_of_conduct.md) 6 | 7 | # python-caterva 8 | 9 | Python wrapper for [Caterva](https://caterva.readthedocs.io). 10 | 11 | ## Install 12 | 13 | ```sh 14 | pip install caterva 15 | ``` 16 | 17 | ## Development Workflow 18 | 19 | ### Clone repo and submodules 20 | 21 | ```sh 22 | git clone --recurse-submodules https://github.com/Blosc/python-caterva 23 | ``` 24 | 25 | ### Install requirements 26 | 27 | ```sh 28 | python -m pip install -r requirements-build.txt 29 | python -m pip install -r requirements.txt 30 | python -m pip install -r requirements-tests.txt 31 | ``` 32 | 33 | ### Compile 34 | 35 | ```sh 36 | python setup.py build_ext --build-type=RelWithDebInfo 37 | ``` 38 | 39 | ### Run tests 40 | 41 | ```sh 42 | PYTHONPATH=. pytest 43 | ``` 44 | 45 | ### Installing 46 | 47 | ```sh 48 | python -m pip install . 
49 | ``` 50 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ['3.8', '3.9', '3.10', '3.11'] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | with: 24 | submodules: 'recursive' 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Install the package 30 | run: | 31 | python -m pip install --upgrade pip 32 | python -m pip install build 33 | python -m build --wheel 34 | python -m pip install dist/* 35 | # - name: Lint with flake8 36 | # run: | 37 | # # stop the build if there are Python syntax errors or undefined names 38 | # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 39 | # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 40 | # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 41 | - name: Test with pytest 42 | run: | 43 | python -m pip install -r requirements-tests.txt 44 | pytest 45 | -------------------------------------------------------------------------------- /tests/test_persistency.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | import os 13 | 14 | 15 | @pytest.mark.parametrize("contiguous", 16 | [ 17 | True, 18 | False, 19 | ]) 20 | @pytest.mark.parametrize("shape, chunks, blocks, urlpath, dtype", 21 | [ 22 | ([634], [156], [33], "test00.cat", np.float64), 23 | ([20, 134, 13], [7, 22, 5], [3, 5, 3], "test01.cat", np.int32), 24 | ([12, 13, 14, 15, 16], [4, 6, 4, 7, 5], [2, 4, 2, 3, 3], "test02.cat", np.float32) 25 | ]) 26 | def test_persistency(shape, chunks, blocks, urlpath, contiguous, dtype): 27 | if os.path.exists(urlpath): 28 | cat.remove(urlpath) 29 | 30 | size = int(np.prod(shape)) 31 | nparray = np.arange(size, dtype=dtype).reshape(shape) 32 | _ = cat.asarray(nparray, chunks=chunks, blocks=blocks, 33 | urlpath=urlpath, contiguous=contiguous) 34 | b = cat.open(urlpath) 35 | 36 | bc = b[:] 37 | 38 | nparray2 = np.asarray(bc).view(dtype) 39 | np.testing.assert_almost_equal(nparray, nparray2) 40 | 41 | cat.remove(urlpath) 42 | -------------------------------------------------------------------------------- /caterva/info.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from textwrap import TextWrapper 10 | 11 | 12 | def info_text_report(items: list) -> str: 13 | keys = [k for k, v in items] 14 | max_key_len = max(len(k) for k in keys) 15 | report = "" 16 | for k, v in items: 17 | wrapper = TextWrapper( 18 | width=80, 19 | initial_indent=k.ljust(max_key_len) + " : ", 20 | subsequent_indent=" " * max_key_len + " : ", 21 | ) 22 | text = wrapper.fill(str(v)) 23 | report += text + "\n" 24 | return report 25 | 26 | 27 | def info_html_report(items: list) -> str: 28 | report = '' 29 | report += "" 30 | for k, v in items: 31 | report += ( 32 | "" 33 | '' 34 | '' 35 | "" % (k, v) 36 | ) 37 | report += "" 38 | report += "
%s%s
" 39 | return report 40 | 41 | 42 | class InfoReporter(object): 43 | def __init__(self, obj): 44 | self.obj = obj 45 | 46 | def __repr__(self): 47 | items = self.obj.info_items 48 | return info_text_report(items) 49 | 50 | def _repr_html_(self): 51 | items = self.obj.info_items 52 | return info_html_report(items) 53 | -------------------------------------------------------------------------------- /LICENSES/BLOSC.txt: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Blosc - A blocking, shuffling and lossless compression library 4 | 5 | Copyright (C) 2009-2018 Francesc Alted 6 | Copyright (C) 2019-present Blosc Development team 7 | 8 | Redistribution and use in source and binary forms, with or without modification, 9 | are permitted provided that the following conditions are met: 10 | 11 | * Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | * Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | * Neither the name Francesc Alted nor the names of its contributors may be used 19 | to endorse or promote products derived from this software without specific 20 | prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 26 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 29 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Caterva - A multidimensional data container on top of Blosc2. 4 | 5 | Copyright (C) 2018 Francesc Alted 6 | Copyright (C) 2018 Aleix Alcacer 7 | Copyright (C) 2019-present Blosc Development team 8 | 9 | Redistribution and use in source and binary forms, with or without modification, 10 | are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the names of the Blosc Development team nor the names of its 20 | contributors may be used to endorse or promote products derived from this 21 | software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 27 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 30 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /LICENSES/CATERVA.txt: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Caterva - A multidimensional data container on top of Blosc2. 4 | 5 | Copyright (C) 2018 Francesc Alted 6 | Copyright (C) 2018 Aleix Alcacer 7 | Copyright (C) 2019-present Blosc Development team 8 | 9 | Redistribution and use in source and binary forms, with or without modification, 10 | are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the names of the Blosc Development team nor the names of its 20 | contributors may be used to endorse or promote products derived from this 21 | software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 27 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 30 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /tests/test_getitem.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | 13 | 14 | argnames = "shape, chunks, blocks, slices, dtype" 15 | argvalues = [ 16 | ([456], [258], [73], slice(0, 1), np.int32), 17 | ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), 18 | np.float64), 19 | ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), 20 | np.float32) 21 | ] 22 | 23 | 24 | @pytest.mark.parametrize(argnames, argvalues) 25 | def test_getitem(shape, chunks, blocks, slices, dtype): 26 | size = int(np.prod(shape)) 27 | nparray = np.arange(size, dtype=dtype).reshape(shape) 28 | a = cat.from_buffer(bytes(nparray), nparray.shape, nparray.itemsize, 29 | chunks=chunks, blocks=blocks) 30 | nparray_slice = nparray[slices] 31 | buffer_slice = np.asarray(a[slices]) 32 | a_slice = np.frombuffer(buffer_slice, 
dtype=dtype).reshape(nparray_slice.shape) 33 | np.testing.assert_almost_equal(a_slice, nparray_slice) 34 | 35 | 36 | @pytest.mark.parametrize(argnames, argvalues) 37 | def test_getitem_numpy(shape, chunks, blocks, slices, dtype): 38 | size = int(np.prod(shape)) 39 | nparray = np.arange(size, dtype=dtype).reshape(shape) 40 | a = cat.asarray(nparray, chunks=chunks, blocks=blocks) 41 | nparray_slice = nparray[slices] 42 | a_slice = np.asarray(a[slices]).view(dtype) 43 | 44 | np.testing.assert_almost_equal(a_slice, nparray_slice) 45 | -------------------------------------------------------------------------------- /examples/ex_formats.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import caterva as cat 11 | from time import time 12 | import os 13 | 14 | urlpath_sparse = "ex_formats_sparse.caterva" 15 | # urlpath_sparse = None 16 | urlpath_contiguous = "ex_formats_contiguous.caterva" 17 | # urlpath_contiguous = None 18 | 19 | if urlpath_sparse and os.path.exists(urlpath_sparse): 20 | cat.remove(urlpath_sparse) 21 | 22 | if urlpath_contiguous and os.path.exists(urlpath_contiguous): 23 | cat.remove(urlpath_contiguous) 24 | 25 | shape = (1000 * 1000,) 26 | chunks = (100,) 27 | blocks = (100,) 28 | dtype = np.dtype(np.float64) 29 | itemsize = dtype.itemsize 30 | 31 | t0 = time() 32 | a = cat.empty(shape, 8, chunks=chunks, blocks=blocks, urlpath=urlpath_sparse, 33 | contiguous=False) 34 | for nchunk in range(a.nchunks): 35 | a[nchunk * chunks[0]: (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) 36 | t1 = time() 37 | 38 | 
print(f"Time: {(t1 - t0):.4f} s") 39 | print(a.nchunks) 40 | an = np.array(a[:]).view(dtype) 41 | 42 | 43 | t0 = time() 44 | b = cat.empty(shape, itemsize=itemsize, chunks=chunks, blocks=blocks, urlpath=urlpath_contiguous, contiguous=True) 45 | 46 | print(b.nchunks) 47 | for nchunk in range(shape[0] // chunks[0]): 48 | b[nchunk * chunks[0]: (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) 49 | t1 = time() 50 | 51 | print(f"Time: {(t1 - t0):.4f} s") 52 | print(b.nchunks) 53 | bn = np.array(b[:]).view(dtype) 54 | 55 | np.testing.assert_allclose(an, bn) 56 | -------------------------------------------------------------------------------- /RELEASING.rst: -------------------------------------------------------------------------------- 1 | Python-caterva release procedure 2 | ================================ 3 | 4 | Preliminaries 5 | ------------- 6 | 7 | * Make sure that the current master branch is passing the tests on GitHub Actions. 8 | 9 | * Make sure that `RELEASE_NOTES.rst` and `ANNOUNCE.rst` are up to date with the latest news 10 | in the release. 11 | 12 | * Check that the `VERSION` file contains the correct number. 13 | 14 | * Check any copyright listings and update them if necessary. You can use ``grep 15 | -i copyright`` to figure out where they might be. 16 | 17 | * Commit the changes:: 18 | 19 | git commit -a -m "Getting ready for release X.Y.Z" 20 | git push 21 | 22 | * Check that the documentation is correctly created in https://python-caterva.readthedocs.io. 23 | 24 | 25 | Tagging 26 | ------- 27 | 28 | * Create a signed tag ``X.Y.Z`` from ``master``. Use the following message:: 29 | 30 | git tag -a vX.Y.Z -m "Tagging version X.Y.Z" 31 | 32 | * Push the tag to the GitHub repo:: 33 | 34 | git push 35 | git push --tags 36 | 37 | Once the tag is up, update the release notes in: https://github.com/Blosc/python-caterva/releases 38 | 39 | * Check that the wheels are uploaded correctly to PyPI. 
40 | 41 | Announcing 42 | ---------- 43 | 44 | * Send an announcement to the Blosc list. Use the ``ANNOUNCE.rst`` file as a skeleton 45 | (or possibly as the definitive version). 46 | 47 | * Announce on Twitter via the @Blosc2 account and rejoice. 48 | 49 | 50 | Post-release actions 51 | -------------------- 52 | 53 | * Create new headers for adding new features in ``RELEASE_NOTES.rst`` 54 | and add this placeholder: 55 | 56 | XXX version-specific blurb XXX 57 | 58 | * Edit ``VERSION`` in master to increment the version to the next 59 | minor one (i.e. X.Y.Z --> X.Y.(Z+1).dev0). 60 | 61 | * Commit your changes with:: 62 | 63 | git commit -a -m "Post X.Y.Z release actions done" 64 | git push 65 | 66 | 67 | That's all folks! 68 | -------------------------------------------------------------------------------- /tests/test_empty.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | 12 | 13 | @pytest.mark.parametrize("shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads, filters", 14 | [ 15 | ((100, 1230), (200, 100), (55, 3), 4, cat.Codec.LZ4, 4, 0, 1, [cat.Filter.SHUFFLE]), 16 | ((234, 125), (90, 90), (20, 10), 8, cat.Codec.LZ4HC, 8, 0, 2, 17 | [cat.Filter.DELTA, cat.Filter.BITSHUFFLE]), 18 | ((400, 399, 401), (20, 10, 130), (6, 6, 26), 3, cat.Codec.BLOSCLZ, 5, 1, 2, 19 | [cat.Filter.DELTA, cat.Filter.TRUNC_PREC]) 20 | ]) 21 | def test_empty(shape, chunks, blocks, itemsize, codec, clevel, use_dict, nthreads, 22 | filters): 23 | a = cat.empty(shape, chunks=chunks, 24 | blocks=blocks, 25 | itemsize=itemsize, 26 | codec=codec, 27 | clevel=clevel, 28 | use_dict=use_dict, 29 | nthreads=nthreads, 30 | filters=filters) 31 | if chunks is not None: 32 | assert a.chunks == chunks 33 | assert a.blocks == blocks 34 | assert a.shape == shape 35 | assert a.itemsize == itemsize 36 | assert a.codec == (codec if chunks is not None else None) 37 | assert a.clevel == (clevel if chunks is not None else 1) 38 | if chunks is not None: 39 | assert a.filters[-len(filters):] == filters 40 | else: 41 | assert a.filters is None 42 | -------------------------------------------------------------------------------- /tests/test_metalayers.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | import os 13 | from msgpack import packb 14 | 15 | 16 | @pytest.mark.parametrize("contiguous", 17 | [ 18 | True, 19 | False, 20 | ]) 21 | @pytest.mark.parametrize("shape, chunks, blocks, urlpath, dtype", 22 | [ 23 | ([556], [221], [33], "testmeta00.cat", np.float64), 24 | ([20, 134, 13], [12, 66, 8], [3, 13, 5], "testmeta01.cat", np.int32), 25 | ([12, 13, 14, 15, 16], [8, 9, 4, 12, 9], [2, 6, 4, 5, 4], "testmeta02.cat", np.float32) 26 | ]) 27 | def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype): 28 | if os.path.exists(urlpath): 29 | cat.remove(urlpath) 30 | 31 | numpy_meta = packb({b"dtype": str(np.dtype(dtype))}) 32 | test_meta = packb({b"lorem": 1234}) 33 | 34 | # Create an empty caterva array (on disk) 35 | itemsize = np.dtype(dtype).itemsize 36 | a = cat.empty(shape, itemsize, chunks=chunks, blocks=blocks, 37 | urlpath=urlpath, contiguous=contiguous, 38 | meta={"numpy": numpy_meta, 39 | "test": test_meta}) 40 | 41 | assert ("numpy" in a.meta) 42 | assert ("error" not in a.meta) 43 | assert (a.meta["numpy"] == numpy_meta) 44 | assert ("test" in a.meta) 45 | assert (a.meta["test"] == test_meta) 46 | 47 | test_meta = packb({b"lorem": 4231}) 48 | a.meta["test"] = test_meta 49 | assert (a.meta["test"] == test_meta) 50 | 51 | # Remove file on disk 52 | cat.remove(urlpath) 53 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | Python-caterva documentation 2 | ============================ 3 | 4 | Python-caterva is a Python wrapper of `Caterva `_, an open source C library specially 5 | designed 6 | to deal with large 
multidimensional, chunked, compressed datasets. 7 | 8 | .. panels:: 9 | :card: + intro-card text-center 10 | :column: col-lg-6 col-md-12 col-sm-12 col-xs-12 d-flex 11 | 12 | --- 13 | 14 | Getting Started 15 | ^^^^^^^^^^^^^^^ 16 | 17 | New to *python-caterva*? Check out the getting started guides. They contain an 18 | introduction to *python-caterva* main concepts and an installation tutorial. 19 | 20 | +++ 21 | 22 | .. link-button:: getting_started/index 23 | :type: ref 24 | :text: To the getting started guides 25 | :classes: btn-light 26 | 27 | --- 28 | 29 | API Reference 30 | ^^^^^^^^^^^^^ 31 | 32 | The reference guide contains a detailed description of the *python-caterva* API. 33 | The reference describes how the functions work and which parameters can 34 | be used. 35 | 36 | +++ 37 | 38 | .. link-button:: reference/index 39 | :type: ref 40 | :text: To the reference guide 41 | :classes: btn-light 42 | 43 | 44 | --- 45 | 46 | Development 47 | ^^^^^^^^^^^ 48 | 49 | Saw a typo in the documentation? Want to improve 50 | existing functionalities? The contributing guidelines will guide 51 | you through the process of improving *python-caterva*. 52 | 53 | +++ 54 | 55 | .. link-button:: development/index 56 | :type: ref 57 | :text: To the development guide 58 | :classes: btn-light 59 | 60 | --- 61 | 62 | Release Notes 63 | ^^^^^^^^^^^^^ 64 | 65 | Want to see what's new in the latest release? Check out the release notes to find out! 66 | 67 | +++ 68 | 69 | .. link-button:: release_notes/index 70 | :type: ref 71 | :text: To the release notes 72 | :classes: btn-light 73 | 74 | 75 | .. 
toctree:: 76 | :maxdepth: 1 77 | :hidden: 78 | 79 | Getting Started 80 | API Reference 81 | Development 82 | Release Notes 83 | -------------------------------------------------------------------------------- /tests/test_copy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import pytest 11 | import numpy as np 12 | 13 | 14 | @pytest.mark.parametrize("shape, chunks1, blocks1, chunks2, blocks2, itemsize", 15 | [ 16 | ([521], [212], [33], [121], [18], 8), 17 | ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], 4), 18 | ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], 19 | [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], 8) 20 | ]) 21 | def test_copy(shape, chunks1, blocks1, chunks2, blocks2, itemsize): 22 | size = int(np.prod(shape)) 23 | buffer = bytes(size * itemsize) 24 | a = cat.from_buffer(buffer, shape, itemsize, chunks=chunks1, blocks=blocks1, 25 | complevel=2) 26 | b = a.copy(chunks=chunks2, blocks=blocks2, 27 | itemsize=itemsize, complevel=5, filters=[cat.Filter.BITSHUFFLE]) 28 | buffer2 = b.to_buffer() 29 | assert buffer == buffer2 30 | 31 | 32 | @pytest.mark.parametrize("shape, chunks1, blocks1, chunks2, blocks2, dtype", 33 | [ 34 | ([521], [212], [33], [121], [18], "i8"), 35 | ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], "f4"), 36 | ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], 37 | [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], "f8") 38 | ]) 39 | def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype): 40 | size = int(np.prod(shape)) 41 | nparray = np.arange(size, 
dtype=dtype).reshape(shape) 42 | a = cat.asarray(nparray, chunks=chunks1, blocks=blocks1) 43 | b = a.copy(chunks=chunks2, blocks=blocks2, complevel=5, filters=[cat.Filter.BITSHUFFLE]) 44 | if chunks2: 45 | b = b[...] 46 | nparray2 = np.asarray(b).view(dtype) 47 | np.testing.assert_almost_equal(nparray, nparray2) 48 | -------------------------------------------------------------------------------- /bench/compare_getitem.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import caterva as cat 10 | import numpy as np 11 | import os 12 | import sys 13 | from time import time 14 | 15 | 16 | # Dimensions, type and persistency properties for the arrays 17 | shape = (1000 * 1000,) 18 | chunkshape = (100,) 19 | blockshape = (25,) 20 | 21 | dtype = np.float64 22 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False 23 | 24 | if persistent: 25 | filename = "bench_getitem.cat" 26 | if os.path.exists(filename): 27 | # Remove file on disk 28 | os.remove(filename) 29 | else: 30 | filename = None 31 | 32 | itemsize = np.dtype(dtype).itemsize 33 | 34 | # Create an empty caterva array 35 | a = cat.empty(shape, itemsize, dtype=str(np.dtype(dtype)), chunkshape=chunkshape, blockshape=blockshape, 36 | filename=filename, compcode=0) 37 | 38 | # Fill an empty caterva array using a block iterator 39 | t0 = time() 40 | count = 0 41 | for block, info in a.iter_write(): 42 | nparray = np.arange(count, count + info.nitems, dtype=dtype).reshape(info.shape) 43 | block[:] = bytes(nparray) 44 | count += info.nitems 45 | t1 = time() 46 | print("Time for filling: %.3fs" % (t1 
- t0)) 47 | 48 | # Check that the retrieved items are correct 49 | t0 = time() 50 | for block, info in a.iter_read(chunkshape): 51 | pass 52 | t1 = time() 53 | print("Time for reading with iterator: %.3fs" % (t1 - t0)) 54 | 55 | # Asserting results 56 | count = 0 57 | for block, info in a.iter_read(chunkshape): 58 | nparray = np.arange(count, count + info.nitems, dtype=dtype).reshape(info.shape) 59 | np.testing.assert_allclose(block, nparray) 60 | count += info.nitems 61 | 62 | # Use getitem 63 | t0 = time() 64 | for i in range(shape[0] // chunkshape[0]): 65 | _ = a[i * 100: (i+1) * 100] 66 | t1 = time() 67 | print("Time for reading with getitem: %.3fs" % (t1 - t0)) 68 | 69 | count = 0 70 | for i in range(shape[0] // chunkshape[0]): 71 | nparray = np.arange(count, count + chunkshape[0], dtype=dtype).reshape(chunkshape) 72 | np.testing.assert_allclose(a[i * chunkshape[0]: (i+1) * chunkshape[0]], nparray) 73 | count += chunkshape[0] 74 | 75 | 76 | if persistent: 77 | os.remove(filename) 78 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing to python-caterva 2 | ============================== 3 | 4 | python-caterva is a community maintained project. We want to make contributing to 5 | this project as easy and transparent as possible. 6 | 7 | 8 | Asking for help 9 | --------------- 10 | 11 | If you have a question about how to use python-caterva, please post your question on 12 | StackOverflow using the `“caterva” tag `_. 13 | 14 | 15 | 16 | Bug reports 17 | ----------- 18 | 19 | We use `GitHub issues `_ to track 20 | public bugs. Please ensure your description is clear and has sufficient 21 | instructions to be able to reproduce the issue. The ideal report should 22 | contain the following: 23 | 24 | 1. 
Summarize the problem: Include details about your goal, describe expected 25 | and actual results and include any error messages. 26 | 27 | 2. Describe what you’ve tried: Show what you’ve tried, tell us what you 28 | found and why it didn’t meet your needs. 29 | 30 | 3. Minimum reproducible example: Share the minimum amount of code needed to 31 | reproduce your issue. You can format the code nicely using markdown:: 32 | 33 | ```python 34 | import caterva as cat 35 | 36 | ... 37 | ``` 38 | 39 | 40 | 4. Determine the environment: Indicate the python-caterva version and the operating 41 | system the code is running on. 42 | 43 | Contributing to code 44 | -------------------- 45 | 46 | We actively welcome your code contributions. By contributing to python-caterva, you 47 | agree that your contributions will be licensed under the `LICENSE <https://github.com/Blosc/python-caterva/blob/master/LICENSE>`_ file of 48 | the project. 49 | 50 | Fork the repo 51 | +++++++++++++ 52 | 53 | Make a fork of the python-caterva repository and clone it:: 54 | 55 | git clone https://github.com/<your-github-username>/python-caterva 56 | 57 | 58 | Create your branch 59 | ++++++++++++++++++++ 60 | 61 | Before you do any new work or submit a pull request, please open an `issue on 62 | GitHub <https://github.com/Blosc/python-caterva/issues>`_ to report the bug or 63 | propose the feature you’d like to add. 64 | 65 | Then create a new, separate branch for each piece of work you want to do. 66 | 67 | 68 | Update docstrings 69 | +++++++++++++++++ 70 | 71 | If you've changed APIs, update the involved docstrings using the `numpydoc 72 | format <https://numpydoc.readthedocs.io/en/latest/format.html>`_. 73 | 74 | 75 | Run the test suite 76 | ++++++++++++++++++ 77 | 78 | If you have added code that needs to be tested, add the necessary tests and 79 | verify that all tests pass successfully.
80 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from __future__ import print_function 10 | 11 | import os 12 | import sys 13 | import io 14 | 15 | from skbuild import setup 16 | from textwrap import dedent 17 | 18 | 19 | with io.open('README.md', encoding='utf-8') as f: 20 | long_description = f.read() 21 | 22 | 23 | def exit_with_error(message): 24 | print('ERROR: %s' % message) 25 | sys.exit(1) 26 | 27 | 28 | # Check for Python 29 | if sys.version_info[0] == 3: 30 | if sys.version_info[1] < 6: 31 | exit_with_error("You need Python 3.6 or greater to install Caterva!") 32 | else: 33 | exit_with_error("You need Python 3.6 or greater to install Caterva!") 34 | 35 | 36 | # Read the long_description from README.md 37 | with open('README.md') as f: 38 | long_description = f.read() 39 | 40 | # Blosc version 41 | VERSION = open('VERSION').read().strip() 42 | # Create the version.py file 43 | open('caterva/version.py', 'w').write('__version__ = "%s"\n' % VERSION) 44 | 45 | 46 | classifiers = dedent("""\ 47 | Development Status :: 3 - Alpha 48 | Intended Audience :: Developers 49 | Intended Audience :: Information Technology 50 | Intended Audience :: Science/Research 51 | License :: OSI Approved :: BSD License 52 | Programming Language :: Python 53 | Topic :: Software Development :: Libraries :: Python Modules 54 | Operating System :: Microsoft :: Windows 55 | Operating System :: Unix 56 | Programming Language :: Python :: 3 57 | Programming Language :: Python :: 3.6 
58 | Programming Language :: Python :: 3.7 59 | Programming Language :: Python :: 3.8 60 | """) 61 | 62 | 63 | setup( 64 | name="caterva", 65 | version=VERSION, 66 | description='Caterva for Python (multidimensional compressed data containers).', 67 | long_description=long_description, 68 | long_description_content_type='text/markdown', 69 | classifiers=[c for c in classifiers.split("\n") if c], 70 | author='Blosc Development Team', 71 | author_email='blosc@blosc.org', 72 | maintainer='Blosc Development Team', 73 | maintainer_email='blosc@blosc.org', 74 | url='https://github.com/Blosc/python-caterva', 75 | license='https://opensource.org/licenses/BSD-3-Clause', 76 | platforms=['any'], 77 | packages=['caterva'], 78 | package_dir={'caterva': 'caterva'}, 79 | install_requires=['ndindex', 'msgpack'], 80 | ) 81 | -------------------------------------------------------------------------------- /caterva/meta.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from . import caterva_ext as ext 10 | from collections.abc import Mapping 11 | 12 | 13 | class Meta(Mapping): 14 | """ 15 | Class providing access to user meta on a :py:class:`NDArray`. 16 | It will be available via the `.meta` property of an array. 17 | """ 18 | def get(self, key, default=None): 19 | """Return the value for `key` if `key` is in the dictionary, else `default`. 
20 | If `default` is not given, it defaults to ``None``.""" 21 | return self[key] if key in self else default 22 | 23 | def __del__(self): 24 | pass 25 | 26 | def __init__(self, ndarray): 27 | self.arr = ndarray 28 | 29 | def __contains__(self, key): 30 | """Check if the `key` metalayer exists or not.""" 31 | return ext.meta__contains__(self.arr, key) 32 | 33 | def __delitem__(self, key): 34 | return None 35 | 36 | def __setitem__(self, key, value): 37 | """Update the `key` metalayer with `value`. 38 | 39 | Parameters 40 | ---------- 41 | key: str 42 | The name of the metalayer to update. 43 | value: bytes 44 | The buffer containing the new content for the metalayer. 45 | 46 | ..warning: Note that the *length* of the metalayer cannot not change, 47 | else an exception will be raised. 48 | """ 49 | return ext.meta__setitem__(self.arr, key, value) 50 | 51 | def __getitem__(self, item): 52 | """Return the `item` metalayer. 53 | 54 | Parameters 55 | ---------- 56 | item: str 57 | The name of the metalayer to return. 58 | 59 | Returns 60 | ------- 61 | bytes 62 | The buffer containing the metalayer info (typically in msgpack 63 | format). 
64 | """ 65 | return ext.meta__getitem__(self.arr, item) 66 | 67 | def keys(self): 68 | """Return the metalayers keys.""" 69 | return ext.meta_keys(self.arr) 70 | 71 | def values(self): 72 | raise NotImplementedError("Values can not be accessed") 73 | 74 | def items(self): 75 | raise NotImplementedError("Items can not be accessed") 76 | 77 | def __iter__(self): 78 | """Iter over the keys of the metalayers.""" 79 | return iter(self.keys()) 80 | 81 | def __len__(self): 82 | """Return the number of metalayers.""" 83 | return ext.meta__len__(self.arr) 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Skit build 141 | _skbuild/ 142 | 143 | # PyCharm 144 | .idea/ 145 | 146 | # MacOS 147 | .DS_Store 148 | 149 | # Docs 150 | doc/source/reference/api 151 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Configuration file for the Sphinx documentation builder. 10 | # 11 | # This file only contains a selection of the most common options. For a full 12 | # list see the documentation: 13 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 14 | 15 | # -- Path setup -------------------------------------------------------------- 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. 
If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | 26 | # -- Project information ----------------------------------------------------- 27 | 28 | project = 'caterva' 29 | copyright = '2021, The Blosc Developers' 30 | author = 'The Blosc Developers' 31 | 32 | import caterva 33 | release = caterva.__version__ 34 | 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.autosummary', 44 | 'numpydoc', 45 | 'myst_nb', 46 | 'sphinx_panels' 47 | ] 48 | 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # List of patterns, relative to source directory, that match files and 54 | # directories to ignore when looking for source files. 55 | # This pattern also affects html_static_path and html_extra_path. 56 | exclude_patterns = [] 57 | 58 | 59 | # -- Options for HTML output ------------------------------------------------- 60 | 61 | # The theme to use for HTML and HTML Help pages. See the documentation for 62 | # a list of builtin themes. 63 | # 64 | html_theme = 'pydata_sphinx_theme' 65 | html_logo = "caterva-logo.png" 66 | 67 | pygments_style = 'sphinx' 68 | 69 | # Add any paths that contain custom static files (such as style sheets) here, 70 | # relative to this directory. They are copied after the builtin static files, 71 | # so a file named "default.css" will overwrite the builtin "default.css". 
# Build wheels (Linux, macOS, Windows) and an sdist on every push to
# master and on every tag; upload them to PyPI for tag pushes only.
name: Python publish
on:
  push:
    branches:
      - master
    tags:
      - '*'

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.name }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          - name: Ubuntu
            os: ubuntu-latest
            cibw_build: 'cp38-* cp39-* cp310-* cp311-*'
            cibw_skip: '*-manylinux*_i686'

          - name: MacOS
            os: macos-latest
            cibw_build: 'cp38-* cp39-* cp310-* cp311-*'

          - name: Windows x86
            os: windows-latest
            arch: x86
            cibw_build: 'cp38-win32 cp39-win32 cp310-win32 cp311-win32'

          - name: Windows amd64
            os: windows-latest
            arch: amd64
            cibw_build: 'cp38-win_amd64 cp39-win_amd64 cp310-win_amd64 cp311-win_amd64'

    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'

      - name: Install Ninja
        uses: seanmiddleditch/gha-setup-ninja@master

      - name: Install MSVC
        if: ${{runner.os == 'Windows'}}
        uses: ilammy/msvc-dev-cmd@v1
        with:
          arch: ${{ matrix.arch }}

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.11.0
        with:
          output-dir: wheelhouse
        env:
          CIBW_BUILD: ${{ matrix.cibw_build }}
          CIBW_SKIP: ${{ matrix.cibw_skip }}
          CIBW_BEFORE_BUILD: python -m pip install -r requirements.txt
          CIBW_BEFORE_TEST: python -m pip install -r requirements-tests.txt
          CIBW_TEST_COMMAND: python -m pytest {project}/tests
          CIBW_BUILD_VERBOSITY: 1

      - name: Upload wheels
        uses: actions/upload-artifact@v3
        with:
          # Spelled out so that it visibly matches the name requested by
          # the download step of `upload_pypi`.
          name: artifact
          path: ./wheelhouse/*.whl


  build_sdist:
    name: Build sdist
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'recursive'

      - uses: actions/setup-python@v4
        name: Setup Python
        with:
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build

      - name: Build sdist
        run: |
          python -m build --sdist

      - name: Upload sdist package
        uses: actions/upload-artifact@v3
        with:
          # Same artifact as the wheels, so a single download gets both
          name: artifact
          path: dist/*.tar.gz

  upload_pypi:
    needs: [ build_wheels, build_sdist ]  # last but not least
    runs-on: ubuntu-latest
    # Publish only for tag pushes; pushes to master just build and test
    if: startsWith(github.event.ref, 'refs/tags')
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: artifact
          path: dist

      # The `master` branch of this action has been sunset upstream;
      # `release/v1` is the supported rolling reference.
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.blosc_pypi_secret }}
and add for Python 3.10 and 3.11 15 | 16 | Changes from 0.7.1 to 0.7.2 17 | --------------------------- 18 | 19 | * Implement a resize method 20 | 21 | Changes from 0.7.0 to 0.7.1 22 | --------------------------- 23 | 24 | * Fix to apply filtersmeta from kwargs. 25 | * Fix metalayer creation in the ext file. 26 | * Update the docstrings. 27 | 28 | Changes from 0.6.0 to 0.7.0 29 | --------------------------- 30 | 31 | * Remove plainbuffer support. 32 | * Improve documentation. 33 | 34 | Changes from 0.5.3 to 0.6.0 35 | --------------------------- 36 | 37 | * Provide wheels in PyPi. 38 | * Update caterva submodule to 0.5.0. 39 | 40 | Changes from 0.5.1 to 0.5.3 41 | --------------------------- 42 | 43 | * Fix dependencies installation issue. 44 | 45 | Changes from 0.5.0 to 0.5.1 46 | --------------------------- 47 | 48 | * Update `setup.py` and add `pyproject.toml`. 49 | 50 | Changes from 0.4.2 to 0.5.0 51 | --------------------------- 52 | 53 | * Big c-core refactoring improving the slicing performance. 54 | * Implement `__setitem__` method for arrays to allow to update the values of the arrays. 55 | * Use Blosc special-constructors to initialize the arrays. 56 | * Improve the buffer and array protocols. 57 | * Remove the data type support in order to simplify the library. 58 | 59 | Changes from 0.4.1 to 0.4.2 60 | --------------------------- 61 | 62 | * Add files in `MANIFEST.in`. 63 | 64 | Changes from 0.4.0 to 0.4.1 65 | --------------------------- 66 | 67 | * Fix invalid values for classifiers defined in `setup.py`. 68 | 69 | Changes from 0.3.0 to 0.4.0 70 | --------------------------- 71 | 72 | * Compile the package using scikit-build. 73 | 74 | * Introduce a second level of multidimensional chunking. 75 | 76 | * Complete API renaming. 77 | 78 | * Support the buffer protocol and the numpy array protocol. 79 | 80 | * Generalize the slicing. 81 | 82 | * Make python-caterva independent of numpy. 
83 | 84 | 85 | Changes from 0.2.3 to 0.3.0 86 | --------------------------- 87 | 88 | * Set the development status to alpha. 89 | 90 | * Add instructions about installing python-caterva from pip. 91 | 92 | * `getitem` and `setitem` are now special methods in `ext.Container`. 93 | 94 | * Add new class from numpy arrays `NPArray`. 95 | 96 | * Support for serializing/deserializing Containers to/from serialized frames (bytes). 97 | 98 | * The `pshape` is calculated automatically if it is `None`. 99 | 100 | * Add a `.sframe` attribute for the serialized frame. 101 | 102 | * Big refactor for more consistent inheritance among classes. 103 | 104 | * The `from_numpy()` function always returns a `NPArray` now. 105 | 106 | 107 | Changes from 0.2.2 to 0.2.3 108 | --------------------------- 109 | 110 | * Rename `MANINFEST.in` to `MANIFEST.in`. 111 | 112 | * Fix the list of available cnames. 113 | 114 | 115 | Changes from 0.2.1 to 0.2.2 116 | --------------------------- 117 | 118 | * Added a `MANIFEST.in` for including all C-Blosc2 and Caterva sources in package. 119 | 120 | 121 | Changes from 0.1.1 to 0.2.1 122 | --------------------------- 123 | 124 | * Docstrings have been added. In addition, the documentation can be found at: 125 | https://python-caterva.readthedocs.io/ 126 | 127 | * Add a `copy` parameter to `from_file()`. 128 | 129 | * `complib` has been renamed to `cname` for compatibility with blosc-powered packages. 130 | 131 | * The use of an itemsize that is not a power of 2 is allowed now. 132 | -------------------------------------------------------------------------------- /bench/compare_loadframe.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Benchmark for comparing loading on-disk frames for
# multidimensional arrays using different methods:
# * Opening an on-disk frame without copying
# * Loading the frame in-memory

import caterva as cat
import numpy as np
import os
from time import time

# Dimensions, type and persistency properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 100, 50)
blockshape = (10, 50, 25)

dtype = np.float64

# Compression properties
cname = "zstd"
clevel = 6
# Named `cat_filter` so that the builtin `filter` is not shadowed
cat_filter = cat.SHUFFLE
nthreads = 2

# Start from a clean slate: drop files left over by a previous run
fname_npy = "compare_loadframe.npy"
if os.path.exists(fname_npy):
    os.remove(fname_npy)
fname_cat = "compare_loadframe.cat"
if os.path.exists(fname_cat):
    os.remove(fname_cat)

# Create content for populating arrays
t0 = time()
content = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape)
t1 = time()
print("Time for filling array (numpy): %.3fs" % (t1 - t0))

t0 = time()
np.save(fname_npy, content)
t1 = time()
print("Time for storing array on-disk (numpy): %.3fs" % (t1 - t0))

# Create and fill a caterva array using a block iterator
t0 = time()
a = cat.empty(shape, chunkshape=chunkshape, blockshape=blockshape, itemsize=content.itemsize,
              filename=fname_cat,
              cname=cname, clevel=clevel, filters=[cat_filter],
              nthreads=nthreads)
for block, info in a.iter_write():
    nparray = content[info.slice]
    block[:] = bytes(nparray)
acratio = a.cratio
del a
t1 = time()
print("Time for storing array on-disk (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

print()

# Setup the coordinates for random planes
planes_idx = np.random.randint(0, shape[1], 3)


def bench_read_numpy(fname, planes_idx, copy):
    """Time opening a ``.npy`` file and reading some planes from it.

    Parameters
    ----------
    fname: str
        Path of the ``.npy`` file to open.
    planes_idx: iterable of int
        Indices along the second axis of the planes to read.
    copy: bool
        If true, the file is loaded in memory; otherwise it is opened
        memory-mapped in read-only mode.

    The two timings are printed; nothing is returned.
    """
    t0 = time()
    mmap_mode = None if copy else 'r'
    a = np.load(fname, mmap_mode=mmap_mode)
    t1 = time()
    print("Time for opening the on-disk frame (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))

    t0 = time()
    for i in planes_idx:
        block = a[:, i, :]
        if not copy:
            # Do an actual read for memory mapped files
            block = block.copy()
    del a
    t1 = time()
    print("Time for reading with getitem (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))


def bench_read_caterva(fname, planes_idx, copy):
    """Time opening a caterva frame and reading some planes from it.

    Parameters
    ----------
    fname: str
        Path of the caterva file to open.
    planes_idx: iterable of int
        Indices along the second axis of the planes to read.
    copy: bool
        Passed through to `cat.open` as its `copy` argument.

    The two timings are printed; nothing is returned.
    """
    t0 = time()
    a = cat.open(fname, copy=copy)
    t1 = time()
    print("Time for opening the on-disk frame (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))

    t0 = time()
    for i in planes_idx:
        rbytes = a[:, i, :]
        # Reinterpret the returned buffer as a 2-dim plane of `dtype`
        block = np.frombuffer(rbytes, dtype=dtype).reshape((shape[0], shape[2]))
    del a
    t1 = time()
    print("Time for reading with getitem (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))


try:
    bench_read_numpy(fname_npy, planes_idx, copy=False)
    bench_read_numpy(fname_npy, planes_idx, copy=True)
    print()
    bench_read_caterva(fname_cat, planes_idx, copy=False)
    bench_read_caterva(fname_cat, planes_idx, copy=True)
finally:
    # The data files are large; remove them even when a benchmark fails
    os.remove(fname_npy)
    os.remove(fname_cat)
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Benchmark for comparing reducing on-disk frames for
# multidimensional arrays using different methods:
# * Opening an on-disk frame without copying
# * Loading the frame in-memory

import caterva as cat
import numpy as np
import os
from time import time
import platform

macosx = 'Darwin' in platform.platform()

# Dimensions, type and persistency properties for the arrays
shape = (100, 5000, 250)
chunkshape = (20, 100, 50)
blockshape = (10, 50, 25)

dtype = np.float64

# Compression properties
cname = "lz4"
clevel = 5
# Named `cat_filter` so that the builtin `filter` is not shadowed
cat_filter = cat.SHUFFLE
nthreads = 4

# Start from a clean slate: drop files left over by a previous run
fname_npy = "compare_reduceframe.npy"
if os.path.exists(fname_npy):
    os.remove(fname_npy)
fname_cat = "compare_reduceframe.cat"
if os.path.exists(fname_cat):
    os.remove(fname_cat)

# Create content for populating arrays
t0 = time()
content = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape)
t1 = time()
print("Time for filling array (numpy): %.3fs" % (t1 - t0))

t0 = time()
np.save(fname_npy, content)
t1 = time()
print("Time for storing array on-disk (numpy): %.3fs" % (t1 - t0))

# Create and fill a caterva array using a block iterator
t0 = time()
a = cat.empty(shape, chunkshape=chunkshape, blockshape=blockshape, itemsize=content.itemsize,
              filename=fname_cat,
              cname=cname, clevel=clevel, filters=[cat_filter],
              nthreads=nthreads)

for block, info in a.iter_write():
    nparray = content[info.slice]
    block[:] = bytes(nparray)
acratio = a.cratio
del a
t1 = time()
print("Time for storing array on-disk (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

print()


def purge_disk_cache():
    """Run the macOS `purge` utility before a timed section; no-op elsewhere.

    NOTE(review): presumably meant to empty the OS disk cache so that the
    following read is not served from memory -- confirm on the target host.
    """
    if macosx:
        os.system("/usr/sbin/purge")


def bench_read_numpy(fname, copy):
    """Time opening a ``.npy`` file and summing all of its elements.

    Parameters
    ----------
    fname: str
        Path of the ``.npy`` file to open.
    copy: bool
        If true, the file is loaded in memory; otherwise it is opened
        memory-mapped in read-only mode.

    Returns
    -------
    The sum of all the elements, as computed by `ndarray.sum()`.
    """
    purge_disk_cache()
    t0 = time()
    mmap_mode = None if copy else 'r'
    a = np.load(fname, mmap_mode=mmap_mode)
    t1 = time()
    print("Time for opening the on-disk frame (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))

    purge_disk_cache()
    t0 = time()
    acc = a.sum()
    del a
    t1 = time()
    print("Time for reducing with (numpy, copy=%s): %.3fs" % (copy, (t1 - t0)))
    return acc


def bench_read_caterva(fname, copy):
    """Time opening a caterva frame and summing all of its elements.

    Parameters
    ----------
    fname: str
        Path of the caterva file to open.
    copy: bool
        Passed through to `cat.open` as its `copy` argument.

    Returns
    -------
    The sum of all the elements, accumulated block by block.
    """
    purge_disk_cache()
    t0 = time()
    a = cat.open(fname, copy=copy)
    t1 = time()
    print("Time for opening the on-disk frame (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))

    purge_disk_cache()
    t0 = time()
    acc = 0
    for (block, info) in a.iter_read():
        # Reinterpret the raw block as an array of `dtype` before reducing
        block = np.frombuffer(block, dtype=dtype).reshape(info.shape)
        acc += np.sum(block)
    del a
    t1 = time()
    print("Time for reducing with (caterva, copy=%s): %.3fs" % (copy, (t1 - t0)))
    return acc


acc_npy1 = bench_read_numpy(fname_npy, copy=False)
acc_npy2 = bench_read_numpy(fname_npy, copy=True)
np.testing.assert_allclose(acc_npy1, acc_npy2)
print()
acc_cat1 = bench_read_caterva(fname_cat, copy=False)
np.testing.assert_allclose(acc_cat1, acc_npy1)
acc_cat2 = bench_read_caterva(fname_cat, copy=True)
# Verify the copy=True result itself (it used to re-check `acc_cat1`,
# leaving `acc_cat2` unverified).
np.testing.assert_allclose(acc_cat2, acc_npy2)

os.remove(fname_npy)
os.remove(fname_cat)
jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.11.2 9 | kernelspec: 10 | display_name: Python 3 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Tutorial 16 | Caterva functions let users perform different operations with Caterva arrays like setting, copying or slicing them. 17 | In this section, we are going to see how to create and manipulate a Caterva array in a simple way. 18 | 19 | ```{code-cell} ipython3 20 | import caterva as cat 21 | 22 | cat.__version__ 23 | ``` 24 | 25 | ## Creating an array 26 | First, we create an array, with zero being used as the default value for uninitialized portions of the array. 27 | 28 | ```{code-cell} ipython3 29 | c = cat.zeros((10000, 10000), itemsize=4, chunks=(1000, 1000), blocks=(100, 100)) 30 | 31 | c 32 | ``` 33 | 34 | ## Reading and writing data 35 | We can access and edit Caterva arrays using NumPy. 36 | 37 | ```{code-cell} ipython3 38 | import struct 39 | import numpy as np 40 | 41 | dtype = np.int32 42 | 43 | c[0, :] = np.arange(10000, dtype=dtype) 44 | c[:, 0] = np.arange(10000, dtype=dtype) 45 | ``` 46 | 47 | ```{code-cell} ipython3 48 | c[0, 0] 49 | ``` 50 | 51 | ```{code-cell} ipython3 52 | np.array(c[0, 0]).view(dtype) 53 | ``` 54 | 55 | ```{code-cell} ipython3 56 | np.array(c[0, -1]).view(dtype) 57 | ``` 58 | 59 | ```{code-cell} ipython3 60 | np.array(c[0, :]).view(dtype) 61 | ``` 62 | 63 | ```{code-cell} ipython3 64 | np.array(c[:, 0]).view(dtype) 65 | ``` 66 | 67 | ```{code-cell} ipython3 68 | np.array(c[:]).view(dtype) 69 | ``` 70 | 71 | ## Persistent data 72 | When we create a Caterva array, we can specify where it will be stored. 73 | Then, we can access this array whenever we want and it will still contain all the data as it is stored persistently.
74 | 75 | ```{code-cell} ipython3 76 | c1 = cat.full((1000, 1000), fill_value=b"pepe", chunks=(100, 100), blocks=(50, 50), 77 | urlpath="cat_tutorial.caterva") 78 | ``` 79 | 80 | ```{code-cell} ipython3 81 | c2 = cat.open("cat_tutorial.caterva") 82 | 83 | c2.info 84 | ``` 85 | 86 | ```{code-cell} ipython3 87 | np.array(c2[0, 20:30]).view("S4") 88 | ``` 89 | 90 | ```{code-cell} ipython3 91 | import os 92 | if os.path.exists("cat_tutorial.caterva"): 93 | cat.remove("cat_tutorial.caterva") 94 | ``` 95 | 96 | ## Compression params 97 | Here we can see how when we make a copy of a Caterva array we can change its compression parameters in an easy way. 98 | 99 | ```{code-cell} ipython3 100 | b = np.arange(1000000).tobytes() 101 | 102 | c1 = cat.from_buffer(b, shape=(1000, 1000), itemsize=8, chunks=(500, 10), blocks=(50, 10)) 103 | 104 | c1.info 105 | ``` 106 | 107 | ```{code-cell} ipython3 108 | c2 = c1.copy(chunks=(500, 10), blocks=(50, 10), 109 | codec=cat.Codec.ZSTD, clevel=9, filters=[cat.Filter.BITSHUFFLE]) 110 | 111 | c2.info 112 | ``` 113 | 114 | ## Metalayers 115 | Metalayers are small metadata for informing about the properties of data that is stored on a container. 116 | The metalayers of a Caterva array are also easy to access and edit by users. 
117 | 118 | ```{code-cell} ipython3 119 | from msgpack import packb, unpackb 120 | ``` 121 | 122 | ```{code-cell} ipython3 123 | meta = { 124 | "dtype": packb("i8"), 125 | "coords": packb([5.14, 23.]) 126 | } 127 | ``` 128 | 129 | ```{code-cell} ipython3 130 | c = cat.zeros((1000, 1000), 5, chunks=(100, 100), blocks=(50, 50), meta=meta) 131 | ``` 132 | 133 | ```{code-cell} ipython3 134 | len(c.meta) 135 | ``` 136 | 137 | ```{code-cell} ipython3 138 | c.meta.keys() 139 | ``` 140 | 141 | ```{code-cell} ipython3 142 | for key in c.meta: 143 | print(f"{key} -> {unpackb(c.meta[key])}") 144 | ``` 145 | 146 | ```{code-cell} ipython3 147 | c.meta["coords"] = packb([0., 23.]) 148 | ``` 149 | 150 | ```{code-cell} ipython3 151 | for key in c.meta: 152 | print(f"{key} -> {unpackb(c.meta[key])}") 153 | ``` 154 | 155 | ## Small tutorial 156 | This example shows how easy it is to create a Caterva array from an image and how users can manipulate it using Caterva and Image functions. 157 | 158 | ```{code-cell} ipython3 159 | from PIL import Image 160 | ``` 161 | 162 | ```{code-cell} ipython3 163 | im = Image.open("../_static/blosc-logo_128.png") 164 | 165 | im 166 | ``` 167 | 168 | ```{code-cell} ipython3 169 | meta = {"dtype": b"|u1"} 170 | 171 | c = cat.asarray(np.array(im), chunks=(50, 50, 4), blocks=(10, 10, 4), meta=meta) 172 | 173 | c.info 174 | ``` 175 | 176 | ```{code-cell} ipython3 177 | im2 = c[15:55, 10:35] # Letter B 178 | 179 | Image.fromarray(np.array(im2).view(c.meta["dtype"])) 180 | ``` 181 | 182 | ```{code-cell} ipython3 183 | 184 | ``` 185 | -------------------------------------------------------------------------------- /bench/compare_getslice.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved.
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Benchmark for comparing speeds of getitem of hyperplanes on a 10 | # multidimensional array and using different backends: 11 | # Caterva, Zarr and HDF5 12 | # In brief, each approach has its own strengths and weaknesses. 13 | # 14 | # Usage: pass any argument for testing the persistent backends. 15 | # Else, only in-memory containers will be tested. 16 | 17 | import caterva as cat 18 | import zarr 19 | import numcodecs 20 | import tables 21 | import numpy as np 22 | import os 23 | import sys 24 | import shutil 25 | from time import time 26 | 27 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False 28 | if persistent: 29 | print("Testing the persistent backends...") 30 | else: 31 | print("Testing the in-memory backends...") 32 | 33 | # Dimensions and type properties for the arrays 34 | shape = (100, 5000, 250) 35 | chunkshape = (20, 500, 50) 36 | blockshape = (10, 100, 25) 37 | # This config generates containers of more than 2 GB in size 38 | # shape = (250, 4000, 350) 39 | # pshape = (200, 100, 100) 40 | dtype = np.float64 41 | 42 | # Compression properties 43 | cname = "zstd" 44 | clevel = 6 45 | filter = cat.Filter.SHUFFLE 46 | zfilter = numcodecs.Blosc.SHUFFLE 47 | nthreads = 1 48 | blocksize = int(np.prod(blockshape)) 49 | 50 | fname_cat = None 51 | fname_zarr = None 52 | fname_h5 = "whatever.h5" 53 | if persistent: 54 | fname_cat = "compare_getslice.cat" 55 | if os.path.exists(fname_cat): 56 | os.remove(fname_cat) 57 | fname_zarr = "compare_getslice.zarr" 58 | if os.path.exists(fname_zarr): 59 | shutil.rmtree(fname_zarr) 60 | fname_h5 = "compare_getslice.h5" 61 | if os.path.exists(fname_h5): 62 | os.remove(fname_h5) 63 | 64 | # Create content for populating arrays 65 | content = np.random.normal(0, 1, 
int(np.prod(shape))).reshape(shape) 66 | 67 | # Create and fill a caterva array using a block iterator 68 | t0 = time() 69 | a = cat.empty(shape, content.itemsize, chunkshape=chunkshape, blockshape=blockshape, 70 | dtype=str(content.dtype), urlpath=fname_cat, 71 | cname=cname, clevel=clevel, filters=[filter], nthreads=nthreads) 72 | a[:] = content 73 | acratio = a.cratio 74 | if persistent: 75 | del a 76 | t1 = time() 77 | print("Time for filling array (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio)) 78 | 79 | # Create and fill a zarr array 80 | t0 = time() 81 | compressor = numcodecs.Blosc(cname=cname, clevel=clevel, shuffle=zfilter, blocksize=blocksize) 82 | numcodecs.blosc.set_nthreads(nthreads) 83 | if persistent: 84 | z = zarr.open(fname_zarr, mode='w', shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor) 85 | else: 86 | z = zarr.empty(shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor) 87 | z[:] = content 88 | zratio = z.nbytes / z.nbytes_stored 89 | if persistent: 90 | del z 91 | t1 = time() 92 | print("Time for filling array (zarr): %.3fs ; CRatio: %.1fx" % ((t1 - t0), zratio)) 93 | 94 | # Create and fill a hdf5 array 95 | t0 = time() 96 | filters = tables.Filters(complevel=clevel, complib="blosc:%s" % cname, shuffle=True) 97 | tables.set_blosc_max_threads(nthreads) 98 | if persistent: 99 | h5f = tables.open_file(fname_h5, 'w') 100 | else: 101 | h5f = tables.open_file(fname_h5, 'w', driver='H5FD_CORE', driver_core_backing_store=0) 102 | h5ca = h5f.create_carray(h5f.root, 'carray', filters=filters, chunkshape=chunkshape, obj=content) 103 | h5f.flush() 104 | h5ratio = h5ca.size_in_memory / h5ca.size_on_disk 105 | if persistent: 106 | h5f.close() 107 | t1 = time() 108 | print("Time for filling array (hdf5): %.3fs ; CRatio: %.1fx" % ((t1 - t0), h5ratio)) 109 | 110 | # Setup the coordinates for random planes 111 | planes_idx = np.random.randint(0, shape[1], 100) 112 | 113 | # Time getitem with caterva 114 | t0 = 
time() 115 | if persistent: 116 | a = cat.open(fname_cat) # reopen 117 | for i in planes_idx: 118 | rbytes = a[:, i, :] 119 | del a 120 | t1 = time() 121 | print("Time for reading with getitem (caterva): %.3fs" % (t1 - t0)) 122 | 123 | # Time getitem with zarr 124 | t0 = time() 125 | if persistent: 126 | z = zarr.open(fname_zarr, mode='r') 127 | for i in planes_idx: 128 | block = z[:, i, :] 129 | del z 130 | t1 = time() 131 | print("Time for reading with getitem (zarr): %.3fs" % (t1 - t0)) 132 | 133 | # Time getitem with hdf5 134 | t0 = time() 135 | if persistent: 136 | h5f = tables.open_file(fname_h5, 'r', filters=filters) 137 | h5ca = h5f.root.carray 138 | for i in planes_idx: 139 | block = h5ca[:, i, :] 140 | h5f.close() 141 | t1 = time() 142 | print("Time for reading with getitem (hdf5): %.3fs" % (t1 - t0)) 143 | 144 | 145 | if persistent: 146 | os.remove(fname_cat) 147 | shutil.rmtree(fname_zarr) 148 | os.remove(fname_h5) 149 | -------------------------------------------------------------------------------- /bench/compare_serialization.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Benchmark for comparing serializing/deserializing frames for 10 | # multidimensional arrays using different methods: 11 | # * to_sframe() / from_sframe() 12 | # * Numpy copy 13 | # * PyArrow 14 | # * Pickle v4 15 | # * Pickle v5 (in the future) 16 | 17 | import caterva as cat 18 | import numpy as np 19 | from time import time 20 | import pyarrow as pa 21 | 22 | import pickle 23 | 24 | check_roundtrip = False # set this to True to check for roundtrip validity 25 | 26 | # Dimensions, type and persistency properties for the arrays 27 | shape = (100, 5000, 250) 28 | chunkshape = (20, 500, 100) 29 | blockshape = (10, 50, 50) 30 | dtype = "f8" 31 | 32 | # Compression properties 33 | cname = "lz4" 34 | clevel = 3 35 | # cname = "zstd" 36 | # clevel = 1 37 | filter = cat.SHUFFLE 38 | nthreads = 4 39 | 40 | t0 = time() 41 | arr = np.linspace(0, 10, int(np.prod(shape)), dtype=dtype).reshape(shape) 42 | # arr = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) 43 | t1 = time() 44 | print("Time for filling array (numpy): %.3fs" % (t1 - t0)) 45 | 46 | t0 = time() 47 | arr2 = arr.copy() 48 | t1 = time() 49 | print("Time for copying array in-memory (numpy): %.3fs" % (t1 - t0)) 50 | 51 | # Create and fill a caterva array using a block iterator and an in-memory frame 52 | t0 = time() 53 | carr = cat.empty(shape, np.dtype(dtype).itemsize, dtype=dtype, chunkshape=chunkshape, blockshape=blockshape, 54 | enforceframe=True, 55 | cname=cname, clevel=clevel, filters=[filter], 56 | cnthreads=nthreads, dnthreads=nthreads) 57 | for block, info in carr.iter_write(): 58 | nparray = arr[info.slice] 59 | block[:] = bytes(nparray) 60 | acratio = carr.cratio 61 | t1 = time() 62 | print("Time for creating an array in-memory (numpy -> caterva, copy): %.3fs ; CRatio: 
%.1fx" % ((t1 - t0), acratio)) 63 | 64 | print() 65 | 66 | t0 = time() 67 | sframe_nocopy = carr.sframe 68 | t1 = time() 69 | print("Time for serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0)) 70 | 71 | t0 = time() 72 | sframe_copy = carr.to_sframe() 73 | t1 = time() 74 | print("Time for serializing array in-memory (caterva, copy): %.3fs" % (t1 - t0)) 75 | 76 | t0 = time() 77 | serialized = pa.serialize(arr) 78 | pyarrow_nocopy = serialized.to_components() 79 | t1 = time() 80 | print("Time for serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0)) 81 | 82 | t0 = time() 83 | pyarrow_copy = pa.serialize(arr).to_buffer().to_pybytes() 84 | t1 = time() 85 | print("Time for serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0)) 86 | 87 | t0 = time() 88 | frame_pickle = pickle.dumps(arr, protocol=4) 89 | t1 = time() 90 | print("Time for serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0)) 91 | 92 | t0 = time() 93 | carr2 = cat.from_sframe(sframe_nocopy, copy=False) 94 | t1 = time() 95 | print("Time for de-serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0)) 96 | 97 | if check_roundtrip: 98 | print("The roundtrip is... ", end="", flush=True) 99 | np.testing.assert_allclose(carr2, arr) 100 | print("ok!") 101 | 102 | t0 = time() 103 | arr2 = pa.deserialize_components(pyarrow_nocopy) 104 | t1 = time() 105 | print("Time for de-serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0)) 106 | 107 | if check_roundtrip: 108 | print("The roundtrip is... ", end="", flush=True) 109 | np.testing.assert_allclose(arr2, arr) 110 | print("ok!") 111 | 112 | t0 = time() 113 | arr2 = pa.deserialize(pyarrow_copy) 114 | t1 = time() 115 | print("Time for de-serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0)) 116 | 117 | if check_roundtrip: 118 | print("The roundtrip is... 
", end="", flush=True) 119 | np.testing.assert_allclose(arr2, arr) 120 | print("ok!") 121 | 122 | t0 = time() 123 | arr2 = pickle.loads(frame_pickle) 124 | t1 = time() 125 | print("Time for de-serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0)) 126 | 127 | if check_roundtrip: 128 | print("The roundtrip is... ", end="", flush=True) 129 | np.testing.assert_allclose(arr2, arr) 130 | print("ok!") 131 | 132 | print() 133 | t0 = time() 134 | for i in range(1): 135 | carr3 = cat.from_sframe(sframe_copy) 136 | arr2 = np.asarray(carr3.copy()) 137 | t1 = time() 138 | print("Time for re-creating array in-memory (caterva -> numpy, copy): %.3fs" % (t1 - t0)) 139 | 140 | if check_roundtrip: 141 | print("The roundtrip is... ", end="", flush=True) 142 | np.testing.assert_allclose(arr2, arr) 143 | print("ok!") 144 | 145 | print() 146 | arrsize = arr.size * arr.itemsize 147 | time_100Mbps = arrsize / (10 * 2 ** 20) 148 | print("Time to transmit array at 100 Mbps (no compression):\t%6.3fs" % time_100Mbps) 149 | ctime_100Mbps = (arrsize / acratio) / (10 * 2**20) 150 | print("Time to transmit array at 100 Mbps (compression):\t%6.3fs" % ctime_100Mbps) 151 | time_1Gbps = arrsize / (100 * 2 ** 20) 152 | print("Time to transmit array at 1 Gbps (no compression):\t%6.3fs" % time_1Gbps) 153 | ctime_1Gbps = (arrsize / acratio) / (100 * 2**20) 154 | print("Time to transmit array at 1 Gbps (compression):\t\t%6.3fs" % ctime_1Gbps) 155 | time_10Gbps = arrsize / (1000 * 2 ** 20) 156 | print("Time to transmit array at 10 Gbps (no compression):\t%6.3fs" % time_10Gbps) 157 | ctime_10Gbps = (arrsize / acratio) / (1000 * 2**20) 158 | print("Time to transmit array at 10 Gbps (compression):\t%6.3fs" % ctime_10Gbps) 159 | -------------------------------------------------------------------------------- /caterva/ndarray.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright 
(C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from . import caterva_ext as ext 10 | import ndindex 11 | import numpy as np 12 | from .info import InfoReporter 13 | import os 14 | from .meta import Meta 15 | 16 | 17 | def process_key(key, shape): 18 | key = ndindex.ndindex(key).expand(shape).raw 19 | mask = tuple(True if isinstance(k, int) else False for k in key) 20 | key = tuple(k if isinstance(k, slice) else slice(k, k+1, None) for k in key) 21 | return key, mask 22 | 23 | 24 | def prod(list): 25 | prod = 1 26 | for li in list: 27 | prod *= li 28 | return prod 29 | 30 | 31 | def get_caterva_start_stop(ndim, key, shape): 32 | start = tuple(s.start if s.start is not None else 0 for s in key) 33 | stop = tuple(s.stop if s.stop is not None else sh for s, sh in zip(key, shape)) 34 | 35 | size = prod([stop[i] - start[i] for i in range(ndim)]) 36 | 37 | return start, stop, size 38 | 39 | 40 | def parse_kwargs(**kwargs): 41 | if kwargs.get("urlpath"): 42 | if os.path.exists(kwargs["urlpath"]): 43 | raise FileExistsError(f"Can not create the file {kwargs['urlpath']}." 44 | f"It already exists!") 45 | 46 | 47 | class NDArray(ext.NDArray): 48 | def __init__(self, **kwargs): 49 | parse_kwargs(**kwargs) 50 | self.kwargs = kwargs 51 | super(NDArray, self).__init__(**self.kwargs) 52 | 53 | @classmethod 54 | def cast(cls, cont): 55 | cont.__class__ = cls 56 | assert isinstance(cont, NDArray) 57 | return cont 58 | 59 | @property 60 | def meta(self): 61 | return Meta(self) 62 | 63 | @property 64 | def info(self): 65 | """ 66 | Print information about this array. 
67 | """ 68 | return InfoReporter(self) 69 | 70 | @property 71 | def info_items(self): 72 | items = [] 73 | items += [("Type", f"{self.__class__.__name__}")] 74 | items += [("Itemsize", self.itemsize)] 75 | items += [("Shape", self.shape)] 76 | items += [("Chunks", self.chunks)] 77 | items += [("Blocks", self.blocks)] 78 | items += [("Comp. codec", self.codec.name)] 79 | items += [("Comp. level", self.clevel)] 80 | filters = [f.name for f in self.filters if f.name != "NOFILTER"] 81 | items += [("Comp. filters", f"[{', '.join(map(str, filters))}]")] 82 | items += [("Comp. ratio", f"{self.cratio:.2f}")] 83 | return items 84 | 85 | def __setitem__(self, key, value): 86 | key, mask = process_key(key, self.shape) 87 | start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape) 88 | key = (start, stop) 89 | return ext.set_slice(self, key, value) 90 | 91 | def __getitem__(self, key): 92 | """ Get a (multidimensional) slice as specified in key. 93 | 94 | Parameters 95 | ---------- 96 | key: int, slice or sequence of slices 97 | The index for the slices to be retrieved. Note that the step parameter is not honored yet 98 | in slices. 99 | 100 | Returns 101 | ------- 102 | out: NDArray 103 | An array, stored in a non-compressed buffer, with the requested data. 104 | """ 105 | key, mask = process_key(key, self.shape) 106 | start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape) 107 | key = (start, stop) 108 | shape = [sp - st for st, sp in zip(start, stop)] 109 | arr = np.zeros(shape, dtype=f"S{self.itemsize}") 110 | return ext.get_slice_numpy(arr, self, key, mask) 111 | 112 | def resize(self, newshape): 113 | """Change the shape of the array by growing one or more dimensions. 114 | 115 | Parameters 116 | ---------- 117 | newshape : tuple or list 118 | The new shape of the array. It should have the same dimensions 119 | as `self`. 120 | 121 | Notes 122 | ----- 123 | The array values corresponding to the added positions are not initialized.
124 | Thus, the user is in charge of initializing them. 125 | """ 126 | return ext.resize(self, newshape) 127 | 128 | def slice(self, key, **kwargs): 129 | """ Get a (multidimensional) slice as specified in key. Generalizes :py:meth:`__getitem__`. 130 | 131 | Parameters 132 | ---------- 133 | key: int, slice or sequence of slices 134 | The index for the slices to be retrieved. Note that the step parameter is not honored yet in 135 | slices. 136 | 137 | Other Parameters 138 | ---------------- 139 | kwargs: dict, optional 140 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 141 | 142 | Returns 143 | ------- 144 | out: NDArray 145 | An array with the requested data. 146 | """ 147 | arr = NDArray(**kwargs) 148 | kwargs = arr.kwargs 149 | key, mask = process_key(key, self.shape) 150 | start, stop, _ = get_caterva_start_stop(self.ndim, key, self.shape) 151 | key = (start, stop) 152 | return ext.get_slice(arr, self, key, mask, **kwargs) 153 | 154 | def squeeze(self): 155 | """Remove the 1's from the array's shape.""" 156 | super(NDArray, self).squeeze(**self.kwargs) 157 | 158 | def to_buffer(self): 159 | """Return a buffer with the data contents. 160 | 161 | Returns 162 | ------- 163 | bytes 164 | The buffer containing the data of the whole array. 165 | """ 166 | return super(NDArray, self).to_buffer(**self.kwargs) 167 | 168 | def copy(self, **kwargs): 169 | """Copy into a new array. 170 | 171 | Other Parameters 172 | ---------------- 173 | kwargs: dict, optional 174 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 175 | 176 | Returns 177 | ------- 178 | NDArray 179 | An array containing the copy.
180 | """ 181 | arr = NDArray(**kwargs) 182 | return ext.copy(arr, self, **kwargs) 183 | -------------------------------------------------------------------------------- /doc/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /***************************************************************************** 3 | * Theme config 4 | **/ 5 | --pst-header-height: 60px; 6 | 7 | /***************************************************************************** 8 | * Font size 9 | **/ 10 | --pst-font-size-base: 15px; /* base font size - applied at body / html level */ 11 | 12 | /* heading font sizes */ 13 | --pst-font-size-h1: 36px; 14 | --pst-font-size-h2: 32px; 15 | --pst-font-size-h3: 26px; 16 | --pst-font-size-h4: 21px; 17 | --pst-font-size-h5: 18px; 18 | --pst-font-size-h6: 16px; 19 | 20 | /* smaller than heading font sizes */ 21 | --pst-font-size-milli: 12px; 22 | 23 | --pst-sidebar-font-size: .9em; 24 | --pst-sidebar-caption-font-size: .9em; 25 | 26 | /***************************************************************************** 27 | * Font family 28 | **/ 29 | /* These are adapted from https://systemfontstack.com/ */ 30 | --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, "Helvetica Neue", 31 | Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol; 32 | --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco, 33 | Liberation Mono, Lucida Console, monospace; 34 | 35 | --pst-font-family-base: var(--pst-font-family-base-system); 36 | --pst-font-family-heading: var(--pst-font-family-base); 37 | --pst-font-family-monospace: var(--pst-font-family-monospace-system); 38 | 39 | /***************************************************************************** 40 | * Color 41 | * 42 | * Colors are defined as rgb strings, "red, green, blue" 43 | **/ 44 | --pst-color-primary: 15, 109, 126; 45 | --pst-color-primary-dark: 2, 89, 100; 46 |
--pst-color-secondary: 212, 168, 12; 47 | --pst-color-success: 40, 167, 69; 48 | --pst-color-info: 0, 123, 255; /*23, 162, 184;*/ 49 | --pst-color-warning: 255, 193, 7; 50 | --pst-color-danger: 220, 53, 69; 51 | --pst-color-text-base: 51, 51, 51; 52 | 53 | --pst-color-h1: var(--pst-color-primary-dark); 54 | --pst-color-h2: var(--pst-color-primary-dark); 55 | --pst-color-h3: var(--pst-color-text-base); 56 | --pst-color-h4: var(--pst-color-text-base); 57 | --pst-color-h5: var(--pst-color-text-base); 58 | --pst-color-h6: var(--pst-color-text-base); 59 | --pst-color-paragraph: var(--pst-color-text-base); 60 | --pst-color-link: var(--pst-color-primary); 61 | --pst-color-link-hover: var(--pst-color-secondary); 62 | --pst-color-headerlink: var(--pst-color-primary); 63 | --pst-color-headerlink-hover: 255, 255, 255; 64 | --pst-color-preformatted-text: 34, 34, 34; 65 | --pst-color-preformatted-background: 250, 250, 250; 66 | --pst-color-inline-code: var(--pst-color-primary); 67 | 68 | --pst-color-active-navigation: var(--pst-color-secondary); 69 | --pst-color-navbar-link: 77, 77, 77; 70 | --pst-color-navbar-link-hover: var(--pst-color-active-navigation); 71 | --pst-color-navbar-link-active: var(--pst-color-active-navigation); 72 | --pst-color-sidebar-link: 77, 77, 77; 73 | --pst-color-sidebar-link-hover: var(--pst-color-active-navigation); 74 | --pst-color-sidebar-link-active: var(--pst-color-active-navigation); 75 | --pst-color-sidebar-expander-background-hover: 244, 244, 244; 76 | --pst-color-sidebar-caption: 77, 77, 77; 77 | --pst-color-toc-link: 119, 117, 122; 78 | --pst-color-toc-link-hover: var(--pst-color-active-navigation); 79 | --pst-color-toc-link-active: var(--pst-color-active-navigation); 80 | 81 | /***************************************************************************** 82 | * Icon 83 | **/ 84 | 85 | /* font awesome icons*/ 86 | --pst-icon-check-circle: '\f058'; 87 | --pst-icon-info-circle: '\f05a'; 88 | --pst-icon-exclamation-triangle: '\f071'; 89 | 
--pst-icon-exclamation-circle: '\f06a'; 90 | --pst-icon-times-circle: '\f057'; 91 | --pst-icon-lightbulb: '\f0eb'; 92 | 93 | /***************************************************************************** 94 | * Admonitions 95 | **/ 96 | 97 | --pst-color-admonition-default: var(--pst-color-info); 98 | --pst-color-admonition-note: var(--pst-color-info); 99 | --pst-color-admonition-attention: var(--pst-color-warning); 100 | --pst-color-admonition-caution: var(--pst-color-warning); 101 | --pst-color-admonition-warning: var(--pst-color-warning); 102 | --pst-color-admonition-danger: var(--pst-color-danger); 103 | --pst-color-admonition-error: var(--pst-color-danger); 104 | --pst-color-admonition-hint: var(--pst-color-success); 105 | --pst-color-admonition-tip: var(--pst-color-success); 106 | --pst-color-admonition-important: var(--pst-color-success); 107 | 108 | --pst-icon-admonition-default: var(--pst-icon-info-circle); 109 | --pst-icon-admonition-note: var(--pst-icon-info-circle); 110 | --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle); 111 | --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle); 112 | --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle); 113 | --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle); 114 | --pst-icon-admonition-error: var(--pst-icon-times-circle); 115 | --pst-icon-admonition-hint: var(--pst-icon-lightbulb); 116 | --pst-icon-admonition-tip: var(--pst-icon-lightbulb); 117 | --pst-icon-admonition-important: var(--pst-icon-exclamation-circle); 118 | 119 | --tabs--label-text: rgb(var(--pst-color-primary)); 120 | --tabs--label-text--active: rgb(var(--pst-color-secondary)); 121 | --tabs--label-text--hover: rgb(var(--pst-color-secondary)); 122 | --tabs--label-background--active: transparent; 123 | --tabs--label-background--hover: transparent; 124 | --tabs--border: transparent; 125 | } 126 | 127 | /* Main index page overview cards */ 128 | 129 | .intro-card { 130 | background: #fff; 131 
| border-radius: 0; 132 | padding: 30px 10px 20px 10px; 133 | margin: 10px 0px; 134 | } 135 | 136 | .intro-card p.card-text { 137 | margin: 0px; 138 | } 139 | 140 | .intro-card .card-img-top { 141 | margin: 10px; 142 | height: 52px; 143 | } 144 | 145 | .intro-card .card-header { 146 | border: none; 147 | background-color: white; 148 | font-size: var(--pst-font-size-h5); 149 | font-weight: bold; 150 | padding: 2.5rem 0rem 0.5rem 0rem; 151 | } 152 | 153 | .intro-card .card-header .card-text { 154 | color: rgb(var(--pst-color-primary)); 155 | } 156 | 157 | .intro-card .card-footer { 158 | border: none; 159 | background-color:white; 160 | } 161 | 162 | .intro-card .card-footer p.card-text{ 163 | max-width: 220px; 164 | margin-left: auto; 165 | margin-right: auto; 166 | } 167 | -------------------------------------------------------------------------------- /caterva/constructors.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (C) 2019-present, Blosc Development team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from . import caterva_ext as ext 10 | from .ndarray import NDArray 11 | 12 | 13 | def empty(shape, itemsize, **kwargs): 14 | """Create an empty array. 15 | 16 | Parameters 17 | ---------- 18 | shape: tuple or list 19 | The shape for the final array. 20 | itemsize: int 21 | The size, in bytes, of each element. 22 | 23 | Other Parameters 24 | ---------------- 25 | kwargs: dict, optional 26 | Keyword arguments supported: 27 | 28 | chunks: iterable object or None 29 | The chunk shape. If `None`, the array is stored using a non-compressed buffer. 30 | (Default `None`) 31 | blocks: iterable object or None 32 | The block shape. 
If `None`, the array is stored using a non-compressed buffer. 33 | (Default `None`) 34 | urlpath: str or None 35 | The name of the file to store data. If `None`, data is stored in-memory. 36 | (Default `None`) 37 | contiguous: bool or None 38 | Whether the data is stored contiguously or sparsely (one chunk per file). 39 | If `None`, data is stored sparsely. 40 | memframe: bool 41 | If True, the array is backed by a frame in-memory. Else, by a super-chunk. 42 | (Default: `False`) 43 | meta: dict or None 44 | A dictionary with different metalayers. One entry per metalayer: 45 | 46 | key: bytes or str 47 | The name of the metalayer. 48 | value: object 49 | The metalayer object that will be (de-)serialized using msgpack. 50 | 51 | codec: :py:class:`Codec` 52 | The name for the compressor codec. (Default: :py:attr:`Codec.LZ4`) 53 | clevel: int (0 to 9) 54 | The compression level. 0 means no compression, and 9 maximum compression. 55 | (Default: 5) 56 | filters: list 57 | The filter pipeline. (Default: [:py:attr:`Filter.SHUFFLE`]) 58 | filtersmeta: list 59 | The meta info for each filter in pipeline. (Default: [0]) 60 | nthreads: int 61 | The number of threads. (Default: 1) 62 | usedict: bool 63 | If a dictionary should be used during compression. (Default: False) 64 | 65 | Returns 66 | ------- 67 | out: NDArray 68 | A `NDArray` is returned. 69 | """ 70 | arr = NDArray(**kwargs) 71 | kwargs = arr.kwargs 72 | ext.empty(arr, shape, itemsize, **kwargs) 73 | return arr 74 | 75 | 76 | def zeros(shape, itemsize, **kwargs): 77 | """Create an array, with zero being used as the default value 78 | for uninitialized portions of the array. 79 | 80 | Parameters 81 | ---------- 82 | shape: tuple or list 83 | The shape for the final array. 84 | itemsize: int 85 | The size, in bytes, of each element. 86 | 87 | Other Parameters 88 | ---------------- 89 | kwargs: dict, optional 90 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 
91 | 92 | Returns 93 | ------- 94 | out: NDArray 95 | An `NDArray` is returned. 96 | """ 97 | arr = NDArray(**kwargs) 98 | kwargs = arr.kwargs 99 | ext.zeros(arr, shape, itemsize, **kwargs) 100 | return arr 101 | 102 | 103 | def full(shape, fill_value, **kwargs): 104 | """Create an array, with `fill_value` being used as the default value 105 | for uninitialized portions of the array. 106 | 107 | Parameters 108 | ---------- 109 | shape: tuple or list 110 | The shape for the final array. 111 | fill_value: bytes 112 | Default value to use for uninitialized portions of the array. 113 | Other Parameters 114 | ---------------- 115 | kwargs: dict, optional 116 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 117 | 118 | Returns 119 | ------- 120 | out: NDArray 121 | An `NDArray` is returned. 122 | """ 123 | arr = NDArray(**kwargs) 124 | kwargs = arr.kwargs 125 | ext.full(arr, shape, fill_value, **kwargs) 126 | return arr 127 | 128 | 129 | def from_buffer(buffer, shape, itemsize, **kwargs): 130 | """Create an array out of a buffer. 131 | 132 | Parameters 133 | ---------- 134 | buffer: bytes 135 | The buffer of the data to populate the container. 136 | shape: tuple or list 137 | The shape for the final container. 138 | itemsize: int 139 | The size, in bytes, of each element. 140 | 141 | Other Parameters 142 | ---------------- 143 | kwargs: dict, optional 144 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 145 | 146 | Returns 147 | ------- 148 | out: NDArray 149 | An `NDArray` is returned. 150 | """ 151 | arr = NDArray(**kwargs) 152 | kwargs = arr.kwargs 153 | 154 | ext.from_buffer(arr, buffer, shape, itemsize, **kwargs) 155 | return arr 156 | 157 | 158 | def copy(array, **kwargs): 159 | """Create a copy of an array. 160 | 161 | Parameters 162 | ---------- 163 | array: NDArray 164 | The array to be copied.
165 | 166 | Other Parameters 167 | ---------------- 168 | kwargs: dict, optional 169 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 170 | 171 | Returns 172 | ------- 173 | out: NDArray 174 | An `NDArray` with a copy of the data. 175 | """ 176 | arr = NDArray(**kwargs) 177 | kwargs = arr.kwargs 178 | 179 | ext.copy(arr, array, **kwargs) 180 | 181 | return arr 182 | 183 | 184 | def open(urlpath): 185 | """Open the container stored at `urlpath`. 186 | 187 | .. warning:: Only one handler is supported per file. 188 | 189 | Parameters 190 | ---------- 191 | urlpath: str 192 | The file having a Blosc2 frame format with a Caterva metalayer on it. 193 | 194 | Returns 195 | ------- 196 | out: NDArray 197 | An `NDArray` is returned. 198 | """ 199 | 200 | arr = NDArray() 201 | ext.from_file(arr, urlpath) 202 | 203 | return arr 204 | 205 | 206 | def asarray(ndarray, **kwargs): 207 | """Convert the input to an array. 208 | 209 | Parameters 210 | ---------- 211 | ndarray: array_like 212 | An array supporting the Python buffer protocol and the NumPy array interface. 213 | 214 | Other Parameters 215 | ---------------- 216 | kwargs: dict, optional 217 | Keyword arguments that are supported by the :py:meth:`caterva.empty` constructor. 218 | 219 | Returns 220 | ------- 221 | out: NDArray 222 | A Caterva array interpretation of `ndarray`. 223 | """ 224 | arr = NDArray(**kwargs) 225 | kwargs = arr.kwargs 226 | 227 | ext.asarray(arr, ndarray, **kwargs) 228 | 229 | return arr 230 | -------------------------------------------------------------------------------- /caterva/caterva_ext.pyx: -------------------------------------------------------------------------------- 1 | # Hey Cython, this is Python 3! 2 | # cython: language_level=3 3 | 4 | ####################################################################### 5 | # Copyright (C) 2019-present, Blosc Development team 6 | # All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################
from libc.stdlib cimport malloc, free
from libcpp cimport bool
from cpython.pycapsule cimport PyCapsule_New
from libc.stdint cimport uintptr_t
from libc.string cimport strdup, memcpy
from cpython cimport (
    PyObject_GetBuffer, PyBuffer_Release,
    PyBUF_SIMPLE, Py_buffer,
    PyBytes_FromStringAndSize
)
from .utils import Codec, Filter
import os.path


cdef extern from "<stdint.h>":
    ctypedef signed char int8_t
    ctypedef signed short int16_t
    ctypedef signed int int32_t
    ctypedef signed long int64_t
    ctypedef unsigned char uint8_t
    ctypedef unsigned short uint16_t
    ctypedef unsigned int uint32_t
    ctypedef unsigned long long uint64_t


cdef extern from "blosc2.h":
    ctypedef enum:
        BLOSC_NOFILTER
        BLOSC_SHUFFLE
        BLOSC_BITSHUFFLE
        BLOSC_DELTA
        BLOSC_TRUNC_PREC
        BLOSC_BLOSCLZ
        BLOSC_LZ4
        BLOSC_LZ4HC
        BLOSC_ZLIB
        BLOSC_ZSTD
        BLOSC2_MAX_FILTERS
        BLOSC2_MAX_METALAYERS
        BLOSC2_MAX_VLMETALAYERS
        BLOSC2_MAX_OVERHEAD
        BLOSC_ALWAYS_SPLIT = 1
        BLOSC_NEVER_SPLIT = 2
        BLOSC_AUTO_SPLIT = 3
        BLOSC_FORWARD_COMPAT_SPLIT = 4

    ctypedef int *blosc2_prefilter_fn
    ctypedef struct blosc2_prefilter_params
    ctypedef struct blosc2_storage
    ctypedef struct blosc2_btune
    ctypedef struct blosc2_context
    ctypedef struct blosc2_frame
    ctypedef struct blosc2_metalayer:
        char *name
        uint8_t *content
        int32_t content_len

    ctypedef struct blosc2_schunk:
        uint8_t version
        uint8_t compcode
        uint8_t compcode_meta
        uint8_t clevel
        int32_t typesize
        int32_t blocksize
        int32_t chunksize
        uint8_t filters[BLOSC2_MAX_FILTERS]
        uint8_t filters_meta[BLOSC2_MAX_FILTERS]
        int64_t nchunks
        int64_t nbytes
        int64_t cbytes
        uint8_t** data
        size_t data_len
        blosc2_storage *storage
        blosc2_frame *frame
        blosc2_context *cctx
        blosc2_context *dctx
        blosc2_metalayer *metalayers[BLOSC2_MAX_METALAYERS]
        int16_t nmetalayers
        blosc2_metalayer *vlmetalayers[BLOSC2_MAX_VLMETALAYERS]
        blosc2_btune *udbtune

    int blosc2_meta_exists(blosc2_schunk *schunk, const char *name)
    int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content,
                        int32_t content_len)
    int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content,
                           int32_t content_len)
    int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t ** content,
                        int32_t *content_len)


cdef extern from "caterva.h":
    ctypedef enum:
        CATERVA_MAX_DIM
        CATERVA_MAX_METALAYERS

    ctypedef struct caterva_config_t:
        void *(*alloc)(size_t)
        void (*free)(void *)
        uint8_t compcodec
        uint8_t compmeta
        uint8_t complevel
        int32_t splitmode
        int usedict
        int16_t nthreads
        uint8_t filters[BLOSC2_MAX_FILTERS]
        uint8_t filtersmeta[BLOSC2_MAX_FILTERS]
        blosc2_prefilter_fn prefilter
        blosc2_prefilter_params *pparams
        blosc2_btune *udbtune

    ctypedef struct caterva_ctx_t:
        caterva_config_t *cfg

    ctypedef struct caterva_metalayer_t:
        char *name
        uint8_t *sdata
        int32_t size

    ctypedef struct caterva_storage_t:
        int32_t chunkshape[CATERVA_MAX_DIM]
        int32_t blockshape[CATERVA_MAX_DIM]
        bool contiguous
        char* urlpath
        caterva_metalayer_t metalayers[CATERVA_MAX_METALAYERS]
        int32_t nmetalayers

    ctypedef struct caterva_params_t:
        int64_t shape[CATERVA_MAX_DIM]
        int8_t ndim
        uint8_t itemsize

    cdef struct chunk_cache_s:
        uint8_t *data
        int64_t nchunk

    ctypedef struct caterva_array_t:
        blosc2_schunk *sc
        uint8_t *buf
        int64_t shape[CATERVA_MAX_DIM]
        int32_t chunkshape[CATERVA_MAX_DIM]
        int64_t extshape[CATERVA_MAX_DIM]
        int32_t blockshape[CATERVA_MAX_DIM]
        int64_t extchunkshape[CATERVA_MAX_DIM]
        int64_t nitems
        int32_t chunknitems
        int64_t extnitems
        int32_t blocknitems
        int64_t extchunknitems
        int8_t ndim
        uint8_t itemsize
        int64_t nchunks
        chunk_cache_s chunk_cache

    int caterva_ctx_new(caterva_config_t *cfg, caterva_ctx_t **ctx)
    int caterva_ctx_free(caterva_ctx_t **ctx)
    int caterva_empty(caterva_ctx_t *ctx, caterva_params_t *params,
                      caterva_storage_t *storage, caterva_array_t ** array)
    int caterva_zeros(caterva_ctx_t *ctx, caterva_params_t *params,
                      caterva_storage_t *storage, caterva_array_t ** array)
    int caterva_full(caterva_ctx_t *ctx, caterva_params_t *params,
                     caterva_storage_t *storage, void *fill_value, caterva_array_t ** array)
    int caterva_free(caterva_ctx_t *ctx, caterva_array_t ** array)
    int caterva_from_schunk(caterva_ctx_t *ctx, blosc2_schunk *schunk,
                            caterva_array_t **array)
    int caterva_from_serial_schunk(caterva_ctx_t *ctx, uint8_t *serial_schunk, int64_t len,
                                   caterva_array_t ** array)
    int caterva_open(caterva_ctx_t *ctx, const char *urlpath, caterva_array_t ** array)
    int caterva_from_buffer(caterva_ctx_t *ctx, void *buffer, int64_t buffersize,
                            caterva_params_t *params, caterva_storage_t *storage,
                            caterva_array_t ** array)
    int caterva_to_buffer(caterva_ctx_t *ctx, caterva_array_t *array, void *buffer,
                          int64_t buffersize)
    int caterva_get_slice(caterva_ctx_t *ctx, caterva_array_t *src, int64_t *start,
                          int64_t *stop, caterva_storage_t *storage, caterva_array_t ** array)
    int caterva_squeeze_index(caterva_ctx_t *ctx, caterva_array_t *array,
                              bool *index)
    int caterva_squeeze(caterva_ctx_t *ctx, caterva_array_t *array)
    int caterva_get_slice_buffer(caterva_ctx_t *ctx, caterva_array_t *array,
                                 int64_t *start, int64_t *stop,
                                 void *buffer, int64_t *buffershape, int64_t buffersize)
    int caterva_set_slice_buffer(caterva_ctx_t *ctx,
                                 void *buffer, int64_t *buffershape, int64_t buffersize,
                                 int64_t *start, int64_t *stop, caterva_array_t *array)
    int caterva_copy(caterva_ctx_t *ctx, caterva_array_t *src, caterva_storage_t *storage,
                     caterva_array_t ** array)
    int caterva_resize(caterva_ctx_t *ctx, caterva_array_t *array, int64_t *new_shape,
                       int64_t *start)


# Defaults for compression params
config_dflts = {
    'codec': Codec.LZ4,
    'clevel': 5,
    'usedict': False,
    'nthreads': 1,
    'filters': [Filter.SHUFFLE],
    'filtersmeta': [0],  # no actual meta info for SHUFFLE, but anyway...
}


cdef int _check(int rc, what) except -1:
    """Raise ``RuntimeError`` if the C call named `what` returned a non-zero status."""
    # NOTE(review): assumes 0 is the library's success code (CATERVA_SUCCEED) -- confirm.
    if rc != 0:
        raise RuntimeError("%s failed with error code %d" % (what, rc))
    return 0


cdef class Context:
    """Owner of a ``caterva_ctx_t`` built from compression keyword arguments.

    Recognised keywords are the keys of ``config_dflts``; any keyword that is
    missing falls back to the value stored there.
    """
    cdef caterva_ctx_t *context_
    cdef uint8_t compcode
    cdef uint8_t compmeta
    cdef uint8_t complevel
    cdef int32_t splitmode
    cdef int usedict
    cdef int16_t nthreads
    cdef int32_t blocksize
    cdef uint8_t filters[BLOSC2_MAX_FILTERS]
    cdef uint8_t filtersmeta[BLOSC2_MAX_FILTERS]
    cdef blosc2_prefilter_fn prefilter
    cdef blosc2_prefilter_params* pparams

    def __init__(self, **kwargs):
        cdef caterva_config_t config
        config.free = free
        config.alloc = malloc
        config.compcodec = kwargs.get('codec', config_dflts['codec']).value
        config.compmeta = 0
        config.complevel = kwargs.get('clevel', config_dflts['clevel'])
        config.splitmode = BLOSC_AUTO_SPLIT
        config.usedict = kwargs.get('usedict', config_dflts['usedict'])
        config.nthreads = kwargs.get('nthreads', config_dflts['nthreads'])
        config.prefilter = NULL
        config.pparams = NULL
        config.udbtune = NULL

        for i in range(BLOSC2_MAX_FILTERS):
            config.filters[i] = 0
            config.filtersmeta[i] = 0

        filters = kwargs.get('filters', config_dflts['filters'])
        filtersmeta = kwargs.get('filtersmeta', config_dflts['filtersmeta'])
        # A longer list would produce a negative start index below and write
        # outside the fixed-size C arrays.
        if len(filters) > BLOSC2_MAX_FILTERS or len(filtersmeta) > BLOSC2_MAX_FILTERS:
            raise ValueError("at most %d filters are supported" % BLOSC2_MAX_FILTERS)

        # Both lists are right-aligned: the last entry lands in the last slot.
        offset = BLOSC2_MAX_FILTERS - len(filters)
        for i, filter_ in enumerate(filters):
            config.filters[offset + i] = filter_.value

        offset = BLOSC2_MAX_FILTERS - len(filtersmeta)
        for i, meta in enumerate(filtersmeta):
            config.filtersmeta[offset + i] = meta

        _check(caterva_ctx_new(&config, &self.context_), "caterva_ctx_new")

    def __dealloc__(self):
        # context_ stays NULL when __init__ never ran or failed early.
        if self.context_ != NULL:
            caterva_ctx_free(&self.context_)

    def tocapsule(self):
        """Return the underlying context pointer wrapped in a ``PyCapsule``."""
        return PyCapsule_New(self.context_, "caterva_ctx_t*", NULL)


cdef create_caterva_params(caterva_params_t *params, shape, itemsize):
    """Fill `params` with the number of dimensions, item size and `shape`."""
    if len(shape) > CATERVA_MAX_DIM:
        raise ValueError("at most %d dimensions are supported" % CATERVA_MAX_DIM)
    params.ndim = len(shape)
    params.itemsize = itemsize
    for i in range(params.ndim):
        params.shape[i] = shape[i]


cdef create_caterva_storage(caterva_storage_t *storage, kwargs):
    """Fill `storage` from the ``chunks``, ``blocks``, ``urlpath``,
    ``contiguous`` and ``meta`` entries of `kwargs`.

    Raises ``AttributeError`` when ``chunks`` or ``blocks`` is missing or empty.
    A ``str`` ``urlpath`` is replaced inside `kwargs` by its UTF-8 encoding.
    """
    chunks = kwargs.get('chunks', None)
    blocks = kwargs.get('blocks', None)
    urlpath = kwargs.get('urlpath', None)
    contiguous = kwargs.get('contiguous', False)
    meta = kwargs.get('meta', None)

    if not chunks:
        raise AttributeError("chunks must be specified")
    if not blocks:
        raise AttributeError("blocks must be specified")
    if len(chunks) > CATERVA_MAX_DIM:
        raise ValueError("at most %d dimensions are supported" % CATERVA_MAX_DIM)

    if urlpath is not None:
        urlpath = urlpath.encode("utf-8") if isinstance(urlpath, str) else urlpath
        # storage.urlpath only borrows the bytes buffer.  Keep the bytes object
        # referenced from the caller's kwargs so the pointer is still valid
        # after this function returns.
        kwargs['urlpath'] = urlpath
        storage.urlpath = urlpath
    else:
        storage.urlpath = NULL
    storage.contiguous = contiguous
    for i in range(len(chunks)):
        storage.chunkshape[i] = chunks[i]
        storage.blockshape[i] = blocks[i]

    if meta is None:
        storage.nmetalayers = 0
    else:
        if len(meta) > CATERVA_MAX_METALAYERS:
            raise ValueError("at most %d metalayers are supported" % CATERVA_MAX_METALAYERS)
        storage.nmetalayers = len(meta)
        for i, (name, content) in enumerate(meta.items()):
            name2 = name.encode("utf-8") if isinstance(name, str) else name
            # NOTE(review): these C copies are never released in this module;
            # confirm whether the library copies them before adding a free().
            storage.metalayers[i].name = strdup(name2)
            storage.metalayers[i].sdata = <uint8_t *> malloc(len(content))
            memcpy(storage.metalayers[i].sdata, <char *> content, len(content))
            storage.metalayers[i].size = len(content)


cdef class NDArray:
    """Python-side owner of a ``caterva_array_t`` pointer.

    The pointer starts as NULL and is set by the module-level constructor
    functions; it is released in ``__dealloc__``.
    """
    cdef caterva_array_t *array
    cdef kwargs

    @property
    def shape(self):
        """The shape of this container."""
        return tuple([self.array.shape[i] for i in range(self.array.ndim)])

    @property
    def chunks(self):
        """The chunk shape of this container."""
        return tuple([self.array.chunkshape[i] for i in range(self.array.ndim)])

    @property
    def blocks(self):
        """The block shape of this container."""
        return tuple([self.array.blockshape[i] for i in range(self.array.ndim)])

    @property
    def cratio(self):
        """The compression ratio for this container."""
        return self.size / (self.array.sc.cbytes + BLOSC2_MAX_OVERHEAD * self.nchunks)

    @property
    def clevel(self):
        """The compression level for this container."""
        return self.array.sc.clevel

    @property
    def codec(self):
        """The compression codec name for this container."""
        return Codec(self.array.sc.compcode)

    @property
    def filters(self):
        """The filters list for this container."""
        return [Filter(self.array.sc.filters[i]) for i in range(BLOSC2_MAX_FILTERS)]

    @property
    def itemsize(self):
        """The itemsize of this container."""
        return self.array.itemsize

    @property
    def chunksize(self):
        """The chunk size (in bytes) for this container."""
        return self.array.chunknitems * self.itemsize

    @property
    def blocksize(self):
        """The block size (in bytes) for this container."""
        return self.array.blocknitems * self.itemsize

    @property
    def size(self):
        """The size (in bytes) for this container."""
        return self.array.nitems * self.itemsize

    @property
    def nchunks(self):
        """The number of chunks in this container."""
        # Integer division: going through a float loses precision on very
        # large item counts.
        return self.array.extnitems // self.array.chunknitems

    @property
    def ndim(self):
        """The number of dimensions of this container."""
        return self.array.ndim

    @property
    def c_array(self):
        """The address of the underlying C array, as an integer."""
        return <uintptr_t> self.array

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.array = NULL

    def squeeze(self, **kwargs):
        """Squeeze the underlying array in place via ``caterva_squeeze``."""
        cdef Context ctx = Context(**kwargs)
        _check(caterva_squeeze(ctx.context_, self.array), "caterva_squeeze")

    def to_buffer(self, **kwargs):
        """Return the whole array contents as a ``bytes`` object."""
        cdef Context ctx = Context(**kwargs)
        buffersize = self.size
        buffer = bytes(buffersize)
        _check(caterva_to_buffer(ctx.context_, self.array, <void *> <char *> buffer, buffersize),
               "caterva_to_buffer")
        return buffer

    def __dealloc__(self):
        cdef Context ctx
        if self.array != NULL:
            ctx = Context(**self.kwargs)
            caterva_free(ctx.context_, &self.array)


def get_slice_numpy(arr, NDArray src, key, mask, **kwargs):
    """Copy the region ``key = (start, stop)`` of `src` into the buffer of `arr`.

    `arr` must expose a simple buffer large enough for the region.  `mask` is
    accepted but not used here.  Returns ``arr.squeeze()``.
    """
    cdef Context ctx = Context(**kwargs)
    start, stop = key

    cdef int64_t[CATERVA_MAX_DIM] start_, stop_
    cdef int64_t buffersize_ = src.itemsize
    cdef int64_t[CATERVA_MAX_DIM] buffershape_
    for i in range(src.ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]
        buffershape_[i] = stop_[i] - start_[i]
        buffersize_ *= buffershape_[i]

    cdef Py_buffer view
    PyObject_GetBuffer(arr, &view, PyBUF_SIMPLE)
    try:
        # Refuse to let the C call write past the end of the destination.
        if view.len < buffersize_:
            raise ValueError("destination buffer is too small for the requested slice")
        _check(caterva_get_slice_buffer(ctx.context_, src.array, start_, stop_,
                                        view.buf, buffershape_, buffersize_),
               "caterva_get_slice_buffer")
    finally:
        PyBuffer_Release(&view)

    return arr.squeeze()


def get_slice(NDArray arr, NDArray src, key, mask, **kwargs):
    """Store the region ``key = (start, stop)`` of `src` in `arr` as a new array.

    The result is then squeezed along the dimensions flagged in `mask`.
    Returns `arr`.
    """
    cdef Context ctx = Context(**kwargs)
    start, stop = key

    cdef int64_t[CATERVA_MAX_DIM] start_, stop_
    cdef bool mask_[CATERVA_MAX_DIM]
    for i in range(src.ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]
        mask_[i] = mask[i]

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_get_slice(ctx.context_, src.array, start_, stop_, &storage_, &array_),
           "caterva_get_slice")
    # Hand the pointer to `arr` first so it is released even if squeezing fails.
    arr.array = array_
    _check(caterva_squeeze_index(ctx.context_, array_, mask_), "caterva_squeeze_index")
    return arr


def set_slice(NDArray dst, key, ndarray):
    """Write the buffer of `ndarray` into the region ``key = (start, stop)`` of `dst`.

    Returns `dst`.
    """
    cdef Context ctx = Context(**dst.kwargs)
    ndim = dst.ndim
    start, stop = key

    cdef int64_t[CATERVA_MAX_DIM] buffershape_, start_, stop_
    for i in range(ndim):
        start_[i] = start[i]
        stop_[i] = stop[i]
        buffershape_[i] = stop[i] - start[i]

    # A stack-allocated view needs no malloc/free pair.
    cdef Py_buffer view
    PyObject_GetBuffer(ndarray, &view, PyBUF_SIMPLE)
    try:
        _check(caterva_set_slice_buffer(ctx.context_, view.buf, buffershape_, view.len,
                                        start_, stop_, dst.array),
               "caterva_set_slice_buffer")
    finally:
        PyBuffer_Release(&view)
    return dst


def empty(NDArray arr, shape, itemsize, **kwargs):
    """Create an array of `shape` and `itemsize` via ``caterva_empty`` and store it in `arr`."""
    cdef Context ctx = Context(**kwargs)

    cdef caterva_params_t params_
    create_caterva_params(&params_, shape, itemsize)

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_empty(ctx.context_, &params_, &storage_, &array_), "caterva_empty")
    arr.array = array_


def zeros(NDArray arr, shape, itemsize, **kwargs):
    """Create an array of `shape` and `itemsize` via ``caterva_zeros`` and store it in `arr`."""
    cdef Context ctx = Context(**kwargs)

    cdef caterva_params_t params_
    create_caterva_params(&params_, shape, itemsize)

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_zeros(ctx.context_, &params_, &storage_, &array_), "caterva_zeros")
    arr.array = array_


def full(NDArray arr, shape, value, **kwargs):
    """Create an array of `shape` filled with `value` and store it in `arr`.

    `value` is a bytes-like item; its length is used as the item size.
    """
    cdef Context ctx = Context(**kwargs)

    cdef caterva_params_t params_
    create_caterva_params(&params_, shape, len(value))

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef uint8_t *fill_value_ = value
    cdef caterva_array_t *array_ = NULL
    _check(caterva_full(ctx.context_, &params_, &storage_, fill_value_, &array_),
           "caterva_full")
    arr.array = array_


def copy(NDArray arr, NDArray src, **kwargs):
    """Copy `src` into a new array with the storage given by `kwargs`; return `arr`."""
    cdef Context ctx = Context(**kwargs)

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_copy(ctx.context_, src.array, &storage_, &array_), "caterva_copy")
    arr.array = array_
    return arr


def resize(NDArray arr, new_shape):
    """Resize `arr` in place to `new_shape` and return it.

    Raises ``ValueError`` if `new_shape` does not have one entry per dimension.
    """
    cdef Context ctx = Context(**arr.kwargs)
    # The C call reads one entry per dimension of the array.
    if len(new_shape) != arr.ndim:
        raise ValueError("new_shape must have %d entries" % arr.ndim)

    cdef int64_t new_shape_[CATERVA_MAX_DIM]
    for i, s in enumerate(new_shape):
        new_shape_[i] = s
    _check(caterva_resize(ctx.context_, arr.array, new_shape_, NULL), "caterva_resize")
    return arr


def from_file(NDArray arr, urlpath, **kwargs):
    """Open the array stored at `urlpath` and store it in `arr`.

    Raises ``FileNotFoundError`` if `urlpath` does not exist.
    """
    cdef Context ctx = Context(**kwargs)

    urlpath = urlpath.encode("utf-8") if isinstance(urlpath, str) else urlpath
    if not os.path.exists(urlpath):
        raise FileNotFoundError(urlpath)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_open(ctx.context_, urlpath, &array_), "caterva_open")
    arr.array = array_


def from_buffer(NDArray arr, buf, shape, itemsize, **kwargs):
    """Build an array of `shape` and `itemsize` from the bytes in `buf`; store it in `arr`."""
    cdef Context ctx = Context(**kwargs)

    cdef caterva_params_t params_
    create_caterva_params(&params_, shape, itemsize)

    cdef caterva_storage_t storage_
    create_caterva_storage(&storage_, kwargs)

    cdef caterva_array_t *array_ = NULL
    _check(caterva_from_buffer(ctx.context_, <void *> <char *> buf, len(buf),
                               &params_, &storage_, &array_),
           "caterva_from_buffer")
    arr.array = array_


def asarray(NDArray arr, ndarray, **kwargs):
    """Build an array from `ndarray` and store it in `arr`.

    The shape comes from ``ndarray.__array_interface__``; the data and item
    size come from its buffer.
    """
    cdef Context ctx = Context(**kwargs)
    shape = ndarray.__array_interface__["shape"]

    cdef caterva_params_t params_
    cdef caterva_storage_t storage_
    cdef caterva_array_t *array_ = NULL

    # A stack-allocated view needs no malloc/free pair.
    cdef Py_buffer view
    PyObject_GetBuffer(ndarray, &view, PyBUF_SIMPLE)
    try:
        create_caterva_params(&params_, shape, view.itemsize)
        create_caterva_storage(&storage_, kwargs)
        _check(caterva_from_buffer(ctx.context_, view.buf, view.len,
                                   &params_, &storage_, &array_),
               "caterva_from_buffer")
    finally:
        PyBuffer_Release(&view)
    arr.array = array_


def meta__contains__(self, name):
    """Return whether the metalayer `name` exists in the array behind `self`."""
    cdef caterva_array_t *array = <caterva_array_t *> <uintptr_t> self.c_array
    name = name.encode("utf-8") if isinstance(name, str) else name
    return blosc2_meta_exists(array.sc, name) >= 0


def meta__getitem__(self, name):
    """Return the content of the metalayer `name` as ``bytes``.

    Raises ``KeyError`` when the metalayer cannot be read.
    """
    cdef caterva_array_t *array = <caterva_array_t *> <uintptr_t> self.c_array
    name = name.encode("utf-8") if isinstance(name, str) else name
    cdef uint8_t *content = NULL
    cdef int32_t content_len = 0
    n = blosc2_meta_get(array.sc, name, &content, &content_len)
    if n < 0:
        # On failure `content` was never filled in; do not read it.
        raise KeyError(name)
    try:
        return PyBytes_FromStringAndSize(<char *> content, content_len)
    finally:
        # blosc2_meta_get hands back a malloc'ed copy owned by the caller.
        free(content)


def meta__setitem__(self, name, content):
    """Replace the content of the metalayer `name` with `content`.

    The new content must have the same length as the old one, otherwise
    ``ValueError`` is raised.  Returns the status of ``blosc2_meta_update``.
    """
    cdef caterva_array_t *array = <caterva_array_t *> <uintptr_t> self.c_array
    name = name.encode("utf-8") if isinstance(name, str) else name
    old_content = meta__getitem__(self, name)
    if len(old_content) != len(content):
        raise ValueError("The length of the content in a metalayer cannot change.")
    n = blosc2_meta_update(array.sc, name, content, len(content))
    return n


def meta__len__(self):
    """Return the number of metalayers in the array behind `self`."""
    cdef caterva_array_t *arr = <caterva_array_t *> <uintptr_t> self.c_array
    return arr.sc.nmetalayers


def meta_keys(self):
    """Return the metalayer names of the array behind `self` as a list of ``str``."""
    cdef caterva_array_t *arr = <caterva_array_t *> <uintptr_t> self.c_array
    keys = []
    for i in range(arr.sc.nmetalayers):
        keys.append(arr.sc.metalayers[i].name.decode("utf-8"))
    return keys