├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ └── bug_report.md ├── dependabot.yml └── workflows │ ├── build.yml │ ├── cibuildwheels.yml │ └── wasm.yml ├── .gitignore ├── .guix-channel ├── .guix └── modules │ └── python-blosc2-package.scm ├── .pre-commit-config.yaml ├── ANNOUNCE.rst ├── CMakeLists.txt ├── CONTRIBUTING.rst ├── LICENSE.txt ├── README.rst ├── README_DEVELOPERS.md ├── RELEASE_NOTES.md ├── RELEASING.rst ├── bench ├── compress_numpy.py ├── encode-itrunc-Linux-i13900K.ipynb ├── encode-itrunc-MacOS-M1.ipynb ├── encode-sparse-MacOS-Intel.ipynb ├── fill_special.py ├── get_slice.py ├── io.py ├── ndarray │ ├── aligned_chunks.py │ ├── array-constructor-memray.py │ ├── array-constructor.py │ ├── broadcast_expr.py │ ├── compare_getslice.py │ ├── compute_dists.py │ ├── compute_dists2.py │ ├── compute_expr_numba.py │ ├── compute_expr_udf.ipynb │ ├── compute_fields.py │ ├── compute_where.py │ ├── copy_postfilter.py │ ├── download_data.py │ ├── era5-pds │ │ ├── measurements-i10k.parquet │ │ ├── measurements-i13k-always-split.parquet │ │ ├── measurements-i13k-never-split.parquet │ │ ├── measurements-i13k.parquet │ │ ├── measurements-m1.parquet │ │ └── measurements-ryzen3.parquet │ ├── jit-expr.py │ ├── jit-numpy-funcs.py │ ├── jit-reduc-float64-lossy-plot.py │ ├── jit-reduc-float64-plot-dask.py │ ├── jit-reduc-float64-plot-semilogx.py │ ├── jit-reduc-float64-plot.py │ ├── jit-reduc-sizes-dask.py │ ├── jit-reduc-sizes.py │ ├── jit-reduc.py │ ├── lazyarray-constructors.py │ ├── lazyarray-dask-large.ipynb │ ├── lazyarray-dask-small.ipynb │ ├── lazyarray-expr-large.ipynb │ ├── lazyarray-expr.ipynb │ ├── matmul.ipynb │ ├── plot_transcode_data.ipynb │ ├── reduce_expr.py │ ├── run-jit-reduc-sizes.sh │ ├── transcode_data.py │ └── transpose.ipynb ├── pack_compress.py ├── pack_large.py ├── pack_tensor.py ├── set_slice.py └── sum_postfilter.py ├── code_of_conduct.md ├── doc ├── _static │ ├── blosc-favicon_32x32.png │ ├── blosc-favicon_64x64.png │ ├── blosc-logo_128.png │ ├── 
blosc-logo_256.png │ └── css │ │ └── custom.css ├── conf.py ├── development │ ├── code-of-conduct.rst │ ├── contributing.rst │ └── index.rst ├── getting_started │ ├── index.rst │ ├── installation.rst │ ├── overview.rst │ ├── tutorials.rst │ └── tutorials │ │ ├── 01.ndarray-basics.ipynb │ │ ├── 02.lazyarray-expressions.ipynb │ │ ├── 03.lazyarray-udf.ipynb │ │ ├── 04.reductions.ipynb │ │ ├── 05.persistent-reductions.ipynb │ │ ├── 06.remote_proxy.ipynb │ │ ├── 07.schunk-basics.ipynb │ │ ├── 08.schunk-slicing_and_beyond.ipynb │ │ ├── 10.ucodecs-ufilters.ipynb │ │ ├── 11.prefilters.ipynb │ │ ├── 12.postfilters.ipynb │ │ └── images │ │ ├── blosc2-pipeline.png │ │ ├── blosc2-pipeline.svg │ │ ├── lazyexpr-broadcast.png │ │ ├── reductions │ │ ├── 3D-cube-plane.png │ │ ├── 3D-cube.png │ │ ├── memory-access-2D-x.png │ │ └── memory-access-2D-y.png │ │ ├── remote_proxy.png │ │ └── ucodecs-filters │ │ ├── backward.png │ │ ├── backward.svg │ │ ├── decoder.png │ │ ├── decoder.svg │ │ ├── decoder2.png │ │ ├── decoder2.svg │ │ ├── encoder.png │ │ ├── encoder.svg │ │ ├── encoder2.png │ │ ├── encoder2.svg │ │ ├── forward.png │ │ └── forward.svg ├── index.rst ├── python-blosc2.rst ├── reference │ ├── array_operations.rst │ ├── autofiles │ │ └── schunk │ │ │ ├── blosc2.schunk.SChunk.__getitem__.rst │ │ │ ├── blosc2.schunk.SChunk.__init__.rst │ │ │ ├── blosc2.schunk.SChunk.__len__.rst │ │ │ ├── blosc2.schunk.SChunk.__setitem__.rst │ │ │ ├── blosc2.schunk.SChunk.append_data.rst │ │ │ ├── blosc2.schunk.SChunk.decompress_chunk.rst │ │ │ ├── blosc2.schunk.SChunk.delete_chunk.rst │ │ │ ├── blosc2.schunk.SChunk.fill_special.rst │ │ │ ├── blosc2.schunk.SChunk.filler.rst │ │ │ ├── blosc2.schunk.SChunk.get_chunk.rst │ │ │ ├── blosc2.schunk.SChunk.get_slice.rst │ │ │ ├── blosc2.schunk.SChunk.insert_chunk.rst │ │ │ ├── blosc2.schunk.SChunk.insert_data.rst │ │ │ ├── blosc2.schunk.SChunk.iterchunks.rst │ │ │ ├── blosc2.schunk.SChunk.iterchunks_info.rst │ │ │ ├── blosc2.schunk.SChunk.postfilter.rst │ 
│ │ ├── blosc2.schunk.SChunk.prefilter.rst │ │ │ ├── blosc2.schunk.SChunk.remove_postfilter.rst │ │ │ ├── blosc2.schunk.SChunk.remove_prefilter.rst │ │ │ ├── blosc2.schunk.SChunk.to_cframe.rst │ │ │ ├── blosc2.schunk.SChunk.update_chunk.rst │ │ │ └── blosc2.schunk.SChunk.update_data.rst │ ├── c2array.rst │ ├── classes.rst │ ├── decorators.rst │ ├── index.rst │ ├── lazy_functions.rst │ ├── lazyarray.rst │ ├── linear_algebra.rst │ ├── low_level.rst │ ├── ndarray.rst │ ├── ndfield.rst │ ├── proxy.rst │ ├── proxyndsource.rst │ ├── proxysource.rst │ ├── reduction_functions.rst │ ├── save_load.rst │ ├── schunk.rst │ ├── simpleproxy.rst │ ├── storage.rst │ └── utilities.rst └── release_notes │ └── index.md ├── examples ├── blosc2_hdf5_compression.py ├── btune.py ├── c2array-get-slice.py ├── compress2_decompress2.py ├── compress_decompress.py ├── filler.py ├── gil.py ├── mmap-rw.py ├── ndarray │ ├── arange-constructor.py │ ├── asarray_.py │ ├── broadcast_expr.py │ ├── buffer.py │ ├── bytedelta_filter.py │ ├── c2array_expr.py │ ├── compute_expr.py │ ├── compute_fields.py │ ├── compute_udf_numba.py │ ├── compute_where.py │ ├── copy_.py │ ├── empty_.py │ ├── eye-constructor.py │ ├── filter_sort_fields.py │ ├── formats.py │ ├── fromiter-constructor.py │ ├── general_expressions.py │ ├── getitem.py │ ├── iterchunks_info.py │ ├── jit-expr.py │ ├── jit-numpy-funcs.py │ ├── jit-reduc.py │ ├── lazyexpr_where_indexing.py │ ├── linspace-constructor.py │ ├── lists-vs-bools-idx.py │ ├── meta.py │ ├── ndarray_copy.py │ ├── ndmean.py │ ├── persistency.py │ ├── proxy-carray.py │ ├── proxy-ndarray.py │ ├── reduce_and_enlarge.py │ ├── reduce_expr.py │ ├── reduce_expr_save.py │ ├── reduce_string_expr.py │ ├── resize_.py │ ├── work_with_numpy.py │ └── zfp_codec.py ├── pack_array.py ├── pack_tensor.py ├── postfilter1.py ├── postfilter2.py ├── postfilter3.py ├── prefilter.py ├── save_tensor.py ├── schunk.py ├── schunk_roundtrip.py ├── ucodecs.py ├── ufilters.py └── vlmeta.py ├── 
generate_version.py ├── guix.scm ├── images ├── Complete-Write-Read-B2ND.png ├── M1-i386-vs-arm64-pack.png ├── M1-i386-vs-arm64-unpack.png ├── Read-Partial-Slices-B2ND.png ├── b2nd-2level-parts.png ├── eval-expr-full-mem-M2.png ├── eval-expr-scarce-mem-M2.png ├── lazyarray-dask-large.png ├── lazyarray-dask-small.png ├── lazyarray-expr-large.png ├── lazyarray-expr.png ├── linspace-compress.png ├── linspace-decompress.png ├── pack-array-cratios.png ├── reduc-float64-amd.png └── reduc-float64-log-amd.png ├── pyproject.toml ├── pytest.ini ├── src └── blosc2 │ ├── __init__.py │ ├── blosc2_ext.pyx │ ├── c2array.py │ ├── core.py │ ├── exceptions.py │ ├── helpers.py │ ├── info.py │ ├── lazyexpr.py │ ├── ndarray.py │ ├── proxy.py │ ├── schunk.py │ ├── storage.py │ └── version.py ├── tests ├── conftest.py ├── ndarray │ ├── test_auto_parts.py │ ├── test_buffer.py │ ├── test_c2array_expr.py │ ├── test_c2array_reductions.py │ ├── test_c2array_udf.py │ ├── test_copy.py │ ├── test_empty.py │ ├── test_evaluate.py │ ├── test_full.py │ ├── test_get_slice_nchunks.py │ ├── test_getitem.py │ ├── test_iterchunks_info.py │ ├── test_jit.py │ ├── test_lazyexpr.py │ ├── test_lazyexpr_fields.py │ ├── test_lazyudf.py │ ├── test_lossy.py │ ├── test_matmul.py │ ├── test_metalayers.py │ ├── test_mode.py │ ├── test_nans.py │ ├── test_ndarray.py │ ├── test_numpy.py │ ├── test_persistency.py │ ├── test_proxy.py │ ├── test_proxy_c2array.py │ ├── test_proxy_expr.py │ ├── test_reductions.py │ ├── test_resize.py │ ├── test_setitem.py │ ├── test_slice.py │ ├── test_squeeze.py │ ├── test_struct_dtype.py │ ├── test_transpose.py │ └── test_zeros.py ├── test_bytes_array.py ├── test_comp_info.py ├── test_compress2.py ├── test_compression_parameters.py ├── test_compressors.py ├── test_decompress.py ├── test_iterchunks.py ├── test_mmap.py ├── test_open.py ├── test_open_c2array.py ├── test_pandas_udf_engine.py ├── test_pathlib.py ├── test_postfilters.py ├── test_prefilters.py ├── test_proxy_schunk.py ├── 
test_python_blosc.py ├── test_schunk.py ├── test_schunk_constructor.py ├── test_schunk_delete.py ├── test_schunk_get_slice.py ├── test_schunk_get_slice_nchunks.py ├── test_schunk_insert.py ├── test_schunk_set_slice.py ├── test_schunk_update.py ├── test_storage.py ├── test_tensor.py ├── test_ucodecs.py ├── test_ufilters.py └── test_vlmeta.py └── update_version.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: FrancescAlted 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Create a self-contained code snippet reproducing the issue 16 | 2. Show the output of the error 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Desktop (please complete the following information):** 22 | - OS: [e.g. iOS] 23 | - Version [e.g. 22] 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 2 | version: 2 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | # Trigger the workflow on pushes to any branch 5 | # and on pull requests targeting the main branch 6 | push: 7 | branches: 8 | - '**' # this matches all branches 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | build_wheels: 15 | name: Build and test on ${{ matrix.os }} 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-latest, windows-latest, macos-latest] 20 | python-version: ["3.12"] 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Install Ninja 31 | uses: seanmiddleditch/gha-setup-ninja@master 32 | 33 | - name: Build 34 | run: pip install -e .[test] 35 | 36 | - name: Test 37 | run: python -m pytest -m "not heavy and (network or not network)" 38 | -------------------------------------------------------------------------------- /.github/workflows/wasm.yml: -------------------------------------------------------------------------------- 1 | name: Python wheels for WASM 2 | on: 3 | push: 4 | tags: 5 | - '*' 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | env: 11 | CIBW_BUILD_VERBOSITY: 1 12 | # cibuildwheel cannot select a specific version of pyodide yet 13 | # PYODIDE_VERSION: 0.27.2 14 | 
15 | jobs: 16 | build_wheels_wasm: 17 | name: Build and test wheels for WASM on ${{ matrix.os }} for ${{ matrix.p_ver }} 18 | runs-on: ubuntu-latest 19 | permissions: 20 | contents: write 21 | env: 22 | CIBW_BUILD: ${{ matrix.cibw_build }} 23 | CMAKE_ARGS: "-DWITH_OPTIM=OFF" 24 | CIBW_TEST_COMMAND: "pytest {project}/tests/ndarray/test_reductions.py" 25 | strategy: 26 | matrix: 27 | os: [ubuntu-latest] 28 | cibw_build: ["cp3{11,12,13}-*"] 29 | p_ver: ["3.11-3.13"] 30 | 31 | steps: 32 | - name: Checkout repo 33 | uses: actions/checkout@v4 34 | 35 | - name: Set up Python 36 | uses: actions/setup-python@v5 37 | with: 38 | python-version: '3.x' 39 | 40 | - name: Install dependencies 41 | run: | 42 | sudo apt-get update 43 | sudo apt-get install -y cmake 44 | 45 | - name: Install cibuildwheel 46 | run: pip install cibuildwheel 47 | 48 | - name: Build wheels 49 | # Testing is automatically done by cibuildwheel 50 | run: cibuildwheel --platform pyodide 51 | 52 | - name: Upload wheels 53 | uses: actions/upload-artifact@v4 54 | with: 55 | name: wheels-wasm-${{ matrix.os }}-${{ matrix.p_ver }} 56 | path: ./wheelhouse/*.whl 57 | 58 | # This is not working yet 59 | # - name: Upload wheel to release 60 | # if: startsWith(github.ref, 'refs/tags/') 61 | # env: 62 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 63 | # run: | 64 | # gh release upload ${GITHUB_REF_NAME} ./wheelhouse/*.whl 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | wheelhouse/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | 
*.egg 29 | MANIFEST 30 | src/blosc2/_version.py 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # PyCharm 134 | .idea/ 135 | 136 | # skbuild 137 | _skbuild/ 138 | 139 | # sphinx 140 | doc/_build/ 141 | 142 | 143 | .*.swp 144 | -------------------------------------------------------------------------------- /.guix-channel: -------------------------------------------------------------------------------- 1 | (channel 2 | (version 0) 3 | (directory ".guix/modules")) 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^doc/reference/autofiles/ 2 | ci: 3 | autoupdate_commit_msg: "Update pre-commit hooks" 4 | autoupdate_schedule: "monthly" 5 | autofix_commit_msg: "Apply pre-commit fixes" 6 | autofix_prs: false 7 | default_stages: [pre-commit, pre-push] 8 | repos: 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v5.0.0 11 | hooks: 12 | - id: check-yaml 13 | - id: end-of-file-fixer 14 | - id: mixed-line-ending 15 | - id: requirements-txt-fixer 16 | - id: trailing-whitespace 17 | 18 | - repo: https://github.com/astral-sh/ruff-pre-commit 19 | rev: v0.11.12 20 | hooks: 21 | - id: ruff 22 | args: ["--fix", "--show-fixes"] 23 | - id: ruff-format 24 | exclude: ^bench/ 25 | 26 | - repo: https://github.com/adamchainz/blacken-docs 27 | rev: 1.19.1 28 | hooks: 29 | - id: blacken-docs 30 | additional_dependencies: 
[black==24.*] 31 | 32 | - repo: https://github.com/pre-commit/pygrep-hooks 33 | rev: v1.10.0 34 | hooks: 35 | - id: rst-backticks 36 | - id: rst-directive-colons 37 | - id: rst-inline-touching-normal 38 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15.0) 2 | project(python-blosc2) 3 | # Specifying Python version below is tricky, but if you don't specify the minimum version here, 4 | # it would not consider python3 when looking for the executable. This is problematic since Fedora 5 | # does not include a python symbolic link to python3. 6 | # find_package(Python 3.12 COMPONENTS Interpreter NumPy Development.Module REQUIRED) 7 | # IMO, this would need to be solved in Fedora, so we can just use the following line: 8 | find_package(Python COMPONENTS Interpreter NumPy Development.Module REQUIRED) 9 | 10 | # Add custom command to generate the version file 11 | add_custom_command( 12 | OUTPUT src/blosc2/version.py 13 | COMMAND ${Python_EXECUTABLE} generate_version.py 14 | DEPENDS generate_version.py pyproject.toml 15 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 16 | VERBATIM 17 | ) 18 | 19 | # Compile the Cython extension manually... 
20 | add_custom_command( 21 | OUTPUT blosc2_ext.c 22 | COMMAND Python::Interpreter -m cython 23 | "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/blosc2_ext.pyx" --output-file blosc2_ext.c 24 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/blosc2_ext.pyx" 25 | VERBATIM) 26 | # ...and add it to the target 27 | Python_add_library(blosc2_ext MODULE blosc2_ext.c WITH_SOABI) 28 | # We need to link against NumPy 29 | target_link_libraries(blosc2_ext PRIVATE Python::NumPy) 30 | 31 | if(DEFINED ENV{USE_SYSTEM_BLOSC2}) 32 | set(USE_SYSTEM_BLOSC2 ON) 33 | endif() 34 | 35 | if(USE_SYSTEM_BLOSC2) 36 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") 37 | find_package(PkgConfig REQUIRED) 38 | pkg_check_modules(Blosc2 REQUIRED IMPORTED_TARGET blosc2) 39 | target_link_libraries(blosc2_ext PRIVATE PkgConfig::Blosc2) 40 | else() 41 | set(STATIC_LIB ON CACHE BOOL "Build a static version of the blosc library.") 42 | set(SHARED_LIB ON CACHE BOOL "Build a shared library version of the blosc library.") 43 | set(BUILD_TESTS OFF CACHE BOOL "Build C-Blosc2 tests") 44 | set(BUILD_EXAMPLES OFF CACHE BOOL "Build C-Blosc2 examples") 45 | set(BUILD_BENCHMARKS OFF CACHE BOOL "Build C-Blosc2 benchmarks") 46 | set(BUILD_FUZZERS OFF CACHE BOOL "Build C-Blosc2 fuzzers") 47 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 48 | # we want the binaries of the C-Blosc2 library to go into the wheels 49 | set(BLOSC_INSTALL ON) 50 | include(FetchContent) 51 | FetchContent_Declare(blosc2 52 | GIT_REPOSITORY https://github.com/Blosc/c-blosc2 53 | GIT_TAG 4ef3c7440a85632e6c8b6c5d2a9e651e45569fc1 # v2.17.1 + mmap fix 54 | ) 55 | FetchContent_MakeAvailable(blosc2) 56 | include_directories("${blosc2_SOURCE_DIR}/include") 57 | target_link_libraries(blosc2_ext PRIVATE blosc2_static) 58 | endif() 59 | 60 | add_custom_command( 61 | TARGET blosc2_ext POST_BUILD 62 | COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_SOURCE_DIR}/blosc2 63 | ) 64 | 65 | install(TARGETS blosc2_ext LIBRARY DESTINATION blosc2) 66 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing guidelines 2 | ======================= 3 | 4 | We want to make contributing to this project as easy and transparent as 5 | possible. 6 | 7 | Our Development Process 8 | ----------------------- 9 | New versions are being developed in the "main" branch, 10 | or in their own feature branch. 11 | When they are deemed ready for a release, they are merged back into "main" 12 | again. 13 | 14 | So all contributions must stage first through "main" 15 | or their own feature branch. 16 | 17 | Pull Requests 18 | ------------- 19 | We actively welcome your pull requests. 20 | 21 | 1. Fork the repo and create your branch from ``main``. 22 | 2. If you've added code that should be tested, add tests. 23 | 3. If you've changed APIs, update the documentation. 24 | 4. Ensure the test suite passes. 25 | 5. Make sure your code does not issue new compiler warnings. 26 | 27 | Issues 28 | ------ 29 | We use GitHub issues to track public bugs. Please ensure your description is 30 | clear and has sufficient instructions to be able to reproduce the issue. 31 | 32 | License 33 | ------- 34 | By contributing to Python-Blosc2, you agree that your contributions will be licensed 35 | under the `LICENSE <https://github.com/Blosc/python-blosc2/blob/main/LICENSE.txt>`_ 36 | file of the project. 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | For Blosc - A blocking, shuffling and lossless compression library 4 | 5 | Copyright (c) 2019-present, Blosc Development Team 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. 
Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | 3. Neither the name of the copyright holder nor the names of its 19 | contributors may be used to endorse or promote products derived from 20 | this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 26 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /README_DEVELOPERS.md: -------------------------------------------------------------------------------- 1 | # Requirements for developers 2 | 3 | We are using Ruff as code formatter and as a linter. It is automatically enforced 4 | if you activate these as plugins for [pre-commit](https://pre-commit.com). You can activate 5 | the pre-commit actions by following the [instructions](https://pre-commit.com/#installation). 
6 | As the config files are already there, this essentially boils down to: 7 | 8 | ``` bash 9 | python -m pip install pre-commit 10 | pre-commit install 11 | ``` 12 | 13 | You are done! 14 | 15 | ## Building from sources 16 | 17 | ``python-blosc2`` includes the C-Blosc2 source code and can be built in place: 18 | 19 | ``` bash 20 | git clone https://github.com/Blosc/python-blosc2/ 21 | cd python-blosc2 22 | pip install . # add -e for editable mode 23 | ``` 24 | 25 | That's it! You can now proceed to the testing section. 26 | 27 | ## Testing 28 | 29 | We are using pytest for testing. You can run the tests by executing 30 | 31 | ``` bash 32 | pytest 33 | ``` 34 | 35 | If you want to run a heavyweight version of the tests, you can use the following command: 36 | 37 | ``` bash 38 | pytest -m "heavy" 39 | ``` 40 | 41 | If you want to run the network tests, you can use the following command: 42 | 43 | ``` bash 44 | pytest -m "network" 45 | ``` 46 | 47 | ## Documentation 48 | 49 | We are using Sphinx for documentation. You can build the documentation by executing: 50 | 51 | ``` bash 52 | cd doc 53 | rm -rf html _build 54 | python -m sphinx . html 55 | ``` 56 | [You may need to install the `pandoc` package first: https://pandoc.org/installing.html] 57 | 58 | You will find the documentation in the `html` directory. 59 | -------------------------------------------------------------------------------- /bench/compress_numpy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | """ 11 | Small benchmark that compares a plain NumPy array copy against 12 | compression through different compressors in blosc2. 13 | """ 14 | 15 | import time 16 | 17 | import numpy as np 18 | 19 | import blosc2 20 | 21 | NREP = 4 22 | N = int(1e8) 23 | Nexp = np.log10(N) 24 | 25 | blosc2.print_versions() 26 | 27 | print(f"Creating NumPy arrays with 10**{Nexp} int64/float64 elements:") 28 | arrays = ( 29 | (np.arange(N, dtype=np.int64), "the arange linear distribution"), 30 | (np.linspace(0, 10_000, N), "the linspace linear distribution"), 31 | (np.random.randint(0, 10_000, N), "the random distribution"), # noqa: NPY002 32 | ) 33 | 34 | in_ = arrays[0][0] 35 | # Cause a page fault here 36 | out_ = np.full_like(in_, fill_value=0) 37 | t0 = time.time() 38 | for _i in range(NREP): 39 | np.copyto(out_, in_) 40 | tcpy = (time.time() - t0) / NREP 41 | print( 42 | f" *** np.copyto() *** Time for memcpy():\t{tcpy:.3f} s\t({(N * 8 / tcpy) / 2**30:.2f} GB/s)" 43 | ) 44 | 45 | print("\nTimes for compressing/decompressing:") 46 | for in_, label in arrays: 47 | print(f"\n*** {label} ***") 48 | for codec in blosc2.compressor_list(): 49 | for filter in ( 50 | blosc2.Filter.NOFILTER, 51 | blosc2.Filter.SHUFFLE, 52 | blosc2.Filter.BITSHUFFLE, 53 | ): 54 | clevel = 6 55 | t0 = time.time() 56 | c = blosc2.compress(in_, in_.itemsize, clevel=clevel, filter=filter, codec=codec) 57 | tc = time.time() - t0 58 | # Cause a page fault here 59 | out = np.full_like(in_, fill_value=0) 60 | t0 = time.time() 61 | for _i in range(NREP): 62 | blosc2.decompress(c, dst=out) 63 | td = (time.time() - t0) / NREP 64 | assert np.array_equal(in_, out) 65 | print( 66 | f" *** {codec:15s}, {filter:20s} *** {tc:6.3f} s ({(N * 8 / tc) / 2**30:.2f} GB/s) / {td:5.3f} s ({(N * 8 / 
td) / 2**30:.2f} GB/s)", 67 | end="", 68 | ) 69 | print(f"\tcr: {N * 8.0 / len(c):5.1f}x") 70 | -------------------------------------------------------------------------------- /bench/fill_special.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import sys 10 | from time import time 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | # Dimensions, type and persistence properties for the arrays 17 | nelem = 1_00_000_000 18 | dtype = np.dtype(np.float64) 19 | print(f"Filling a SChunk with {nelem / 1e6} Melements of {dtype=}") 20 | 21 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False 22 | if persistent: 23 | urlpath = "bench_fill_special.b2frame" 24 | print(f"Writing output to {urlpath}...") 25 | else: 26 | urlpath = None 27 | 28 | 29 | def create_schunk(data=None): 30 | blosc2.remove_urlpath(urlpath) 31 | # Create the empty SChunk 32 | return blosc2.SChunk(data=data, urlpath=urlpath, cparams={"typesize": dtype.itemsize}) 33 | 34 | 35 | t0 = time() 36 | schunk = create_schunk(data=np.full(nelem, np.pi, dtype)) 37 | t = (time() - t0) * 1000. 38 | print(f"Time with `data` argument in constructor: {t:19.3f} ms") 39 | 40 | schunk = create_schunk() 41 | t0 = time() 42 | schunk.fill_special(nelem, blosc2.SpecialValue.UNINIT) 43 | schunk[:] = np.full(nelem, np.pi, dtype) 44 | t = (time() - t0) * 1000. 45 | print(f"Time without passing directly the value: {t:20.3f} ms") 46 | 47 | schunk = create_schunk() 48 | t0 = time() 49 | schunk.fill_special(nelem, blosc2.SpecialValue.VALUE, np.pi) 50 | t = (time() - t0) * 1000. 
51 | print(f"Time passing directly the value to `fill_special`: {t:10.3f} ms") 52 | 53 | blosc2.remove_urlpath(urlpath) 54 | -------------------------------------------------------------------------------- /bench/get_slice.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import sys 10 | from time import time 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | # Dimensions, type and persistence properties for the arrays 17 | shape = 10_000 * 10_000 18 | chunksize = 100_000 19 | blocksize = 10_000 20 | 21 | dtype = np.float64 22 | 23 | nchunks = shape // chunksize 24 | # Set the compression and decompression parameters 25 | cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) 26 | dparams = blosc2.DParams() 27 | contiguous = True 28 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False 29 | 30 | if persistent: 31 | urlpath = "bench_getitem.b2frame" 32 | else: 33 | urlpath = None 34 | 35 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) 36 | blosc2.remove_urlpath(urlpath) 37 | 38 | # Create the empty SChunk 39 | schunk = blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) 40 | 41 | # Append some chunks 42 | for i in range(nchunks): 43 | buffer = i * np.arange(chunksize, dtype=dtype) 44 | nchunks_ = schunk.append_data(buffer) 45 | assert nchunks_ == (i + 1) 46 | 47 | # Use get_slice for reading blocks individually 48 | t0 = time() 49 | for i in range(shape // blocksize): 50 | _ = schunk.get_slice(start=i * blocksize, stop=(i + 
1) * blocksize - 1) 51 | t1 = time() 52 | print(f"Time for reading with get_slice: {t1 - t0:.3f}s") 53 | 54 | blosc2.remove_urlpath(urlpath) 55 | -------------------------------------------------------------------------------- /bench/ndarray/array-constructor-memray.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from time import time 10 | import os 11 | 12 | import numpy as np 13 | import memray 14 | 15 | import blosc2 16 | 17 | N = 100_000_000 18 | 19 | 20 | def info(a, t1): 21 | size = a.schunk.nbytes 22 | csize = a.schunk.cbytes 23 | print( 24 | f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({size / t1 / 2 ** 30:.2f} GB/s)" 25 | f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {size / csize:.1f}x)" 26 | ) 27 | 28 | 29 | def run_benchmark(): 30 | shape = (N,) 31 | shape = (100, 1000, 1000) 32 | print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") 33 | t0 = time() 34 | #a = blosc2.arange(N, shape=shape, dtype=np.int32, urlpath="a.b2nd", mode="w") 35 | a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w") 36 | elapsed = time() - t0 37 | info(a, elapsed) 38 | return a 39 | 40 | 41 | # Check if we're being tracked by memray 42 | if not os.environ.get("MEMRAY_TRACKING", False): 43 | # Run the benchmark with memray tracking 44 | output_file = "array_constructor_memray.bin" 45 | print(f"Starting memray profiling. Results will be saved to {output_file}") 46 | 47 | with memray.Tracker(output_file): 48 | array = run_benchmark() 49 | 50 | print(f"\nMemray profiling completed. 
To view results, run:") 51 | print(f"memray flamegraph {output_file}") 52 | print(f"# or") 53 | print(f"memray summary {output_file}") 54 | print(f"# or") 55 | print(f"memray tree {output_file}") 56 | else: 57 | # We're already being tracked by memray 58 | run_benchmark() 59 | -------------------------------------------------------------------------------- /bench/ndarray/array-constructor.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from time import time 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | N = 100_000_000 16 | 17 | def info(a, t1): 18 | size = a.schunk.nbytes 19 | csize = a.schunk.cbytes 20 | print( 21 | f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({size / t1 / 2 ** 30:.2f} GB/s)" 22 | f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {size / csize:.1f}x)" 23 | ) 24 | 25 | 26 | shape = (N,) 27 | shape = (100, 1000, 1000) 28 | print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") 29 | t0 = time() 30 | # a = blosc2.arange(N, shape=shape, dtype=np.int32, urlpath="a.b2nd", mode="w") 31 | a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w") 32 | info(a, time() - t0) 33 | -------------------------------------------------------------------------------- /bench/ndarray/broadcast_expr.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Small benchmark for computing outer products using the broadcast feature 10 | 11 | from time import time 12 | 13 | import numpy as np 14 | 15 | import blosc2 16 | 17 | N = 10_000 18 | # N = 1_000 19 | # chunks = 11 20 | # blocks = 9 21 | shape1, shape2 = (N, 1), (N,) 22 | 23 | # Create a NDArray from a NumPy array 24 | npa = np.arange(np.prod(shape1), dtype=np.int64).reshape(shape1) 25 | npb = np.arange(np.prod(shape2), dtype=np.int64).reshape(shape2) 26 | # a = blosc2.asarray(npa, chunks=(chunks, 1), blocks=(blocks, 1)) 27 | # b = blosc2.asarray(npb, chunks=chunks, blocks=blocks) 28 | a = blosc2.asarray(npa) 29 | b = blosc2.asarray(npb) 30 | 31 | for codec in blosc2.Codec: 32 | if codec.value > blosc2.Codec.ZSTD.value: 33 | break 34 | print(f"Codec: {codec}") 35 | t0 = time() 36 | c = a * b 37 | # print(f"Elapsed time (expr): {time() - t0:.6f} s") 38 | t0 = time() 39 | # d = c.compute(cparams=dict(codec=codec, clevel=5), chunks=(chunks, chunks), blocks=(blocks, blocks)) 40 | d = c.compute(cparams={"codec": codec, "clevel": 5}) 41 | print(f"Elapsed time (compute): {time() - t0:.2f}s") 42 | # print(d[:]) 43 | print(f"cratio: {d.schunk.cratio:.2f}x") 44 | # print(d.info) 45 | -------------------------------------------------------------------------------- /bench/ndarray/compute_fields.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from time import time 10 | 11 | import numexpr as ne 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | shape = (4_000, 5_000) 17 | chunks = (10, 5_000) 18 | blocks = (1, 1000) 19 | # Comment out the next line to force chunks and blocks above 20 | chunks, blocks = None, None 21 | # Check with fast compression 22 | cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) 23 | 24 | print(f"*** Working with an struct array with shape: {shape}") 25 | # Create a structured NumPy array 26 | npa_ = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) 27 | npb_ = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) 28 | nps = np.empty(shape, dtype=[('a', npa_.dtype), ('b', npb_.dtype)]) 29 | nps['a'] = npa_ 30 | nps['b'] = npb_ 31 | npa = nps['a'] 32 | npb = nps['b'] 33 | t0 = time() 34 | npc = npa**2 + npb**2 > 2 * npa * npb + 1 35 | t = time() - t0 36 | print(f"Time to compute field expression (NumPy): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") 37 | 38 | t0 = time() 39 | npc = ne.evaluate('a**2 + b**2 > 2 * a * b + 1', local_dict={'a': npa, 'b': npb}) 40 | t = time() - t0 41 | print(f"Time to compute field expression (NumExpr): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") 42 | 43 | s = blosc2.asarray(nps, chunks=chunks, blocks=blocks, cparams=cparams) 44 | print(f"*** Working with NDArray with shape: {s.shape}, chunks: {s.chunks}, blocks: {s.blocks}," 45 | f" cratio: {s.schunk.cratio:.2f}x") 46 | a = s['a'] 47 | b = s['b'] 48 | 49 | # Get a LazyExpr instance 50 | c = a**2 + b**2 > 2 * a * b + 1 51 | # Compute: output is a NDArray 52 | t0 = time() 53 | d = c.compute(cparams=cparams) 54 | t = time() - t0 55 | print(f"Time to compute field expression (compute): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") 56 | 57 | 
# Compute the whole slice: output is a NumPy array 58 | t0 = time() 59 | npd = c[:] 60 | t = time() - t0 61 | print(f"Time to compute field expression (getitem): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") 62 | 63 | # Compute a partial slice: output is a NumPy array 64 | t0 = time() 65 | npd = c[1:10] 66 | t = time() - t0 67 | print(f"Time to compute field expression (partial getitem): {t:.3f} s; {npd.nbytes/2**20/t:.2f} MB/s") 68 | -------------------------------------------------------------------------------- /bench/ndarray/copy_postfilter.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from time import time 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | # Size and dtype of super-chunks 16 | nchunks = 10_000 17 | chunkshape = 200_000 18 | dtype = np.dtype(np.int32) 19 | 20 | # Set the compression and decompression parameters 21 | dparams = {"nthreads" : 1} 22 | 23 | # Create array 24 | arr = blosc2.empty(shape=(nchunks * chunkshape,), chunks=(chunkshape,), dtype=dtype, dparams=dparams) 25 | data = np.arange(chunkshape, dtype=dtype) 26 | 27 | t0 = time() 28 | for i in range(nchunks): 29 | arr[i * chunkshape : (i + 1) * chunkshape] = data 30 | t = time() - t0 31 | print( 32 | f"time append: {t:.2f}s ({arr.schunk.nbytes / (t * 2**30):.3f} GB/s)" 33 | f" / cratio: {arr.schunk.cratio:.2f}x" 34 | ) 35 | 36 | t0 = time() 37 | arr_ = arr.copy() 38 | t = time() - t0 39 | print( 40 | f"time copy (no postfilter): {t:.2f}s ({arr_.schunk.nbytes / (t * 2**30):.3f} GB/s)" 41 | f" / cratio: {arr_.schunk.cratio:.2f}x" 42 | ) 43 | 44 | 45 | # 
Associate a postfilter to schunk 46 | @arr.schunk.postfilter(dtype) 47 | def py_postfilter(input, output, offset): 48 | output[:] = 0 49 | 50 | 51 | t0 = time() 52 | arr_ = arr.copy() 53 | t = time() - t0 54 | print( 55 | f"time sum (postfilter): {t:.2f}s ({arr_.schunk.nbytes / (t * 2**30):.3f} GB/s)" 56 | f" / cratio: {arr_.schunk.cratio:.2f}x" 57 | ) 58 | -------------------------------------------------------------------------------- /bench/ndarray/download_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os.path 3 | 4 | import numpy as np 5 | import s3fs 6 | import xarray as xr 7 | 8 | import blosc2 9 | 10 | dir_path = "era5-pds" 11 | 12 | 13 | def open_zarr(year, month, datestart, dateend, dset): 14 | fs = s3fs.S3FileSystem(anon=True) 15 | datestring = f"era5-pds/zarr/{year}/{month:02d}/data/" 16 | s3map = s3fs.S3Map(datestring + dset + ".zarr/", s3=fs) 17 | arr = xr.open_dataset(s3map, engine="zarr") 18 | if dset[:3] in ("air", "sno", "eas"): 19 | arr = arr.sel(time0=slice(np.datetime64(datestart), np.datetime64(dateend))) 20 | else: 21 | arr = arr.sel(time1=slice(np.datetime64(datestart), np.datetime64(dateend))) 22 | return getattr(arr, dset) 23 | 24 | 25 | datasets = [ 26 | ("precipitation_amount_1hour_Accumulation", "precip"), 27 | ("integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation", "flux"), 28 | ("air_pressure_at_mean_sea_level", "pressure"), 29 | ("snow_density", "snow"), 30 | ("eastward_wind_at_10_metres", "wind"), 31 | ] 32 | 33 | if not os.path.isdir(dir_path): 34 | os.mkdir(dir_path) 35 | 36 | for dset, short in datasets: 37 | print(f"Fetching dataset {dset} from S3 (era5-pds)...") 38 | precip_m0 = open_zarr(1987, 10, "1987-10-01", "1987-10-30 23:59", dset) 39 | cparams = {"codec": blosc2.Codec.ZSTD, "clevel": 6} 40 | blosc2.asarray(precip_m0.values, urlpath=f"{dir_path}/{short}.b2nd", mode="w", cparams=cparams) 41 | 
-------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-i10k.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-i10k.parquet -------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-i13k-always-split.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-i13k-always-split.parquet -------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-i13k-never-split.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-i13k-never-split.parquet -------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-i13k.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-i13k.parquet -------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-m1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-m1.parquet -------------------------------------------------------------------------------- /bench/ndarray/era5-pds/measurements-ryzen3.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/bench/ndarray/era5-pds/measurements-ryzen3.parquet -------------------------------------------------------------------------------- /bench/ndarray/lazyarray-constructors.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This example shows how to use the `linspace()` constructor to create a blosc2 array. 10 | 11 | from time import time 12 | 13 | import numpy as np 14 | 15 | import blosc2 16 | 17 | N = 10_000_000 18 | 19 | # Use a constructor inside a lazy expression 20 | print("*** Using a constructor inside a lazy expression ***") 21 | t0 = time() 22 | o1 = blosc2.linspace(0, 10, N, shape=(5, N // 5)) 23 | la = blosc2.lazyexpr("o1 + 1") 24 | print(f"Build time: {time() - t0:.3f} s") 25 | t0 = time() 26 | for i in range(5): 27 | _ = la[i] 28 | print(f"Access time: {time() - t0:.3f} s") 29 | 30 | t0 = time() 31 | la = (o1 + 1).sum() 32 | print(f"Build time (sum): {time() - t0:.3f} s") 33 | t0 = time() 34 | print("sum:", la) 35 | print(f"Reduction time (sum): {time() - t0:.3f} s") 36 | 37 | # Use a constructor inside a lazy expression (string form) 38 | print("*** Using a constructor inside a lazy expression (string form) ***") 39 | o1 = f"linspace(0, 10, {N}, shape=(5, {N} // 5))" 40 | t0 = time() 41 | la = blosc2.lazyexpr(f"{o1} + 1") 42 | print(f"Build time: {time() - t0:.3f} s") 43 | t0 = time() 44 | for i in range(5): 45 | _ = la[i] 46 | print(f"Access time: {time() - 
t0:.3f} s") 47 | 48 | t0 = time() 49 | la = blosc2.lazyexpr(f"sum({o1} + 1)") 50 | print(f"Build time (sum): {time() - t0:.3f} s") 51 | t0 = time() 52 | print("sum:", la[()]) 53 | print(f"Reduction time (sum): {time() - t0:.3f} s") 54 | 55 | # Compare with numpy 56 | print("*** Comparison with numpy ***") 57 | t0 = time() 58 | o1 = np.linspace(0, 10, N).reshape(5, N // 5) + 1 59 | print(f"Build time: {time() - t0:.3f} s") 60 | t0 = time() 61 | for i in range(5): 62 | _ = o1[i] 63 | print(f"Access time: {time() - t0:.3f} s") 64 | 65 | t0 = time() 66 | print("sum:", o1.sum()) 67 | print(f"Reduction time (sum): {time() - t0:.3f} s") 68 | -------------------------------------------------------------------------------- /bench/ndarray/reduce_expr.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Benchmark to compute reductions of an expression over several axes, comparing 10 | # NumPy, NumExpr and Blosc2 lazy expressions (with NDArray instances as operands). 11 | # The Blosc2 results are checked against the NumPy reference after each run. 
12 | 13 | from time import time 14 | 15 | import numexpr as ne 16 | import numpy as np 17 | 18 | import blosc2 19 | 20 | shape = (100, 100, 10_000) 21 | chunks = [10, 100, 10_000] 22 | blocks = [4, 10, 1_000] 23 | # Comment out the next line to force chunks and blocks above 24 | chunks, blocks = None, None 25 | dtype = np.float32 26 | rtol = 1e-5 if dtype == np.float32 else 1e-16 27 | atol = 1e-5 if dtype == np.float32 else 1e-16 28 | 29 | # Axis to reduce 30 | laxis = (None, 0, 1, 2, (0, 2)) 31 | 32 | # cparams defaults 33 | blosc2.cparams_dflts["codec"] = blosc2.Codec.LZ4 34 | blosc2.cparams_dflts["clevel"] = 5 35 | 36 | # Create input arrays 37 | npx = np.linspace(0, 1, np.prod(shape), dtype=dtype).reshape(shape) 38 | npy = np.linspace(-1, 1, np.prod(shape), dtype=dtype).reshape(shape) 39 | npz = np.linspace(0, 10, np.prod(shape), dtype=dtype).reshape(shape) 40 | vardict = {"x": npx, "y": npy, "z": npz, "np": np} 41 | x = blosc2.asarray(npx, chunks=chunks, blocks=blocks) 42 | y = blosc2.asarray(npy, chunks=chunks, blocks=blocks) 43 | z = blosc2.asarray(npz, chunks=chunks, blocks=blocks) 44 | print(f"*** cratios: x={x.schunk.cratio:.2f}x, y={y.schunk.cratio:.2f}x, z={z.schunk.cratio:.2f}x") 45 | 46 | expr = "(x**2 + y**2 * z** 2) < 1" 47 | 48 | 49 | for axis in laxis: 50 | print(f"*** Computing expression on axis: {axis} ...") 51 | 52 | # Compute the reduction with NumPy/numexpr 53 | npexpr = expr.replace("sin", "np.sin").replace("cos", "np.cos") 54 | t0 = time() 55 | npres = eval(npexpr, vardict).sum(axis=axis) 56 | tref = time() - t0 57 | print("NumPy took %.3f s" % tref) 58 | # ne.set_num_threads(1) 59 | # nb.set_num_threads(1) # this does not work that well; better use the NUMBA_NUM_THREADS env var 60 | t0 = time() 61 | out = ne.evaluate(expr, vardict).sum(axis=axis) 62 | t1 = time() - t0 63 | print(f"NumExpr took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") 64 | 65 | # Reduce with Blosc2 66 | c = eval(expr) 67 | t0 = time() 68 | d = c.compute() 69 | d = 
d.sum(axis=axis) 70 | t1 = time() - t0 71 | print(f"LazyExpr+compute took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") 72 | # Check 73 | np.testing.assert_allclose(d[()], npres, rtol=rtol, atol=atol) 74 | t0 = time() 75 | d = c[:] 76 | d = d.sum(axis=axis) 77 | t1 = time() - t0 78 | print(f"LazyExpr+getitem took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") 79 | # Check 80 | np.testing.assert_allclose(d[()], npres, rtol=rtol, atol=atol) 81 | -------------------------------------------------------------------------------- /bench/ndarray/run-jit-reduc-sizes.sh: -------------------------------------------------------------------------------- 1 | /usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy 2 | /usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy_jit 3 | /usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 0 4 | /usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 LZ4 5 | /usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 ZSTD 6 | -------------------------------------------------------------------------------- /bench/pack_large.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | """ 11 | Small benchmark that exercises packaging of arrays larger than 2 GB. 
12 | """ 13 | 14 | import time 15 | 16 | import numpy as np 17 | 18 | import blosc2 19 | 20 | NREP = 1 21 | N = int(4e8 - 2**27) # larger than 2 GB 22 | Nexp = np.log10(N) 23 | 24 | print(f"Creating NumPy array with {float(N):.3g} int64 elements...") 25 | in_ = np.arange(N, dtype=np.int64) 26 | 27 | if __name__ == "__main__": 28 | cparams = { 29 | "codec": blosc2.Codec.BLOSCLZ, 30 | "clevel": 9, 31 | # "filters": [blosc2.Filter.NOFILTER] * 4 + [blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], 32 | # "filters_meta": [0] * 6, 33 | # "splitmode": blosc2.SplitMode.NEVER_SPLIT, 34 | } 35 | print(f"Storing with {cparams=}") 36 | 37 | c = None 38 | ctic = time.time() 39 | for _i in range(NREP): 40 | c = blosc2.pack_tensor(in_, cparams=cparams) 41 | ctoc = time.time() 42 | tc = (ctoc - ctic) / NREP 43 | print( 44 | f" Time for pack_tensor: {tc:.3f} ({(N * 8 / tc) / 2**30:.2f} GB/s)) ", 45 | ) 46 | print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") 47 | 48 | out = None 49 | dtic = time.time() 50 | for _i in range(NREP): 51 | out = blosc2.unpack_tensor(c) 52 | dtoc = time.time() 53 | 54 | td = (dtoc - dtic) / NREP 55 | print( 56 | f" Time for unpack_tensor: {td:.3f} s ({(N * 8 / td) / 2**30:.2f} GB/s)) ", 57 | ) 58 | assert np.array_equal(in_, out) 59 | -------------------------------------------------------------------------------- /bench/set_slice.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import sys 10 | from time import time 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | # Dimensions, type and persistence properties for the arrays 17 | shape = 10_000 * 10_000 18 | chunksize = 100_000 19 | blocksize = 10_000 20 | 21 | dtype = np.float64 22 | 23 | nchunks = shape // chunksize 24 | # Set the compression and decompression parameters 25 | cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) 26 | dparams = blosc2.DParams() 27 | contiguous = True 28 | persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False 29 | 30 | if persistent: 31 | urlpath = "bench_setitem.b2frame" 32 | else: 33 | urlpath = None 34 | 35 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) 36 | blosc2.remove_urlpath(urlpath) 37 | 38 | # Create the empty SChunk 39 | schunk = blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) 40 | 41 | # Append some chunks 42 | for i in range(nchunks): 43 | buffer = i * np.arange(chunksize, dtype=dtype) 44 | nchunks_ = schunk.append_data(buffer) 45 | assert nchunks_ == (i + 1) 46 | 47 | 48 | # Use set_slice 49 | start = 1 * chunksize + 3 50 | stop = shape 51 | val = nchunks * np.arange(start, stop, dtype=dtype) 52 | t0 = time() 53 | schunk[start:stop] = val 54 | t1 = time() 55 | print(f"Time for setting with setitem: {t1 - t0:.3f}s") 56 | 57 | blosc2.remove_urlpath(urlpath) 58 | -------------------------------------------------------------------------------- /bench/sum_postfilter.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All 
rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | from time import time 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | # Size and dtype of super-chunks 16 | nchunks = 20_000 17 | chunkshape = 50_000 18 | dtype = np.dtype(np.int32) 19 | chunksize = chunkshape * dtype.itemsize 20 | 21 | # Set the compression and decompression parameters 22 | cparams = blosc2.CParams(typesize=4, nthreads=1) 23 | dparams = blosc2.DParams(nthreads=1) 24 | 25 | # Create super-chunks 26 | schunk0 = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) 27 | schunk = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) 28 | 29 | data = np.arange(chunkshape, dtype=dtype) 30 | t0 = time() 31 | for _i in range(nchunks): 32 | schunk.append_data(data) 33 | schunk0.append_data(data) 34 | print(f"time append: {time() - t0:.2f}s") 35 | print(f"cratio: {schunk.cratio:.2f}x") 36 | 37 | 38 | # Associate a postfilter to schunk 39 | @schunk.postfilter(np.dtype(dtype)) 40 | def py_postfilter(input, output, offset): 41 | output[:] = input + 1 42 | 43 | 44 | t0 = time() 45 | sum = 0 46 | for chunk in schunk0.iterchunks(dtype): 47 | chunk += 1 48 | sum += chunk.sum() 49 | print(f"time sum (no postfilter): {time() - t0:.2f}s") 50 | print(sum) 51 | 52 | t0 = time() 53 | sum = 0 54 | for chunk in schunk.iterchunks(dtype): 55 | sum += chunk.sum() 56 | print(f"time sum (postfilter): {time() - t0:.2f}s") 57 | print(sum) 58 | -------------------------------------------------------------------------------- /code_of_conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to. 
4 | Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /doc/_static/blosc-favicon_32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/_static/blosc-favicon_32x32.png -------------------------------------------------------------------------------- /doc/_static/blosc-favicon_64x64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/_static/blosc-favicon_64x64.png -------------------------------------------------------------------------------- /doc/_static/blosc-logo_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/_static/blosc-logo_128.png -------------------------------------------------------------------------------- /doc/_static/blosc-logo_256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/_static/blosc-logo_256.png -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -- Path setup -------------------------------------------------------------- 2 | import os 3 | import sys 4 | 5 | import blosc2 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.dirname(blosc2.__file__))) 8 | 9 | project = "Python-Blosc2" 10 | copyright = "2019-present, The Blosc Developers" 11 | author = "The Blosc Developers" 12 
| extensions = [ 13 | "sphinx.ext.autosummary", 14 | "sphinx.ext.autodoc", 15 | "sphinx.ext.intersphinx", 16 | "sphinx.ext.napoleon", 17 | "numpydoc", 18 | "myst_parser", 19 | "sphinx_paramlinks", 20 | "sphinx_panels", 21 | "nbsphinx", 22 | # For some reason, the following extensions are not working 23 | # "IPython.sphinxext.ipython_directive", 24 | # "IPython.sphinxext.ipython_console_highlighting", 25 | ] 26 | source_suffix = [".rst", ".md"] 27 | html_theme = "pydata_sphinx_theme" 28 | html_static_path = ["_static"] 29 | html_css_files = [ 30 | "css/custom.css", 31 | ] 32 | html_logo = "_static/blosc-logo_256.png" 33 | # Just use the favicon from the parent project 34 | # html_favicon = "_static/blosc-logo_128.png" 35 | html_favicon = "_static/blosc-favicon_64x64.png" 36 | html_theme_options = { 37 | "logo": { 38 | "link": "/index", 39 | "alt_text": "Blosc", 40 | }, 41 | "icon_links": [ 42 | { 43 | "name": "GitHub", 44 | "url": "https://github.com/Blosc/python-blosc2", 45 | "icon": "fab fa-github-square", 46 | }, 47 | { 48 | "name": "Mastodon", 49 | "url": "https://fosstodon.org/@Blosc2", 50 | "icon": "fab fa-mastodon", 51 | }, 52 | { 53 | "name": "Bluesky", 54 | "url": "https://bsky.app/profile/blosc.org", 55 | "icon": "fas fa-cloud-sun", 56 | }, 57 | ], 58 | "external_links": [ 59 | {"name": "C-Blosc2", "url": "/c-blosc2/c-blosc2.html"}, 60 | {"name": "Python-Blosc2", "url": "/python-blosc2/"}, 61 | {"name": "Donate to Blosc", "url": "/pages/donate/"}, 62 | ], 63 | } 64 | 65 | exclude_patterns = ["_build", ".DS_Store", "**.ipynb_checkpoints"] 66 | 67 | html_show_sourcelink = False 68 | 69 | autosummary_generate_overwrite = False 70 | 71 | hidden = "_ignore_multiple_size" 72 | 73 | 74 | def process_sig(app, what, name, obj, options, signature, return_annotation): 75 | if signature and hidden in signature: 76 | signature = signature.split(hidden)[0] + ")" 77 | return (signature, return_annotation) 78 | 79 | 80 | def setup(app): 81 | 
app.connect("autodoc-process-signature", process_sig) 82 | -------------------------------------------------------------------------------- /doc/development/code-of-conduct.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../code_of_conduct.md 2 | :parser: myst_parser.sphinx_ 3 | -------------------------------------------------------------------------------- /doc/development/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /doc/development/index.rst: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | contributing 8 | code-of-conduct 9 | -------------------------------------------------------------------------------- /doc/getting_started/index.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | overview 8 | installation 9 | tutorials 10 | -------------------------------------------------------------------------------- /doc/getting_started/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | You can install Python-Blosc2 from PyPI using Pip, from conda-forge using Conda, or by cloning the GitHub repository and installing from source. 4 | 5 | Pip 6 | +++ 7 | 8 | .. code-block:: 9 | 10 | pip install blosc2 --upgrade 11 | 12 | Conda 13 | +++++ 14 | 15 | .. code-block:: 16 | 17 | conda install -c conda-forge python-blosc2 18 | 19 | Source code 20 | +++++++++++ 21 | 22 | .. code-block:: console 23 | 24 | git clone https://github.com/Blosc/python-blosc2/ 25 | cd python-blosc2 26 | pip install .[test] # install with test dependencies 27 | 28 | That's all. 
You can now proceed to the testing section. 29 | 30 | Testing 31 | ------- 32 | 33 | After compiling, you can quickly check that the package is sane by 34 | running the tests: 35 | 36 | .. code-block:: console 37 | 38 | pytest # add -v for verbose mode 39 | 40 | Benchmarking 41 | ------------ 42 | 43 | If curious, you may want to run a small benchmark that compares a plain 44 | NumPy array copy against compression through different compressors in 45 | your Blosc build: 46 | 47 | .. code-block:: console 48 | 49 | PYTHONPATH=. python bench/pack_compress.py 50 | -------------------------------------------------------------------------------- /doc/getting_started/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | .. toctree:: 5 | :caption: Index 6 | :maxdepth: 1 7 | 8 | tutorials/01.ndarray-basics 9 | tutorials/02.lazyarray-expressions 10 | tutorials/03.lazyarray-udf 11 | tutorials/04.reductions 12 | tutorials/05.persistent-reductions 13 | tutorials/06.remote_proxy 14 | tutorials/07.schunk-basics 15 | tutorials/08.schunk-slicing_and_beyond 16 | tutorials/10.ucodecs-ufilters 17 | tutorials/11.prefilters 18 | tutorials/12.postfilters 19 | -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/blosc2-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/blosc2-pipeline.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/lazyexpr-broadcast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/lazyexpr-broadcast.png 
-------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/reductions/3D-cube-plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/reductions/3D-cube-plane.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/reductions/3D-cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/reductions/3D-cube.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/reductions/memory-access-2D-x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/reductions/memory-access-2D-x.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/reductions/memory-access-2D-y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/reductions/memory-access-2D-y.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/remote_proxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/remote_proxy.png 
-------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/backward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/backward.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/decoder.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/decoder2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/encoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/encoder.png -------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/encoder2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.png 
-------------------------------------------------------------------------------- /doc/getting_started/tutorials/images/ucodecs-filters/forward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/doc/getting_started/tutorials/images/ucodecs-filters/forward.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: python-blosc2.rst 2 | -------------------------------------------------------------------------------- /doc/reference/array_operations.rst: -------------------------------------------------------------------------------- 1 | Operations with arrays 2 | ---------------------- 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | lazy_functions 8 | reduction_functions 9 | linear_algebra 10 | -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.__getitem__.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.\_\_getitem\_\_ 2 | ==================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.__getitem__ -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.__init__.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.\_\_init\_\_ 2 | ================================= 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. 
automethod:: SChunk.__init__ -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.__len__.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.\_\_len\_\_ 2 | ================================ 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.__len__ -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.__setitem__.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.\_\_setitem\_\_ 2 | ==================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.__setitem__ -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.append_data.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.append\_data 2 | ================================= 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.append_data -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.decompress_chunk.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.decompress\_chunk 2 | ====================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.decompress_chunk -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.delete_chunk.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.delete\_chunk 2 | ================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. 
automethod:: SChunk.delete_chunk -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.fill_special.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.fill\_special 2 | ================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.fill_special -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.filler.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.filler 2 | =========================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.filler -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.get_chunk.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.get\_chunk 2 | =============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.get_chunk -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.get_slice.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.get\_slice 2 | =============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.get_slice -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.insert_chunk.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.insert\_chunk 2 | ================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. 
automethod:: SChunk.insert_chunk -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.insert_data.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.insert\_data 2 | ================================= 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.insert_data -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.iterchunks.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.iterchunks 2 | =============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.iterchunks -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.iterchunks_info.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.iterchunks\_info 2 | ===================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.iterchunks_info -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.postfilter.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.postfilter 2 | =============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.postfilter -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.prefilter.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.prefilter 2 | ============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. 
automethod:: SChunk.prefilter -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.remove_postfilter.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.remove\_postfilter 2 | ======================================= 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.remove_postfilter -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.remove_prefilter.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.remove\_prefilter 2 | ====================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.remove_prefilter -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.to_cframe.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.to\_cframe 2 | =============================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.to_cframe -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.update_chunk.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.update\_chunk 2 | ================================== 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. automethod:: SChunk.update_chunk -------------------------------------------------------------------------------- /doc/reference/autofiles/schunk/blosc2.schunk.SChunk.update_data.rst: -------------------------------------------------------------------------------- 1 | blosc2.schunk.SChunk.update\_data 2 | ================================= 3 | 4 | .. currentmodule:: blosc2.schunk 5 | 6 | .. 
automethod:: SChunk.update_data -------------------------------------------------------------------------------- /doc/reference/c2array.rst: -------------------------------------------------------------------------------- 1 | .. _C2Array: 2 | 3 | C2Array 4 | ======= 5 | 6 | This is a class for remote arrays. This kind of array can also work as operand on a LazyExpr, LazyUDF or reduction. 7 | 8 | .. currentmodule:: blosc2.C2Array 9 | 10 | Methods 11 | ------- 12 | 13 | .. autosummary:: 14 | :toctree: autofiles/c2array 15 | :nosignatures: 16 | 17 | __init__ 18 | __getitem__ 19 | get_chunk 20 | 21 | Attributes 22 | ---------- 23 | 24 | .. autosummary:: 25 | :toctree: autofiles/c2array 26 | 27 | shape 28 | chunks 29 | blocks 30 | dtype 31 | cparams 32 | 33 | .. _URLPath: 34 | 35 | URLPath class 36 | ------------- 37 | 38 | .. currentmodule:: blosc2.URLPath 39 | 40 | .. autosummary:: 41 | :toctree: autofiles/URLPath 42 | 43 | __init__ 44 | 45 | Context managers 46 | ---------------- 47 | 48 | .. currentmodule:: blosc2 49 | 50 | .. autosummary:: 51 | :toctree: autofiles/c2array 52 | 53 | c2context 54 | -------------------------------------------------------------------------------- /doc/reference/classes.rst: -------------------------------------------------------------------------------- 1 | Main Classes 2 | ------------ 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | schunk 8 | ndarray 9 | ndfield 10 | lazyarray 11 | c2array 12 | proxy 13 | proxysource 14 | proxyndsource 15 | simpleproxy 16 | -------------------------------------------------------------------------------- /doc/reference/decorators.rst: -------------------------------------------------------------------------------- 1 | Decorators 2 | ---------- 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | 8 | .. 
autofunction:: blosc2.jit 9 | -------------------------------------------------------------------------------- /doc/reference/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | classes 8 | save_load 9 | storage 10 | array_operations 11 | utilities 12 | low_level 13 | -------------------------------------------------------------------------------- /doc/reference/lazy_functions.rst: -------------------------------------------------------------------------------- 1 | .. _lazy_functions: 2 | 3 | Lazy Functions 4 | -------------- 5 | 6 | The following functions can be used for computing with any of :ref:`NDArray <NDArray>`, :ref:`C2Array <C2Array>`, :ref:`NDField <NDField>` and :ref:`LazyExpr <LazyExpr>`. 7 | 8 | Their result is always a :ref:`LazyExpr` instance, which can be evaluated (with ``compute`` or ``__getitem__``) to get the actual values of the computation. 9 | 10 | .. currentmodule:: blosc2 11 | 12 | .. autosummary:: 13 | :toctree: autofiles/operations_with_arrays/ 14 | :nosignatures: 15 | 16 | abs 17 | arcsin 18 | arccos 19 | arctan 20 | arctan2 21 | arcsinh 22 | arccosh 23 | arctanh 24 | sin 25 | cos 26 | tan 27 | sinh 28 | cosh 29 | tanh 30 | exp 31 | expm1 32 | log 33 | log10 34 | log1p 35 | sqrt 36 | conj 37 | real 38 | imag 39 | contains 40 | where 41 | -------------------------------------------------------------------------------- /doc/reference/lazyarray.rst: -------------------------------------------------------------------------------- 1 | .. _LazyArray: 2 | 3 | LazyArray 4 | ========= 5 | 6 | This is an API interface for computing an expression or a Python user-defined function. 7 | 8 | You can get an object following the LazyArray API in any of the following ways: 9 | 10 | * Any expression that involves one or more NDArray objects. e.g. 
``a + b``, where ``a`` and ``b`` are NDArray objects (see `this tutorial <../getting_started/tutorials/02.lazyarray-expressions.html>`_). 11 | * Using the ``lazyexpr`` constructor. 12 | * Using the ``lazyudf`` constructor (see `a tutorial <../getting_started/tutorials/03.lazyarray-udf.html>`_). 13 | 14 | The LazyArray object is a thin wrapper around the expression or user-defined function that allows for lazy computation. This means that the expression is not computed until the ``compute`` or ``__getitem__`` methods are called. The ``compute`` method will return a new NDArray object with the result of the expression evaluation. The ``__getitem__`` method will return a NumPy object instead. 15 | 16 | See the `LazyExpr`_ and `LazyUDF`_ sections for more information. 17 | 18 | .. currentmodule:: blosc2.LazyArray 19 | 20 | Methods 21 | ------- 22 | 23 | .. autosummary:: 24 | :toctree: autofiles/lazyarray 25 | :nosignatures: 26 | 27 | __getitem__ 28 | compute 29 | indices 30 | save 31 | sort 32 | 33 | 34 | .. _LazyExpr: 35 | 36 | LazyExpr 37 | -------- 38 | 39 | An expression like ``a + sum(b)``, where there is at least one NDArray object in operands ``a`` and ``b``, `returns a LazyExpr object <../getting_started/tutorials/02.lazyarray-expressions.html>`_. You can also get a LazyExpr object using the ``lazyexpr`` constructor (see below). 40 | 41 | This object follows the `LazyArray`_ API for computation and storage. 42 | 43 | .. currentmodule:: blosc2 44 | 45 | .. autosummary:: 46 | :toctree: autofiles/lazyarray 47 | :nosignatures: 48 | 49 | lazyexpr 50 | 51 | 52 | .. _LazyUDF: 53 | 54 | LazyUDF 55 | ------- 56 | 57 | For getting a LazyUDF object (which is LazyArray-compliant) from a user-defined Python function, you can use the lazyudf constructor below. See `a tutorial on how this works <../getting_started/tutorials/03.lazyarray-udf.html>`_. 58 | 59 | This object follows the `LazyArray`_ API for computation, although storage is not supported yet. 60 | 61 | .. 
autosummary:: 62 | :toctree: autofiles/lazyarray 63 | :nosignatures: 64 | 65 | lazyudf 66 | -------------------------------------------------------------------------------- /doc/reference/linear_algebra.rst: -------------------------------------------------------------------------------- 1 | .. _linear_algebra: 2 | 3 | Linear Algebra 4 | -------------- 5 | 6 | The following functions can be used for computing linear algebra operations with :ref:`NDArray <NDArray>`. 7 | 8 | .. currentmodule:: blosc2 9 | 10 | .. autosummary:: 11 | :toctree: autofiles/operations_with_arrays/ 12 | :nosignatures: 13 | 14 | matmul 15 | transpose 16 | matrix_transpose 17 | permute_dims 18 | -------------------------------------------------------------------------------- /doc/reference/low_level.rst: -------------------------------------------------------------------------------- 1 | Compression Utilities 2 | ===================== 3 | 4 | Although using NDArray/SChunk objects is the recommended way to work with Blosc2 data, there are some utilities that allow you to work with Blosc2 data in a more low-level way. This is useful when you need to work with data that is not stored in NDArray/SChunk objects, or when you need to work with data that is stored in a different format. 5 | 6 | This API is meant to be compatible with the existing python-blosc API. There could be some parameters that are called differently, but other than that, they are largely compatible. In addition, there are some new functions that are not present in the original python-blosc API that are mainly meant to overcome the 2 GB limit that the original API had. 7 | 8 | .. currentmodule:: blosc2 9 | 10 | Compress and decompress 11 | ----------------------- 12 | 13 | .. 
autosummary:: 14 | :toctree: autofiles/low_level/ 15 | :nosignatures: 16 | 17 | compress 18 | compress2 19 | decompress 20 | decompress2 21 | pack 22 | pack_array 23 | pack_array2 24 | pack_tensor 25 | unpack 26 | unpack_array 27 | unpack_array2 28 | unpack_tensor 29 | 30 | Set / get compression params 31 | ---------------------------- 32 | 33 | .. autosummary:: 34 | :toctree: autofiles/low_level/ 35 | :nosignatures: 36 | 37 | clib_info 38 | compressor_list 39 | detect_number_of_cores 40 | free_resources 41 | get_clib 42 | nthreads 43 | print_versions 44 | register_codec 45 | register_filter 46 | set_blocksize 47 | set_nthreads 48 | set_releasegil 49 | set_compressor 50 | get_compressor 51 | get_blocksize 52 | get_cbuffer_sizes 53 | cparams_dflts 54 | dparams_dflts 55 | storage_dflts 56 | 57 | Enumerated classes 58 | ------------------ 59 | 60 | .. autosummary:: 61 | :toctree: autofiles/low_level/ 62 | :nosignatures: 63 | 64 | Codec 65 | Filter 66 | SpecialValue 67 | SplitMode 68 | Tuner 69 | 70 | Utils 71 | ----- 72 | 73 | .. currentmodule:: blosc2 74 | 75 | .. autosummary:: 76 | :toctree: autofiles/low_level/ 77 | 78 | compute_chunks_blocks 79 | get_slice_nchunks 80 | remove_urlpath 81 | 82 | Utility variables 83 | ----------------- 84 | 85 | .. currentmodule:: blosc2 86 | 87 | .. autosummary:: 88 | :toctree: autofiles/low_level/ 89 | 90 | blosclib_version 91 | DEFINED_CODECS_STOP 92 | GLOBAL_REGISTERED_CODECS_STOP 93 | USER_REGISTERED_CODECS_STOP 94 | EXTENDED_HEADER_LENGTH 95 | MAX_BUFFERSIZE 96 | MAX_BLOCKSIZE 97 | MAX_OVERHEAD 98 | MAX_TYPESIZE 99 | MIN_HEADER_LENGTH 100 | prefilter_funcs 101 | postfilter_funcs 102 | ucodecs_registry 103 | ufilters_registry 104 | VERSION_DATE 105 | VERSION_STRING 106 | __version__ 107 | -------------------------------------------------------------------------------- /doc/reference/ndarray.rst: -------------------------------------------------------------------------------- 1 | .. 
_NDArray: 2 | 3 | NDArray 4 | ======= 5 | 6 | The multidimensional data array class. 7 | 8 | .. currentmodule:: blosc2.NDArray 9 | 10 | Methods 11 | ------- 12 | 13 | .. autosummary:: 14 | :toctree: autofiles/ndarray 15 | :nosignatures: 16 | 17 | __iter__ 18 | __len__ 19 | __getitem__ 20 | __setitem__ 21 | copy 22 | get_chunk 23 | indices 24 | iterchunks_info 25 | reshape 26 | resize 27 | save 28 | slice 29 | sort 30 | squeeze 31 | tobytes 32 | to_cframe 33 | 34 | In addition, all the functions from the :ref:`Lazy Functions <lazy_functions>` section can be used with NDArray instances. 35 | 36 | 37 | Attributes 38 | ---------- 39 | 40 | .. autosummary:: 41 | :toctree: autofiles/ndarray 42 | 43 | T 44 | ndim 45 | shape 46 | ext_shape 47 | chunks 48 | ext_chunks 49 | blocks 50 | blocksize 51 | chunksize 52 | dtype 53 | fields 54 | keep_last_read 55 | info 56 | schunk 57 | size 58 | cparams 59 | dparams 60 | urlpath 61 | vlmeta 62 | 63 | 64 | .. currentmodule:: blosc2 65 | 66 | Constructors 67 | ------------ 68 | 69 | .. autosummary:: 70 | :toctree: autofiles/ndarray 71 | :nosignatures: 72 | 73 | asarray 74 | copy 75 | empty 76 | frombuffer 77 | fromiter 78 | nans 79 | ndarray_from_cframe 80 | uninit 81 | zeros 82 | ones 83 | full 84 | arange 85 | linspace 86 | eye 87 | reshape 88 | -------------------------------------------------------------------------------- /doc/reference/ndfield.rst: -------------------------------------------------------------------------------- 1 | .. _NDField: 2 | 3 | NDField 4 | ======= 5 | 6 | This class is used to represent fields of a structured :ref:`NDArray <NDArray>`. 7 | 8 | For instance, you can create an array with two fields:: 9 | 10 | s = blosc2.empty(shape, dtype=[("a", np.float32), ("b", np.float64)]) 11 | a = blosc2.NDField(s, "a") 12 | b = blosc2.NDField(s, "b") 13 | 14 | .. currentmodule:: blosc2.NDField 15 | 16 | Methods 17 | ------- 18 | 19 | .. 
autosummary:: 20 | :toctree: autofiles/ndfield 21 | :nosignatures: 22 | 23 | __init__ 24 | __iter__ 25 | __len__ 26 | __getitem__ 27 | __setitem__ 28 | 29 | Attributes 30 | ---------- 31 | 32 | .. autosummary:: 33 | :toctree: autofiles/ndfield 34 | 35 | schunk 36 | shape 37 | -------------------------------------------------------------------------------- /doc/reference/proxy.rst: -------------------------------------------------------------------------------- 1 | .. _Proxy: 2 | 3 | Proxy 4 | ===== 5 | 6 | Class that implements a proxy (with cache support) of a Python-Blosc2 container. 7 | 8 | This can be used to cache chunks of a regular data container that follows the 9 | :ref:`ProxySource` or :ref:`ProxyNDSource` interfaces. 10 | 11 | .. currentmodule:: blosc2.Proxy 12 | 13 | Methods 14 | ------- 15 | 16 | .. autosummary:: 17 | :toctree: autofiles/proxy 18 | :nosignatures: 19 | 20 | __init__ 21 | __getitem__ 22 | fetch 23 | afetch 24 | 25 | Attributes 26 | ---------- 27 | 28 | .. autosummary:: 29 | :toctree: autofiles/proxy 30 | 31 | shape 32 | dtype 33 | cparams 34 | info 35 | fields 36 | vlmeta 37 | -------------------------------------------------------------------------------- /doc/reference/proxyndsource.rst: -------------------------------------------------------------------------------- 1 | .. _ProxyNDSource: 2 | 3 | ProxyNDSource 4 | ============= 5 | 6 | Interface for NDim sources in :ref:`Proxy`. For example, an NDArray, an HDF5 dataset, etc. 7 | For a simpler source, see :ref:`ProxySource`. 8 | 9 | .. currentmodule:: blosc2.ProxyNDSource 10 | 11 | Methods 12 | ------- 13 | 14 | .. autosummary:: 15 | :toctree: autofiles/proxyndsource 16 | :nosignatures: 17 | 18 | get_chunk 19 | aget_chunk 20 | 21 | Attributes 22 | ---------- 23 | 24 | .. 
autosummary:: 25 | :toctree: autofiles/proxyndsource 26 | :nosignatures: 27 | 28 | shape 29 | dtype 30 | chunks 31 | blocks 32 | -------------------------------------------------------------------------------- /doc/reference/proxysource.rst: -------------------------------------------------------------------------------- 1 | .. _ProxySource: 2 | 3 | ProxySource 4 | =========== 5 | 6 | Base interface for all supported sources in :ref:`Proxy` that are not NDim objects. 7 | For example, a file, a memory buffer, a network resource, etc. For n-dimensional 8 | ones, see :ref:`ProxyNDSource`. 9 | 10 | .. currentmodule:: blosc2.ProxySource 11 | 12 | Methods 13 | ------- 14 | 15 | .. autosummary:: 16 | :toctree: autofiles/proxysource 17 | :nosignatures: 18 | 19 | get_chunk 20 | aget_chunk 21 | 22 | Attributes 23 | ---------- 24 | 25 | .. autosummary:: 26 | :toctree: autofiles/proxysource 27 | :nosignatures: 28 | 29 | nbytes 30 | chunksize 31 | typesize 32 | -------------------------------------------------------------------------------- /doc/reference/reduction_functions.rst: -------------------------------------------------------------------------------- 1 | Reduction Functions 2 | ------------------- 3 | 4 | Contrary to lazy functions, reduction functions are evaluated eagerly, and the result is always a NumPy array (although this can be converted internally into an :ref:`NDArray <NDArray>` if you pass any :func:`blosc2.empty` arguments in ``kwargs``). 5 | 6 | Reduction operations can be used with any of :ref:`NDArray <NDArray>`, :ref:`C2Array <C2Array>`, :ref:`NDField <NDField>` and :ref:`LazyExpr <LazyExpr>`. Again, although these can be part of a :ref:`LazyExpr <LazyExpr>`, you must be aware that they are not lazy, but will be evaluated eagerly during the construction of a LazyExpr instance (this might change in the future). 7 | 8 | .. currentmodule:: blosc2 9 | 10 | .. 
autosummary:: 11 | :toctree: autofiles/operations_with_arrays/ 12 | :nosignatures: 13 | 14 | all 15 | any 16 | sum 17 | prod 18 | mean 19 | std 20 | var 21 | min 22 | max 23 | -------------------------------------------------------------------------------- /doc/reference/save_load.rst: -------------------------------------------------------------------------------- 1 | Save and load 2 | ------------- 3 | 4 | .. currentmodule:: blosc2 5 | 6 | .. autosummary:: 7 | :toctree: autofiles/save_load/ 8 | :nosignatures: 9 | 10 | save 11 | open 12 | save_array 13 | load_array 14 | save_tensor 15 | load_tensor 16 | -------------------------------------------------------------------------------- /doc/reference/schunk.rst: -------------------------------------------------------------------------------- 1 | .. _SChunk: 2 | 3 | SChunk 4 | ====== 5 | 6 | The basic compressed data container (aka super-chunk). This class consists of a set of useful parameters and methods that allow not only to create compressed data, and decompress it, but also to manage the data in a more sophisticated way. For example, it is possible to append new data, update existing data, delete data, etc. 7 | 8 | .. currentmodule:: blosc2.schunk 9 | 10 | Methods 11 | ------- 12 | 13 | .. autosummary:: 14 | :toctree: autofiles/schunk/ 15 | :nosignatures: 16 | 17 | SChunk.__init__ 18 | SChunk.append_data 19 | SChunk.decompress_chunk 20 | SChunk.delete_chunk 21 | SChunk.get_chunk 22 | SChunk.insert_chunk 23 | SChunk.insert_data 24 | SChunk.iterchunks 25 | SChunk.iterchunks_info 26 | SChunk.fill_special 27 | SChunk.update_chunk 28 | SChunk.update_data 29 | SChunk.get_slice 30 | SChunk.__getitem__ 31 | SChunk.__setitem__ 32 | SChunk.__len__ 33 | SChunk.to_cframe 34 | SChunk.postfilter 35 | SChunk.remove_postfilter 36 | SChunk.filler 37 | SChunk.prefilter 38 | SChunk.remove_prefilter 39 | 40 | .. _SChunkAttributes: 41 | 42 | Attributes 43 | ---------- 44 | 45 | .. 
autosummary:: 46 | :toctree: autofiles/schunk/ 47 | :nosignatures: 48 | 49 | SChunk.blocksize 50 | SChunk.cbytes 51 | SChunk.chunkshape 52 | SChunk.chunksize 53 | SChunk.contiguous 54 | SChunk.cparams 55 | SChunk.cratio 56 | SChunk.dparams 57 | SChunk.meta 58 | SChunk.nbytes 59 | SChunk.typesize 60 | SChunk.urlpath 61 | SChunk.vlmeta 62 | 63 | Functions 64 | --------- 65 | 66 | .. currentmodule:: blosc2 67 | 68 | .. autosummary:: 69 | :toctree: autofiles/schunk/ 70 | 71 | schunk_from_cframe 72 | -------------------------------------------------------------------------------- /doc/reference/simpleproxy.rst: -------------------------------------------------------------------------------- 1 | .. _SimpleProxy: 2 | 3 | SimpleProxy 4 | =========== 5 | 6 | Simple proxy for a NumPy array (or similar) that can be used with the Blosc2 compute engine. 7 | 8 | This only supports the __getitem__ method. No caching is performed. 9 | 10 | .. currentmodule:: blosc2.SimpleProxy 11 | 12 | Methods 13 | ------- 14 | 15 | .. autosummary:: 16 | :toctree: autofiles/simpleproxy 17 | :nosignatures: 18 | 19 | __init__ 20 | __getitem__ 21 | 22 | Attributes 23 | ---------- 24 | 25 | .. autosummary:: 26 | :toctree: autofiles/simpleproxy 27 | 28 | shape 29 | dtype 30 | src 31 | -------------------------------------------------------------------------------- /doc/reference/storage.rst: -------------------------------------------------------------------------------- 1 | .. _CompStorParams: 2 | 3 | Compression, decompression and storage parameters 4 | ================================================= 5 | 6 | Dataclasses for setting the compression, decompression and storage parameters. All their parameters are optional. 7 | 8 | .. currentmodule:: blosc2 9 | 10 | CParams 11 | ------- 12 | 13 | .. autosummary:: 14 | :toctree: autofiles/storage 15 | :nosignatures: 16 | 17 | CParams 18 | 19 | DParams 20 | ------- 21 | 22 | .. 
autosummary:: 23 | :toctree: autofiles/storage 24 | :nosignatures: 25 | 26 | DParams 27 | 28 | Storage 29 | ------- 30 | 31 | .. autosummary:: 32 | :toctree: autofiles/storage 33 | :nosignatures: 34 | 35 | Storage 36 | -------------------------------------------------------------------------------- /doc/reference/utilities.rst: -------------------------------------------------------------------------------- 1 | Expression Utilities 2 | ==================== 3 | 4 | A series of utilities are provided to work with expressions in a more convenient way. 5 | 6 | Functions 7 | --------- 8 | 9 | .. autosummary:: 10 | :toctree: autofiles/utilities 11 | :nosignatures: 12 | 13 | blosc2.evaluate 14 | blosc2.get_expr_operands 15 | blosc2.validate_expr 16 | 17 | Decorators 18 | ---------- 19 | 20 | .. autosummary:: 21 | :toctree: autofiles/utilities 22 | :nosignatures: 23 | 24 | blosc2.jit 25 | -------------------------------------------------------------------------------- /doc/release_notes/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../RELEASE_NOTES.md 2 | ``` 3 | -------------------------------------------------------------------------------- /examples/btune.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | # 8 | # This example can only be run if blosc2-btune is installed. 
You can 9 | # get it from https://pypi.org/project/blosc2-btune/ 10 | # For more info on this tuner plugin see 11 | # https://github.com/Blosc/blosc2_btune/blob/main/README.md 12 | ####################################################################### 13 | 14 | import blosc2_btune 15 | import numpy as np 16 | 17 | import blosc2 18 | 19 | nchunks = 10 20 | # Set the compression and decompression parameters, use BTUNE tuner 21 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4, tuner=blosc2.Tuner.BTUNE) 22 | dparams = blosc2.DParams() 23 | contiguous = True 24 | urlpath = "filename" 25 | 26 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") 27 | blosc2.remove_urlpath(urlpath) 28 | 29 | # Set the Btune configuration to use 30 | btune_conf = {"tradeoff": 0.3, "perf_mode": blosc2_btune.PerformanceMode.DECOMP} 31 | blosc2_btune.set_params_defaults(**btune_conf) 32 | 33 | # Create the SChunk 34 | data = np.arange(200 * 1000 * nchunks) 35 | schunk = blosc2.SChunk( 36 | chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage 37 | ) 38 | 39 | # Check data can be retrieved correctly 40 | data2 = np.empty(data.shape, dtype=data.dtype) 41 | schunk.get_slice(out=data2) 42 | assert np.array_equal(data, data2) 43 | 44 | blosc2.remove_urlpath(urlpath) 45 | -------------------------------------------------------------------------------- /examples/c2array-get-slice.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Example for opening and reading a C2Array (remote array) 10 | 11 | from time import time 12 | 13 | import numpy as np 14 | 15 | import blosc2 16 | 17 | urlbase = "https://cat2.cloud/demo" 18 | root = "@public" 19 | 20 | # Access the server 21 | # urlpath = blosc2.URLPath(f'{root}/examples/ds-1d.b2nd', urlbase) 22 | # urlpath = blosc2.URLPath(f'{root}/examples/sa-1M.b2nd', urlbase) 23 | urlpath = blosc2.URLPath(f"{root}/examples/lung-jpeg2000_10x.b2nd", urlbase) 24 | # urlpath = blosc2.URLPath(f'{root}/examples/uncompressed_lung-jpeg2000_10x.b2nd', urlbase) 25 | 26 | # Open the remote array 27 | t0 = time() 28 | remote_array = blosc2.open(urlpath, mode="r") 29 | size = np.prod(remote_array.shape) * remote_array.cparams.typesize 30 | print(f"Time for opening data (HTTP): {time() - t0:.3f}s - file size: {size / 2**10:.2f} KB") 31 | 32 | # Fetch a slice of the remote array as a numpy array 33 | t0 = time() 34 | a = remote_array[5:9] 35 | print(f"Time for reading data (HTTP): {time() - t0:.3f}s - {a.nbytes / 2**10:.2f} KB") 36 | 37 | # TODO: Fetch a slice of the remote array as a blosc2.NDArray 38 | -------------------------------------------------------------------------------- /examples/compress2_decompress2.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | a = np.linspace(0, 1, 1_000_000, dtype=np.float64) 14 | typesize = a.dtype.itemsize 15 | c_bytesobj = blosc2.compress2( 16 | a, 17 | typesize=typesize, 18 | codec=blosc2.Codec.ZSTD, 19 | filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE], 20 | filters_meta=[20, 0], 21 | ) 22 | assert len(c_bytesobj) < (len(a) * typesize) 23 | cratio = (len(a) * typesize) / len(c_bytesobj) 24 | print(f"cratio: {cratio:.3f}") 25 | 26 | a_bytesobj2 = blosc2.decompress2(c_bytesobj) 27 | # The next check does not work when using truncation (obviously) 28 | # assert a_bytesobj == a_bytesobj2 29 | -------------------------------------------------------------------------------- /examples/compress_decompress.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | import array 11 | 12 | # Compress and decompress different arrays 13 | import blosc2 14 | 15 | a = array.array("i", range(1000 * 1000)) 16 | a_bytesobj = a.tobytes() 17 | c_bytesobj = blosc2.compress(a_bytesobj, typesize=4) 18 | assert len(c_bytesobj) < len(a_bytesobj) 19 | a_bytesobj2 = blosc2.decompress(c_bytesobj) 20 | assert a_bytesobj == a_bytesobj2 21 | 22 | dest = blosc2.compress(b"", 1) 23 | assert blosc2.decompress(dest) == b"" 24 | assert type(blosc2.decompress(blosc2.compress(b"1" * 7, 1), as_bytearray=True)) is bytearray 25 | -------------------------------------------------------------------------------- /examples/filler.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Fill an SChunk with a filler decorator 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | nchunks = 3 16 | chunk_len = 200 * 1000 17 | schunk_dtype = np.dtype(np.float64) 18 | 19 | # Set the compression parameters. We need nthreads=1 for this example. 
20 | cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) 21 | 22 | # Create empty SChunk 23 | schunk = blosc2.SChunk(chunksize=chunk_len * schunk_dtype.itemsize, cparams=cparams) 24 | 25 | # Create operands (can be a SChunk, numpy.ndarray or Python scalar) 26 | op_dtype = np.dtype(np.int32) 27 | data = np.full(chunk_len * nchunks, 1234, dtype=op_dtype) 28 | schunk_op = blosc2.SChunk(chunksize=chunk_len * op_dtype.itemsize, data=data) 29 | op2_dtype = np.dtype(np.float32) 30 | nparray_op = np.arange(0, chunk_len * nchunks, dtype=op2_dtype) 31 | py_scalar = np.e 32 | 33 | 34 | # Set filler with decorator 35 | @schunk.filler(((schunk_op, op_dtype), (nparray_op, op2_dtype), (py_scalar, np.float32)), schunk_dtype) 36 | def filler(inputs_tuple, output, offset): 37 | output[:] = inputs_tuple[0] - inputs_tuple[1] * inputs_tuple[2] 38 | 39 | 40 | # Check that SChunk has been filled correctly 41 | out = np.empty(chunk_len * nchunks, dtype=schunk_dtype) 42 | schunk.get_slice(0, chunk_len * nchunks, out=out) 43 | 44 | res = np.empty(data.shape, dtype=schunk_dtype) 45 | filler((data, nparray_op, py_scalar), res, None) 46 | np.testing.assert_allclose(out, res) 47 | -------------------------------------------------------------------------------- /examples/gil.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | import blosc2 11 | 12 | print(blosc2.set_releasegil(True)) 13 | print(blosc2.set_releasegil(True)) 14 | -------------------------------------------------------------------------------- /examples/mmap-rw.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | # Example for writing and reading memory-mapped files 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | urlpath = "array.b2nd" 17 | blosc2.remove_urlpath(urlpath) 18 | a = np.arange(1_000_000, dtype=np.int64) 19 | 20 | # Optional: the size of the array is generous enough for the mapping size since we expect the compressed data to be 21 | # smaller than the original size 22 | initial_mapping_size = a.size * a.itemsize 23 | 24 | # mmap_mode and initial_mapping_size can be used for all functions which create arrays on disk 25 | # (SChunk, asarray, empty, etc.) 
26 | blosc2.asarray(a, urlpath=urlpath, mmap_mode="w+", initial_mapping_size=initial_mapping_size) 27 | 28 | # Read the ndarray back via the general open function 29 | a_read = blosc2.open(urlpath, mmap_mode="r") 30 | 31 | assert np.all(a == a_read) 32 | blosc2.remove_urlpath(urlpath) 33 | -------------------------------------------------------------------------------- /examples/ndarray/asarray_.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Import structured arrays using the array interface 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (2, 2) 16 | dtype = np.float64 17 | 18 | # Create a structured array 19 | arr0 = np.arange(np.prod(shape), dtype=dtype).reshape(shape) 20 | arr1 = np.arange(np.prod(shape), dtype=dtype).reshape(shape) 21 | arr = np.array([arr0, arr1], dtype="f8,f8") 22 | print("NumPy struct array:\n", arr) 23 | 24 | # And convert it into a NDArray using the array interface 25 | a = blosc2.asarray(arr) 26 | print("\nNDArray struct array:\n", a[...]) 27 | -------------------------------------------------------------------------------- /examples/ndarray/broadcast_expr.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to evaluate expressions with NDArray instances having different shapes as operands. 10 | # The broadcast is done internally and tries to mimic NumPy behavior. 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | # Two operands with the next shapes should be supported 17 | # shape1, shape2 = (2, 1, 3, 2), (3, 3, 2) 18 | # shape1, shape2 = (2, 1, 3, 2), (3, 1, 2) 19 | shape1, shape2 = (2, 1, 1, 1), (3, 2, 2) 20 | 21 | # Create a NDArray from a NumPy array 22 | npa = np.linspace(0, 1, np.prod(shape1), dtype=np.float32).reshape(shape1) 23 | npb = np.linspace(1, 2, np.prod(shape2), dtype=np.float64).reshape(shape2) 24 | npc = npa + npb 25 | npres = npa + npb 26 | print("Broadcast with NumPy:\n", npres) 27 | 28 | a = blosc2.asarray(npa) 29 | b = blosc2.asarray(npb) 30 | 31 | # Get a LazyExpr instance 32 | c = a + b 33 | # Evaluate: output is a NDArray 34 | # d = a + blosc2.mean(a, axis=0) 35 | # d = a + np.mean(npa, axis=0) 36 | d = a + b 37 | # print(d, d.shape, d.dtype) 38 | # print(d.expression, d.operands) 39 | assert isinstance(d, blosc2.LazyExpr) 40 | e = d.compute() 41 | print(e) 42 | assert isinstance(d, blosc2.LazyExpr) 43 | # Check 44 | assert isinstance(e, blosc2.NDArray) 45 | res = e[:] 46 | print("Broadcast with Blosc2:\n", res) 47 | 48 | assert np.allclose(res, npres) 49 | 50 | # # Evaluate a slice: output is a NumPy array 51 | npd = d[:] 52 | # # Check 53 | assert np.allclose(npd, npres) 54 | 55 | print("NDArray expression evaluated correctly in-memory!") 56 | -------------------------------------------------------------------------------- /examples/ndarray/buffer.py: -------------------------------------------------------------------------------- 1 | 
####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Creating/dumping an NDArray from/to a buffer 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (50, 50) 16 | chunks = (49, 49) 17 | dtype = np.dtype("|S8") 18 | typesize = dtype.itemsize 19 | 20 | # Create a NDArray from a buffer 21 | random = np.random.default_rng() 22 | buffer = bytes(random.normal(0, 1, np.prod(shape)) * typesize) 23 | a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype) 24 | print("compression ratio:", a.schunk.cratio) 25 | 26 | # Convert a NDArray to a buffer 27 | buffer2 = a.tobytes() 28 | assert buffer == buffer2 29 | -------------------------------------------------------------------------------- /examples/ndarray/bytedelta_filter.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Shows how to use the bytedelta filter. Remember that bytedelta is designed 10 | # to work after shuffle. 
11 | 12 | import math 13 | 14 | import numpy as np 15 | 16 | import blosc2 17 | 18 | shape = (1000, 1000) 19 | 20 | # Create a buffer 21 | nparray = np.linspace(0, 1000, math.prod(shape)).reshape(shape) 22 | 23 | # Compress with and without bytedelta 24 | cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE], filters_meta=[0]) 25 | a = blosc2.asarray(nparray, cparams=cparams) 26 | print( 27 | f"Compression ratio with shuffle: {a.schunk.cratio:.2f} x", 28 | ) 29 | 30 | # Now with bytedelta 31 | cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0, 0]) 32 | a = blosc2.asarray(nparray, cparams=cparams) 33 | print( 34 | f"Compression ratio with shuffle + bytedelta: {a.schunk.cratio:.2f} x", 35 | ) 36 | -------------------------------------------------------------------------------- /examples/ndarray/c2array_expr.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import numpy as np 4 | 5 | import blosc2 6 | 7 | host = "https://demo.caterva2.net/" 8 | root = "b2tests" 9 | dir = "expr/" 10 | 11 | # For a Caterva2 server running locally, use: 12 | # host = 'localhost:8002' 13 | 14 | # The root of the datasets 15 | root = "b2tests" 16 | # The directory inside root where the datasets are stored 17 | dir = "expr/" 18 | 19 | name1 = "ds-0-10-linspace-float64-(True, True)-a1-(60, 60)d.b2nd" 20 | name2 = "ds-0-10-linspace-float64-(True, True)-a2-(60, 60)d.b2nd" 21 | path1 = pathlib.Path(f"{root}/{dir + name1}").as_posix() 22 | path2 = pathlib.Path(f"{root}/{dir + name2}").as_posix() 23 | 24 | a = blosc2.C2Array(path1, host) 25 | b = blosc2.C2Array(path2, host) 26 | 27 | # Evaluate only a slice of the expression 28 | c = a + b 29 | print(type(c)) 30 | print(c[10:20, 10:20]) 31 | 32 | np.testing.assert_allclose(c[:], a[:] + b[:]) 33 | 34 | # Get an NDArray instance instead of a NumPy array 35 | ndarr = c.compute() 36 | np.testing.assert_allclose(ndarr[:], a[:] + b[:]) 37 | 
-------------------------------------------------------------------------------- /examples/ndarray/compute_expr.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to evaluate expressions with NDArray instances as operands. 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (50, 50) 16 | 17 | # Create a NDArray from a NumPy array 18 | npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) 19 | npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) 20 | npc = npa**2 + npb**2 + 2 * npa * npb + 1 21 | 22 | a = blosc2.asarray(npa) 23 | b = blosc2.asarray(npb) 24 | 25 | # Get a LazyExpr instance 26 | c = a**2 + b**2 + 2 * a * b + 1 27 | # Evaluate: output is a NDArray 28 | d = c.compute() 29 | # Check 30 | assert isinstance(d, blosc2.NDArray) 31 | assert np.allclose(d[:], npc) 32 | 33 | # Evaluate the whole slice: output is a NumPy array 34 | npd = c[:] 35 | # Check 36 | assert isinstance(npd, np.ndarray) 37 | assert np.allclose(npd, npc) 38 | 39 | # Evaluate a partial slice: output is a NumPy array 40 | npd = c[1:10] 41 | # Check 42 | assert isinstance(npd, np.ndarray) 43 | assert np.allclose(npd, npc[1:10]) 44 | 45 | print("NDArray expression evaluated correctly in-memory!") 46 | 47 | # Now, evaluate the expression from operands on disk 48 | # TODO: when doing a copy, mode should be 'w' by default?
49 | da = a.copy(urlpath="a.b2nd", mode="w") 50 | db = b.copy(urlpath="b.b2nd", mode="w") 51 | 52 | # Get a LazyExpr instance 53 | (da**2 + db**2 + 2 * da * db + 1).save(urlpath="c.b2nd") 54 | dc = blosc2.open("c.b2nd") 55 | 56 | # Evaluate: output is a NDArray 57 | dc2 = dc.compute() 58 | # Check 59 | assert isinstance(dc2, blosc2.NDArray) 60 | assert np.allclose(dc2[:], npc) 61 | print("NDArray expression evaluated correctly on-disk!") 62 | -------------------------------------------------------------------------------- /examples/ndarray/compute_fields.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to evaluate expressions with NDField instances as operands. 
10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (50, 50) 16 | 17 | # Create a structured NumPy array 18 | npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) 19 | npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) 20 | npc = npa**2 + npb**2 > 2 * npa * npb + 1 21 | nps = np.empty(shape, dtype=[("a", npa.dtype), ("b", npb.dtype)]) 22 | nps["a"] = npa 23 | nps["b"] = npb 24 | 25 | s = blosc2.asarray(nps) 26 | a = blosc2.NDField(s, "a") 27 | b = blosc2.NDField(s, "b") 28 | 29 | # Get a LazyExpr instance 30 | c = a**2 + b**2 > 2 * a * b + 1 31 | 32 | # Evaluate: output is a NDArray 33 | d = c.compute() 34 | # Check 35 | assert isinstance(d, blosc2.NDArray) 36 | assert np.allclose(d[:], npc) 37 | 38 | # Evaluate the whole slice: output is a NumPy array 39 | npd = c[:] 40 | # Check 41 | assert isinstance(npd, np.ndarray) 42 | assert np.allclose(npd, npc) 43 | 44 | # Evaluate a partial slice: output is a NumPy array 45 | npd = c[1:10] 46 | # Check 47 | assert isinstance(npd, np.ndarray) 48 | assert np.allclose(npd, npc[1:10]) 49 | 50 | print("Expression with NDField operands evaluated correctly!") 51 | -------------------------------------------------------------------------------- /examples/ndarray/compute_udf_numba.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to evaluate a Numba-compiled user-defined function (UDF) with blosc2.lazyudf.
10 | 11 | import numba as nb 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | 17 | # The UDF to be evaluated 18 | @nb.jit(nopython=True, parallel=True) 19 | def func_numba(inputs_tuple, output, offset): 20 | x = inputs_tuple[0] 21 | output[:] = x + 1 22 | 23 | 24 | # Create a NDArray from a NumPy array 25 | shape = (13, 13) 26 | npa = np.linspace(0, 1, np.prod(shape)).reshape(shape) 27 | npc = npa + 1 28 | a = blosc2.asarray(npa) 29 | 30 | lazyarray = blosc2.lazyudf(func_numba, (npa,), npa.dtype) 31 | print(lazyarray.info) 32 | res = lazyarray.compute() 33 | print(res.info) 34 | np.testing.assert_allclose(res[...], npc) 35 | print("Numba + LazyArray evaluated correctly!") 36 | -------------------------------------------------------------------------------- /examples/ndarray/copy_.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Copying NDArrays 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (10, 10) 16 | blocks = (10, 10) 17 | dtype = np.float64 18 | 19 | # Create a NDArray from a buffer 20 | buffer = bytes(np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)) 21 | a = blosc2.frombuffer(buffer, shape, dtype=dtype, blocks=blocks) 22 | 23 | # Get a copy of a 24 | b = blosc2.copy(a) 25 | 26 | # Another copy example 27 | b[1:5, 2:9] = 0 28 | b2 = blosc2.copy(b, blocks=blocks) 29 | print(b2[...]) 30 | -------------------------------------------------------------------------------- /examples/ndarray/empty_.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Create an empty array with different compression parameters and set some values on it 10 | 11 | import blosc2 12 | 13 | cparams = blosc2.CParams( 14 | codec=blosc2.Codec.LZ4, 15 | clevel=5, 16 | nthreads=4, 17 | filters=[blosc2.Filter.DELTA, blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], 18 | filters_meta=[0, 3, 0], # keep just 3 bits in mantissa 19 | ) 20 | a = blosc2.empty(shape=(40, 401), blocks=(6, 26), dtype="f8", cparams=cparams) 21 | 22 | a[...] 
= 222 23 | print(a.info) 24 | 25 | print(a[:, 0]) # note the truncation filter at work 26 | -------------------------------------------------------------------------------- /examples/ndarray/eye-constructor.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This example shows how to use the `eye()` constructor to create a blosc2 array. 10 | 11 | import math 12 | from time import time 13 | 14 | import numpy as np 15 | 16 | import blosc2 17 | 18 | N = 20_000 19 | 20 | shape = (N, N) 21 | print(f"*** Creating a blosc2 eye array with shape: {shape} ***") 22 | t0 = time() 23 | a = blosc2.eye(*shape, dtype=np.int8) 24 | cratio = a.schunk.nbytes / a.schunk.cbytes 25 | print( 26 | f"Time: {time() - t0:.3f} s ({math.prod(shape) / (time() - t0) / 1e6:.2f} M/s)" 27 | f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" 28 | ) 29 | print(f"Last 3 elements:\n{a[-3:]}") 30 | 31 | # You can create rectangular arrays too 32 | shape = (N, N * 5) 33 | print(f"*** Creating a blosc2 eye array with shape: {shape} ***") 34 | t0 = time() 35 | a = blosc2.eye(*shape, dtype=np.int8) 36 | cratio = a.schunk.nbytes / a.schunk.cbytes 37 | print( 38 | f"Time: {time() - t0:.3f} s ({math.prod(shape) / (time() - t0) / 1e6:.2f} M/s)" 39 | f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" 40 | ) 41 | print(f"First 3 elements:\n{a[:3]}") 42 | 43 | 44 | # In conclusion, you can use blosc2 eye() to create blosc2 arrays requiring much less storage 45 | # than numpy arrays. 
46 | -------------------------------------------------------------------------------- /examples/ndarray/filter_sort_fields.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Filter and sort fields in a structured array 10 | # Note that this only works for 1D arrays 11 | 12 | import sys 13 | from time import time 14 | 15 | import numpy as np 16 | 17 | import blosc2 18 | 19 | N = 1_000_000 20 | 21 | # arr = blosc2.open("/Users/faltet/Downloads/ds-1d-fields.b2nd") 22 | # Create a numpy structured array with 3 fields and N elements 23 | dt = np.dtype([("a", "i4"), ("b", "f4"), ("c", "f8")]) 24 | nsa = np.empty((N,), dtype=dt) 25 | # TODO: Make this work with a 2D array 26 | # nsa = np.empty((N,N), dtype=dt) 27 | nsa["a"][:] = np.arange(N, dtype="i4") 28 | nsa["b"][:] = np.linspace(0, 1, N, dtype="f4") 29 | rng = np.random.default_rng(42) # to get reproducible results 30 | nsa["c"][:] = rng.random(N) 31 | 32 | arr = blosc2.asarray(nsa) 33 | 34 | t0 = time() 35 | # Using plain sort in combination with filter 36 | # farr = arr["b >= c"].sort("c").compute() 37 | # You can use indices() to get the indices sorted 38 | farr = arr["b >= c"].indices(order="c").compute() 39 | # You can also use __getitem__ to get numpy arrays as result 40 | # farr = arr["b >= c"].sort("c")[:] 41 | print(f"Time to filter: {time() - t0:.3f} s") 42 | print(f"farr: {farr[:10]}") 43 | if farr.dtype == np.dtype("int64"): 44 | print(f"sorted (blosc2):\n {arr[farr[:10]]}") 45 | 46 | print(f"len(farr): {len(farr)}, len(arr): {len(arr)}") 47 | print(f"type of farr: {farr.dtype}, type of 
arr: {arr.dtype}") 48 | 49 | if isinstance(farr, np.ndarray): 50 | print(f"nbytes of farr: {farr.nbytes / 2**20:.2f}MB") 51 | # We cannot proceed anymore 52 | sys.exit(1) 53 | 54 | print(f"cratio of farr: {farr.schunk.cratio:.2f}, cratio of arr: {arr.schunk.cratio:.2f}") 55 | print( 56 | f"nbytes of farr: {farr.schunk.nbytes / 2**20:.2f}MB, nbytes of arr: {arr.schunk.nbytes / 2**20:.2f}MB" 57 | ) 58 | print( 59 | f"cbytes of farr: {farr.schunk.cbytes / 2**20:.2f}MB, cbytes of arr: {arr.schunk.cbytes / 2**20:.2f}MB" 60 | ) 61 | print(f"cparams of farr: {farr.cparams}, cparams of arr: {arr.cparams}") 62 | print(f"chunks of farr: {farr.chunks}, chunks of arr: {arr.chunks}") 63 | -------------------------------------------------------------------------------- /examples/ndarray/formats.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Storing data in sparse vs contiguous mode 10 | 11 | from time import time 12 | 13 | import numpy as np 14 | 15 | import blosc2 16 | 17 | urlpath_sparse = "ex_formats_sparse.b2nd" 18 | urlpath_contiguous = "ex_formats_contiguous.b2nd" 19 | 20 | shape = (1000 * 1000,) 21 | chunks = (1000,) 22 | blocks = (100,) 23 | dtype = np.dtype(np.float64) 24 | 25 | t0 = time() 26 | a = blosc2.empty( 27 | shape, 28 | dtype=dtype, 29 | chunks=chunks, 30 | blocks=blocks, 31 | urlpath=urlpath_sparse, 32 | contiguous=False, 33 | mode="w", 34 | ) 35 | for nchunk in range(a.schunk.nchunks): 36 | a[nchunk * chunks[0] : (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) 37 | t1 = time() 38 | 39 | print(f"Time: {(t1 - t0):.4f} s") 40 | an = a[...] 41 | 42 | t0 = time() 43 | b = blosc2.empty( 44 | shape, 45 | dtype=dtype, 46 | chunks=chunks, 47 | blocks=blocks, 48 | urlpath=urlpath_contiguous, 49 | contiguous=True, 50 | mode="w", 51 | ) 52 | 53 | for nchunk in range(shape[0] // chunks[0]): 54 | b[nchunk * chunks[0] : (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) 55 | t1 = time() 56 | 57 | print(f"Time: {(t1 - t0):.4f} s") 58 | bn = b[...] 59 | 60 | np.testing.assert_allclose(an, bn) 61 | -------------------------------------------------------------------------------- /examples/ndarray/general_expressions.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Build the same lazy expression from NDArray operands, NumPy operands,
# and a mix of both, checking each against a plain NumPy reference.

import numpy as np

import blosc2

shape = (50, 50)
nelems = np.prod(shape)

# NumPy operands and the reference result
npa = np.linspace(0, 1, nelems, dtype=np.float32).reshape(shape)
npb = np.linspace(1, 2, nelems, dtype=np.float64).reshape(shape)
npc = npa**2 + npb**2 + 2 * npa * npb + 1

# NDArray counterparts of the NumPy operands
a = blosc2.asarray(npa)
b = blosc2.asarray(npb)

expression = "a**2 + b**2 + 2 * a * b + 1"
operand_sets = (
    {"a": a, "b": b},  # all NDArray operands
    {"a": npa, "b": b},  # a mix of NumPy and NDArray operands
    {"a": npa, "b": npb},  # all NumPy operands
)
for operands in operand_sets:
    # Get a LazyExpr instance, evaluate it and compare with the reference
    c = blosc2.lazyexpr(expression, operands)
    d = c.compute()
    assert np.allclose(d[:], npc)

# Evaluate partial slices of the last expression (a single row, then a row range)
for key in (1, slice(1, 10)):
    npd = c[key]
    assert np.allclose(npd, npc[key])

print(d.info)

print("Lazy expression evaluated correctly in-memory!")

# --------------------------------------------------------------------------------
# /examples/ndarray/getitem.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Read a slice out of an NDArray and write a modified slice back

import numpy as np

import blosc2

shape = (10, 10)
slices = (slice(2, 7), slice(4, 8))

# NumPy source array, wrapped as an NDArray
nparray = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape)
a = blosc2.asarray(nparray)

# Reading with a tuple of slices
buffer = a[slices]

# Writing to the same region: store (ones - old values)
ones = np.ones_like(buffer)
a[slices] = ones - buffer
print(a[...])

# --------------------------------------------------------------------------------
# /examples/ndarray/iterchunks_info.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Using the iterchunks_info for efficient iteration over chunks

from time import time

import blosc2

shape = (1000,) * 3
chunks = (500,) * 3
dtype = "f4"

# Create the NDArray with a mix of different special values (and not special too!)
# Alternative starting point: a = blosc2.zeros(shape, chunks=chunks, dtype=dtype)
a = blosc2.full(shape, fill_value=9, chunks=chunks, dtype=dtype)
slice_ = (slice(0, 500),) * 3
a[slice_] = 0  # introduce a zeroed chunk (another type of special value)
slice_ = (slice(-500, -1),) * 3
a[slice_] = 1  # blosc2 is currently not able to determine special values in this case

# Walk over the per-chunk info records and time the whole traversal
t0 = time()
for info in a.iterchunks_info():
    print(info)
    # Do something fancy with the chunk
print(f"Time for iterating over {a.schunk.nchunks} chunks: {time() - t0:.4f} s")

# --------------------------------------------------------------------------------
# /examples/ndarray/jit-expr.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Examples of using the jit decorator with expressions
# You can find benchmarks for this example in the bench/ndarray directory

import numpy as np

import blosc2


# Example 1: Basic usage of the jit decorator
@blosc2.jit
def expr_jit(a, b, c):
    # This function computes a boolean array where the condition is met
    return ((a**3 + np.sin(a * 2)) < c) & (b > 0)


# Create some sample data
a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100))
b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100))
# `num` must agree with `shape`: 100 samples for a (100,) array, which then
# broadcasts against the (10, 100) operands.
c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,))

# Call the function with the jit decorator
result = expr_jit(a, b, c)
print(result[1, :10])

# Example 2: Using the jit decorator with an out parameter
out = blosc2.zeros((10, 100), dtype=np.bool_)


@blosc2.jit(out=out)
def expr_jit_out(a, b, c):
    # This function computes a boolean array and stores the result in the 'out' array
    return ((a**3 + np.sin(a * 2)) < c) & (b > 0)


# Call the function with the jit decorator and out parameter
result_out = expr_jit_out(a, b, c)
print(result_out[1, :10])
print(out[1, :10])  # The 'out' array should now contain the same result

# Example 3: Using the jit decorator with additional keyword arguments
cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE])


@blosc2.jit(cparams=cparams)
def expr_jit_cparams(a, b, c):
    # This function computes a boolean array with custom compression parameters
    return ((a**3 + np.sin(a * 2)) < c) & (b > 0)


# Call the function with the jit decorator and custom parameters
result_cparams = expr_jit_cparams(a, b, c)
print(result_cparams[1, :10])

# --------------------------------------------------------------------------------
# /examples/ndarray/jit-numpy-funcs.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Examples of using the jit decorator with arbitrary NumPy functions.
# These functions are not optimized for performance, but they show how
# to use the jit decorator with NumPy functions.
# You can find benchmarks for this example in the bench/ndarray directory

import numpy as np

import blosc2

# Create some sample data
a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100))
b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100))
c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,))


# Example 1: Basic usage of the jit decorator with a cumulative function
@blosc2.jit
def expr_jit(a, b, c):
    # This function computes a cumulative sum along axis 0
    return np.cumsum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0)


# Call the function with the jit decorator
result = expr_jit(a, b, c)
print(f"Example 1 result[0, 0:10]: {result[0, 0:10]}")


# Example 2: Using the jit decorator with an out parameter passed to the NumPy function
out = np.zeros(result.shape, dtype=np.int64)


@blosc2.jit
def expr_jit_out(a, b, c):
    # This function computes a cumulative product along axis 0, writing into 'out'
    return np.cumulative_prod(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0, out=out, include_initial=False)


# Call the function with the jit decorator and out parameter
result = expr_jit_out(a, b, c)
print(f"Example 2 result[0, 0:10]: {result[0, 0:10]}")
print("Example 2 out[0, 0:10] array:", out[0, 0:10])  # the 'out' array should now contain the same result


# Example 3: Using the jit decorator with a combination of NumPy functions
@blosc2.jit
def expr_jit_diff(a, b, c):
    # This function differences, along axis 1, an expression that itself contains a cumsum
    return np.diff((a**3 + np.cumsum(b * 2, axis=1) + c), axis=1)


# Call the function with the jit decorator
result = expr_jit_diff(a, b, c)
print(f"Example 3 result[0, 0:5]: {result[0, 0:5]}")

# --------------------------------------------------------------------------------
# /examples/ndarray/jit-reduc.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Examples of using the jit decorator with reductions
# You can find benchmarks for this example in the bench/ndarray directory

import numpy as np

import blosc2


# Example 1: Basic usage of the jit decorator with reduction
@blosc2.jit
def expr_jit(a, b, c):
    # This function computes a sum reduction along axis 1
    return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1)


# Create some sample data
a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100))
b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100))
# `num` must agree with `shape`: 100 samples for a (100,) array, which then
# broadcasts against the (10, 100) operands.
c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,))

# Call the function with the jit decorator
result = expr_jit(a, b, c)
print("Example 1 result:", result)

# Example 2: Using the jit decorator with an out parameter for reduction
out = np.zeros((10,), dtype=np.int64)


@blosc2.jit
def expr_jit_out(a, b, c):
    # This function computes a sum reduction along axis 1 and stores the result in the 'out' array
    return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out)


# Call the function with the jit decorator and out parameter
result_out = expr_jit_out(a, b, c)
print("Example 2 result:", result_out)
print("Example 2 out array:", out)  # The 'out' array should now contain the same result

# Example 3: Using the jit decorator with additional keyword arguments for reduction
cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE])
out_cparams = blosc2.zeros((10,), dtype=np.int64, cparams=cparams)


@blosc2.jit
def expr_jit_cparams(a, b, c):
    # This function computes a sum reduction along axis 1 with custom compression parameters
    return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out_cparams)


# Call the function with the jit decorator and custom parameters
result_cparams = expr_jit_cparams(a, b, c)
print("Example 3 result:", result_cparams[...])
print("Example 3 out array:", out_cparams[...])  # The 'out_cparams' array should now contain the same result

# --------------------------------------------------------------------------------
# /examples/ndarray/lazyexpr_where_indexing.py:
# --------------------------------------------------------------------------------
# Imports

import numpy as np

import blosc2

N = 1000
it = ((-x + 1, x - 2, 0.1 * x) for x in range(N))
sa = blosc2.fromiter(
    it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
)
# Lazy filter on the structured array: rows where field A is less than field B
expr = sa["(A < B)"]
A = sa["A"][:]
B = sa["B"][:]
C = sa["C"][:]
temp = sa[:]
# NumPy reference mask and the position of its first True entry
indices = A < B
idx = np.argmax(indices)

# One might think that expr[:10] gives the first 10 elements of the evaluated expression, but this is not the case.
# It actually computes the expression on the first 10 elements of the operands; since for some elements the condition
# is False, the result will be shorter than 10 elements.
# Returns less than 10 elements in general
sliced = expr.compute(slice(0, 10))
gotitem = expr[:10]
np.testing.assert_array_equal(sliced[:], gotitem)
np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]])  # Equivalent syntax
# Actually this makes sense since one can understand this as a request to compute on a portion of operands.
# If one desires a portion of the result, one should compute the whole expression and then slice it.

# Get first element for which condition is true
sliced = expr.compute(idx)
gotitem = expr[idx]
# Arrays of one element
np.testing.assert_array_equal(sliced[()], gotitem)
np.testing.assert_array_equal(gotitem, temp[idx])

# Should return void arrays here.
sliced = expr.compute(0)
gotitem = expr[0]
np.testing.assert_array_equal(sliced[()], gotitem)
np.testing.assert_array_equal(gotitem, temp[0])

# --------------------------------------------------------------------------------
# /examples/ndarray/linspace-constructor.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# This example shows how to use the `linspace()` constructor to create a blosc2 array.

from time import time

import numpy as np

import blosc2

N = 10_000_000

# In every section below the elapsed time is taken once, right after the
# array is built, so the printed time and throughput agree with each other
# and do not include the cost of reading the compression statistics.

shape = (N,)
print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***")
t0 = time()
a = blosc2.linspace(0, 10, N)
elapsed = time() - t0
cratio = a.schunk.nbytes / a.schunk.cbytes
print(
    f"Time: {elapsed:.3f} s ({N / elapsed / 1e6:.2f} M/s)"
    f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)"
)
print(f"Last 3 elements: {a[-3:]}")

# You can create ndim arrays too
shape = (5, N // 5)
chunks = None
# chunks = (5, N // 10)  # Uncomment this line to experiment with chunks
print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: True) ***")
t0 = time()
b = blosc2.linspace(0, 10, N, shape=shape, chunks=chunks, c_order=True)
elapsed = time() - t0
cratio = b.schunk.nbytes / b.schunk.cbytes
print(
    f"Time: {elapsed:.3f} s ({N / elapsed / 1e6:.2f} M/s)"
    f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)"
)

# You can go faster by not requesting the array to be C ordered
shape = (5, N // 5)
chunks = None
# chunks = (5, N // 10)  # Uncomment this line to experiment with chunks
print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: False) ***")
t0 = time()
b = blosc2.linspace(0, 10, N, shape=shape, chunks=chunks, c_order=False)
elapsed = time() - t0
cratio = b.schunk.nbytes / b.schunk.cbytes
print(
    f"Time: {elapsed:.3f} s ({N / elapsed / 1e6:.2f} M/s)"
    f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)"
)


# For reference, let's compare with numpy
print(f"*** Creating a numpy array with {N:_} elements (shape: {shape}) ***")
t0 = time()
na = np.linspace(0, 10, N).reshape(shape)
elapsed = time() - t0
print(
    f"Time: {elapsed:.3f} s ({N / elapsed / 1e6:.2f} M/s)"
    f"\tStorage required: {na.nbytes / 1e6:.2f} MB"
)
# np.testing.assert_allclose(b[:], na)

# Create an NDArray from a numpy array
print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) from numpy ***")
t0 = time()
c = blosc2.asarray(na)
elapsed = time() - t0
cratio = c.schunk.nbytes / c.schunk.cbytes
print(
    f"Time: {elapsed:.3f} s ({N / elapsed / 1e6:.2f} M/s)"
    f"\tStorage required: {c.schunk.cbytes / 1e6:.2f} MB ({cratio:.2f}x)"
)
# np.testing.assert_allclose(c[:], na)

# In conclusion, you can use blosc2 linspace() to create blosc2 arrays requiring much less storage
# than numpy arrays. If speed is important, and you can afford the extra memory, you can create
# blosc2 arrays faster straight from numpy arrays as well.

# --------------------------------------------------------------------------------
# /examples/ndarray/meta.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Attach metadata to an on-disk array at creation time and read it back

import numpy as np

import blosc2

shape = (128, 128)
urlpath = "ex_meta.b2nd"
dtype = np.complex128

# Source data as a NumPy array
nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)

# Metalayers to store alongside the data
meta = {"m1": b"1111", "m2": b"2222"}

# Build the on-disk NDArray from the raw bytes of the NumPy array
raw = bytes(nparray)
a = blosc2.frombuffer(raw, nparray.shape, urlpath=urlpath, mode="w", dtype=dtype, meta=meta)
print(a.info)

# Re-open the stored array
b = blosc2.open(urlpath)

# Query the metalayers through the underlying schunk
stored_meta = b.schunk.meta
m1 = stored_meta.get("m5", b"0000")  # "m5" was never stored, so the default is returned
m2 = stored_meta["m2"]
print("m1 meta:", m1)
print("m2 meta:", m2)

# --------------------------------------------------------------------------------
# /examples/ndarray/ndarray_copy.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Two ways of copying an NDArray: with default and with explicit blocks

import numpy as np

import blosc2

shape = (10, 10)
blocks = (10, 10)
dtype = np.float64

# Raw bytes of a NumPy array serve as the source buffer
source = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
buffer = bytes(source)
a = blosc2.frombuffer(buffer, shape, dtype=dtype, blocks=blocks)

# Plain copy of a
b = blosc2.copy(a)

# Modify the copy, then copy it again passing the blocks explicitly
b[1:5, 2:9] = 0
b2 = blosc2.copy(b, blocks=blocks)
print(b2[...])

# --------------------------------------------------------------------------------
# /examples/ndarray/ndmean.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Compress normally-distributed data using the NDMEAN filter

import numpy as np

import blosc2

shape = (50, 50)
chunks = (49, 49)
# Use a dtype *instance*: `itemsize` on the scalar class np.float64 is a
# descriptor object, not the size in bytes.
dtype = np.dtype(np.float64)
typesize = dtype.itemsize

# Create a NDArray from a NumPy array
random = np.random.default_rng()
array = random.normal(0, 1, np.prod(shape)).reshape(shape)
# Use NDMEAN filter
cparams = blosc2.CParams(filters=[blosc2.Filter.NDMEAN], filters_meta=[4])
a = blosc2.asarray(array, chunks=chunks, cparams=cparams)
print("compression ratio:", a.schunk.cratio)

# --------------------------------------------------------------------------------
# /examples/ndarray/persistency.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Save an array to disk and open it again

import numpy as np

import blosc2

shape = (128, 128)
urlpath = "ex_persistency.b2nd"
dtype = np.complex128

# Source data as a NumPy array
nelems = int(np.prod(shape))
nparray = np.arange(nelems, dtype=dtype).reshape(shape)
# Passing urlpath makes asarray store the NDArray on disk
a = blosc2.asarray(nparray, urlpath=urlpath, mode="w")

# Open the stored array and show its contents
b = blosc2.open(urlpath)
print(b[...])

# --------------------------------------------------------------------------------
# /examples/ndarray/proxy-carray.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Shows how you can make a proxy of a remote array (served with Caterva2) on disk
# Note that, for running this example, you will need the blosc2-grok package.

import os
from time import time

import blosc2

urlbase = "https://demo.caterva2.net/"
path = "example/lung-jpeg2000_10x.b2nd"
# 'a' is the remote array; 'b' is a proxy of it backed by a local file
a = blosc2.C2Array(path, urlbase=urlbase)
b = blosc2.Proxy(a, urlpath="proxy.b2nd", mode="w")

# Compression metadata of both objects (all should be the same)
print("*** Metadata ***")
print(f"Codec in 'a': {a.cparams.codec}")
print(f"Codec in 'b': {b.cparams.codec}")
print(f"Filters in 'a': {a.cparams.filters}")
print(f"Filters in 'b': {b.cparams.filters}")

# Shape and dtype of both objects
print("*** Array properties ***")
print(f"Shape in 'a': {a.shape}")
print(f"Shape in 'b': {b.shape}")
print(f"Type in 'a': {a.dtype}")
print(f"Type in 'b': {b.dtype}")

# Time the same read three times: from the remote array, from the proxy,
# and from the proxy again.
print("*** Fetching data ***")
t0 = time()
print(f"Data in 'a': {a[0, 0, 0:10]}")
print(f"Time to fetch data in 'a': {time() - t0:.3f}s")
t0 = time()
print(f"Data in 'b': {b[0, 0, 0:10]}")
print(f"Time to fetch data in 'b': {time() - t0:.3f}s")
t0 = time()
print(f"Data in 'b': {b[0, 0, 0:10]}")
print(f"Time to fetch data in 'b' (cached): {time() - t0:.3f}s")

# Compressed sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case)
print("*** Sizes ***")
print(f"Size in 'a': {a.meta['schunk']['cbytes']}")
print(f"Size in 'b': {b.schunk.cbytes}")
# Size of the proxy file on disk
print("*** Disk sizes ***")
print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}")

# --------------------------------------------------------------------------------
# /examples/ndarray/proxy-ndarray.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Make a proxy of an array stored locally on disk and compare the two.

import os

import blosc2

cparams = blosc2.CParams(
    clevel=5,
    codec=blosc2.Codec.LZ4,
    filters=[blosc2.Filter.BITSHUFFLE],
    filters_meta=[0],
)

# Both files are placed in the current working directory
cwd = os.getcwd()
source_path = f"{cwd}/a.b2nd"
proxy_path = f"{cwd}/proxy.b2nd"
a = blosc2.full((128, 128), 1, dtype="float64", urlpath=source_path, mode="w", cparams=cparams)
b = blosc2.Proxy(a, urlpath=proxy_path, mode="w")

# Compression metadata of both objects
print("*** Metadata ***")
print(f"Codec in 'a': {a.cparams.codec}")
print(f"Codec in 'b': {b.cparams.codec}")
print(f"Clevel in 'a': {a.cparams.clevel}")
print(f"Clevel in 'b': {b.cparams.clevel}")
print(f"Filters in 'a': {a.cparams.filters}")
print(f"Filters in 'b': {b.cparams.filters}")

# Shape and dtype of both objects
print("*** Array properties ***")
print(f"Shape in 'a': {a.shape}")
print(f"Shape in 'b': {b.shape}")
print(f"Type in 'a': {a.dtype}")
print(f"Type in 'b': {b.dtype}")

# Read the same slice through both objects
print("*** Fetching data ***")
print(f"Data in 'a': {a[0, 0:10]}")
print(f"Data in 'b': {b[0, 0:10]}")

# Compressed sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case)
print("*** Sizes ***")
print(f"Size in 'a': {a.schunk.cbytes}")
print(f"Size in 'b': {b.schunk.cbytes}")
# File sizes on disk
print("*** Disk sizes ***")
print(f"Size 'a' (disk): {os.stat(a.urlpath).st_size}")
print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}")

# Variable-length metalayer names of both objects
print("*** VLmeta ***")
print(f"VLmeta in 'a': {list(a.vlmeta)}")
print(f"VLmeta in 'b': {list(b.vlmeta)}")

# --------------------------------------------------------------------------------
# /examples/ndarray/reduce_and_enlarge.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# This shows how to evaluate and store expressions with reductions,
# using NDArray instances as operands.
#
# For this to work correctly, we must use a string for the expression,
# as the reductions are normally evaluated eagerly.
# String-expressions also allow to be stored for later evaluation.
#
# Note how:
# 0) The expression can be evaluated and stored for later evaluation.
# 1) Re-opening a stored expression can adapt to changes in operands.
# 2) The expression can be evaluated lazily, only when needed.
# 3) Broadcasting is supported.

import numpy as np

import blosc2

# On-disk operands with different (broadcastable) shapes.
# NOTE: the names a, b and c are referenced from the expression string below,
# which is built without an explicit operands mapping, so keep them as they are.
a = blosc2.full((2, 3, 4), 1, dtype=np.int8, urlpath="a.b2nd", mode="w")
b = blosc2.full((2, 4), 2, dtype=np.uint16, urlpath="b.b2nd", mode="w")
c = blosc2.full((4,), 3, dtype=np.int8, urlpath="c.b2nd", mode="w")

# print("Array a:", a[:])
# print("Array b:", b[:])
# print("Array c:", c[:])

# The expression is given as a string; it may use functions like sum, mean, std, sin, cos, etc.
# Other expressions to try:
# expr = "a.sum() + b * c"
# expr = "a.sum(axis=1) + b * c"
expr = "sum(a, axis=1) + b * sin(c)"
print("expr:", expr)
# Build the lazy expression from the string
lazy_expr = blosc2.lazyexpr(expr)
print(f"expr shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}")
# Indexing evaluates the expression (should be a 2x4 arr)
print(lazy_expr[:])

# Save the expression to disk, then open it again from the same path
url_path = "my_expr.b2nd"
lazy_expr.save(urlpath=url_path, mode="w")
lazy_expr = blosc2.open(urlpath=url_path)
print(lazy_expr)
print(f"expr (after open) shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}")
# Evaluate the re-opened expression (should be a 2x4 arr)
print(lazy_expr[:])

# Grow two of the operands along the first axis and fill the new entries
a.resize((3, 3, 4))
a[2] = 3
b.resize((3, 4))
b[2] = 5
# Open the saved expression once more and evaluate it again
lazy_expr = blosc2.open(urlpath=url_path)
print(f"expr (after resize & reopen) shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}")
print(lazy_expr[:])

# --------------------------------------------------------------------------------
# /examples/ndarray/reduce_expr.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# This shows how to evaluate expressions with NDArray instances as operands.

import numpy as np

import blosc2

shape = (10, 10, 2)

# Create a NDArray from a NumPy array
npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape)
npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape)
npc = npa**2 + npb**2 + 2 * npa * npb + 1

a = blosc2.asarray(npa)
b = blosc2.asarray(npb)

# Get a LazyExpr instance
c = a**2 + b**2 + 2 * a * b + 1
# Evaluate: output is a NDArray
# Other reductions to try:
# d = c.sum(axis=1)
# d = blosc2.sum(c, axis=1)
# d = blosc2.sum(c) + blosc2.mean(a)
# d = blosc2.sum(c, axis=1) + blosc2.mean(a, axis=0)
# d = blosc2.sum(c, axis=(0, 2)) + blosc2.mean(a, axis=(0, 2))
# d = blosc2.sum(c) + blosc2.std(a, axis=1)
d = blosc2.any(c, axis=(0, 2)) < b.slice((0, slice(0, 10), 0))
print(d, d.shape, d.dtype)
# print(d.expression, d.operands)
e = d.compute()
# print(e)
assert isinstance(d, blosc2.LazyExpr)

# Check
assert isinstance(e, blosc2.NDArray)
# Named b2sum (not `sum`) so the builtin is not shadowed
b2sum = e[()]
print("Reduction with Blosc2:\n", b2sum)
# NumPy counterparts of the alternatives above:
# npsum = npc.sum(axis=1)
# npsum = np.sum(npc, axis=1)
# npsum = np.sum(npc) + np.mean(npa)
# npsum = np.sum(npc, axis=1) + np.mean(npa, axis=0)
# npsum = np.sum(npc, axis=(0, 2)) + np.mean(npa, axis=(0, 2))
# npsum = np.sum(npc) + np.std(npa)
npsum = np.any(npc, axis=(0, 2)) < npb[0, :, 0]
print("Reduction with NumPy:\n", npsum)
# npsum = np.sum(npc, axis=(0,2)) + np.std(npa, axis=(0, 2))
assert np.allclose(b2sum, npsum)

# Evaluate a slice: output is a NumPy array
npd = d[()]
# Check
assert np.allclose(npd, npsum)

print("NDArray expression evaluated correctly in-memory!")

# --------------------------------------------------------------------------------
# /examples/ndarray/reduce_expr_save.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# This shows how to evaluate expressions with NDArray instances as operands.

import numpy as np

import blosc2

shape = (10, 1, 2)

# Create a NDArray from a NumPy array
npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape)
npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape)
npc = npa**2 + npb**2 + 2 * npa * npb + 1

a = blosc2.asarray(npa, urlpath="a.b2nd", mode="w")
b = blosc2.asarray(npb, urlpath="b.b2nd", mode="w")

# Get a LazyExpr instance, save it and open it again from disk
c = a**2 + b**2 + 2 * a * b + 1
c.save(urlpath="c.b2nd")
c = blosc2.open("c.b2nd")
# Build a string expression with reductions on top of it, and save that too
d = blosc2.lazyexpr("a + c.sum() + a.std()", operands={"a": a, "c": c})
d.save(urlpath="lazy-d.b2nd")

# Load the expression from disk
d = blosc2.open("lazy-d.b2nd")
print(f"Expression: {d}")
assert isinstance(d, blosc2.LazyExpr)
e = d.compute()
assert isinstance(e, blosc2.NDArray)
# Named b2sum (not `sum`) so the builtin is not shadowed
b2sum = e[()]
print("Reduction with Blosc2:\n", b2sum[1])
npsum = npa + np.sum(npc) + np.std(npa)
print("Reduction with NumPy:\n", npsum[1])
assert np.allclose(b2sum, npsum)

print("NDArray expression evaluated correctly in-memory!")

# --------------------------------------------------------------------------------
# /examples/ndarray/reduce_string_expr.py:
# --------------------------------------------------------------------------------
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# This shows how to evaluate expressions with NDArray instances as operands.

import numpy as np

import blosc2

shape = (10, 10, 2)

# Create a NDArray from a NumPy array
npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape)
npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape)
npc = npa**2 + npb**2 + 2 * npa * npb + 1

a = blosc2.asarray(npa)
b = blosc2.asarray(npb)

# Get a LazyExpr instance
c = a**2 + b**2 + 2 * a * b + 1
# Build a string expression mixing a slice operand with reductions
d = blosc2.lazyexpr("sl + c.sum() + a.std()", operands={"a": a, "c": c, "sl": a.slice((1, 1))})
print(f"Expression: {d.expression}")
print(f"Operands: {d.operands}")
assert isinstance(d, blosc2.LazyExpr)
e = d.compute()
# d must still be a LazyExpr after computing it
assert isinstance(d, blosc2.LazyExpr)
# Check
assert isinstance(e, blosc2.NDArray)
# Named b2sum (not `sum`) so the builtin is not shadowed
b2sum = e[()]
print("Reduction with Blosc2:\n", b2sum)
npsum = npa[1, 1] + np.sum(npc) + np.std(npa)
print("Reduction with NumPy:\n", npsum)
# npsum = np.sum(npc, axis=(0,2)) + np.std(npa, axis=(0, 2))
assert np.allclose(b2sum, npsum)

print("NDArray expression evaluated correctly in-memory!")
44 | -------------------------------------------------------------------------------- /examples/ndarray/resize_.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Resizing an array is simple (and efficient too) 10 | 11 | import blosc2 12 | 13 | a = blosc2.full((4, 4), fill_value=9) 14 | a.resize((5, 7)) 15 | a[3:5, 2:7] = 8 16 | print(a[:]) 17 | -------------------------------------------------------------------------------- /examples/ndarray/work_with_numpy.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Shows how you can easily convert from/to NumPy arrays 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | shape = (1234, 23) 16 | chunks = (253, 23) 17 | dtype = bool 18 | 19 | # Create a buffer 20 | random = np.random.default_rng() 21 | nparray = random.choice(a=[True, False], size=np.prod(shape)).reshape(shape) 22 | 23 | # Create a NDArray from a NumPy array 24 | a = blosc2.asarray(nparray, chunks=chunks) 25 | b = a.copy() 26 | 27 | # Convert a NDArray to a NumPy array 28 | nparray2 = b[...] 
29 | print(nparray2) 30 | -------------------------------------------------------------------------------- /examples/ndarray/zfp_codec.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | import numpy as np 11 | 12 | import blosc2 13 | 14 | shape = (50, 50) 15 | chunks = (49, 49) 16 | dtype = np.float64 17 | typesize = dtype.itemsize 18 | 19 | # Create a NDArray from a NumPy array 20 | random = np.random.default_rng() 21 | array = random.normal(0, 1, np.prod(shape)).reshape(shape) 22 | # Use ZFP_RATE codec 23 | cparams = blosc2.CParams(codec=blosc2.Codec.ZFP_RATE, codec_meta=37) 24 | a = blosc2.asarray(array, chunks=chunks, cparams=cparams) 25 | print("compression ratio:", a.schunk.cratio) 26 | -------------------------------------------------------------------------------- /examples/pack_array.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | # A simple example using the pack and unpack functions 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | a = np.array(["å", "ç", "ø"]) 16 | parray = blosc2.pack(a, 9) 17 | a2 = blosc2.unpack(parray) 18 | assert np.all(a == a2) 19 | -------------------------------------------------------------------------------- /examples/pack_tensor.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | # A simple example using the pack_tensor and unpack_tensor functions 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | a = np.arange(1_000_000) 17 | 18 | cparams = blosc2.CParams( 19 | codec=blosc2.Codec.ZSTD, clevel=9, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0] 20 | ) 21 | cframe = blosc2.pack_tensor(a, cparams=cparams) 22 | print("Length of packed array in bytes:", len(cframe)) 23 | 24 | a2 = blosc2.unpack_tensor(cframe) 25 | assert np.all(a == a2) 26 | -------------------------------------------------------------------------------- /examples/postfilter1.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 5 14 | input_dtype = np.dtype(np.int32) 15 | output_dtype = np.dtype(np.float32) 16 | 17 | # Set the compression and decompression parameters 18 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=4) 19 | dparams = blosc2.DParams(nthreads=1) 20 | contiguous = True 21 | urlpath = None 22 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") 23 | # Remove previous SChunk 24 | blosc2.remove_urlpath(urlpath) 25 | # Create and set data 26 | data = np.arange(200 * 1000 * nchunks, dtype=input_dtype) 27 | schunk = blosc2.SChunk( 28 | chunksize=200 * 1000 * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage 29 | ) 30 | 31 | out1 = np.empty(200 * 1000 * nchunks, dtype=input_dtype) 32 | schunk.get_slice(0, 200 * 1000 * nchunks, out=out1) 33 | 34 | 35 | # Set postfilter with decorator 36 | @schunk.postfilter(input_dtype, output_dtype) 37 | def postfilter(input, output, offset): 38 | output[:] = input - np.pi 39 | 40 | 41 | out2 = np.empty(200 * 1000 * nchunks, dtype=output_dtype) 42 | schunk.get_slice(0, 200 * 1000 * nchunks, out=out2) 43 | 44 | res = np.empty(out1.shape, dtype=output_dtype) 45 | postfilter(data, res, None) 46 | # Check postfilter is applied 47 | assert np.allclose(res, out2) 48 | -------------------------------------------------------------------------------- /examples/postfilter2.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 10 14 | input_dtype = np.dtype("M8[D]") 15 | output_dtype = np.int64 # output dtype has to be of the same size as input 16 | 17 | # Set the compression and decompression parameters 18 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) 19 | dparams = blosc2.DParams(nthreads=1) 20 | contiguous = True 21 | urlpath = "filename" 22 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") 23 | # Remove previous SChunk 24 | blosc2.remove_urlpath(urlpath) 25 | # Create and set data 26 | chunkshape = 200 * 1000 27 | data = np.arange(0, chunkshape * nchunks, dtype=input_dtype) 28 | schunk = blosc2.SChunk( 29 | chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage 30 | ) 31 | 32 | out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) 33 | schunk.get_slice(0, chunkshape * nchunks, out=out1) 34 | 35 | 36 | # Set postfilter with decorator 37 | @schunk.postfilter(input_dtype, output_dtype) 38 | def postfilter(input, output, offset): 39 | output[:] = input <= np.datetime64("1997-12-31") 40 | 41 | 42 | out2 = np.empty(chunkshape * nchunks, dtype=output_dtype) 43 | schunk.get_slice(0, chunkshape * nchunks, out=out2) 44 | 45 | res = np.empty(out1.shape, dtype=output_dtype) 46 | postfilter(data, res, None) 47 | # Check postfilter is applied 48 | assert np.array_equal(res, out2) 49 | -------------------------------------------------------------------------------- /examples/postfilter3.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development 
Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 10 14 | input_dtype = np.dtype(np.int64) 15 | 16 | # Set the compression and decompression parameters 17 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) 18 | dparams = blosc2.DParams(nthreads=1) 19 | contiguous = False 20 | urlpath = None 21 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") 22 | # Remove previous SChunk 23 | blosc2.remove_urlpath(urlpath) 24 | # Create and set data 25 | chunkshape = 20_000 26 | data = np.zeros(chunkshape * nchunks, dtype=input_dtype) 27 | schunk = blosc2.SChunk( 28 | chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage 29 | ) 30 | 31 | out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) 32 | schunk.get_slice(0, chunkshape * nchunks, out=out1) 33 | 34 | 35 | # Set postfilter with decorator 36 | @schunk.postfilter(input_dtype) 37 | def postfilter(input, output, offset): 38 | for i in range(input.size): 39 | output[i] = offset + i 40 | 41 | 42 | out2 = np.empty(chunkshape * nchunks, dtype=input_dtype) 43 | schunk.get_slice(0, chunkshape * nchunks, out=out2) 44 | 45 | res = np.arange(out1.size, dtype=input_dtype) 46 | # Check postfilter is applied 47 | assert np.array_equal(res, out2) 48 | -------------------------------------------------------------------------------- /examples/prefilter.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # Example of prefiltering data before compression 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | nchunks = 3 16 | input_dtype = np.dtype(np.int32) 17 | output_dtype = np.dtype(np.float32) 18 | 19 | # Set the compression and decompression parameters 20 | cparams = blosc2.CParams(typesize=4, nthreads=1) 21 | dparams = blosc2.DParams(nthreads=4) 22 | storage = blosc2.Storage(mode="a") 23 | # Create empty schunk 24 | schunk = blosc2.SChunk( 25 | chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams, dparams=dparams, storage=storage 26 | ) 27 | 28 | 29 | # Set prefilter with decorator 30 | @schunk.prefilter(input_dtype, output_dtype) 31 | def prefilter(input, output, offset): 32 | output[:] = input - np.pi 33 | 34 | 35 | # Append data 36 | data = np.arange(200 * 1000 * nchunks, dtype=input_dtype) 37 | schunk[: 200 * 1000 * nchunks] = data 38 | 39 | # Check prefilter is applied correctly 40 | out2 = np.empty(200 * 1000 * nchunks, dtype=output_dtype) 41 | schunk.get_slice(0, 200 * 1000 * nchunks, out=out2) 42 | 43 | res = np.empty(data.shape, dtype=output_dtype) 44 | prefilter(data, res, None) 45 | assert np.allclose(res, out2) 46 | -------------------------------------------------------------------------------- /examples/save_tensor.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | 10 | # A simple example using the save_tensor and load_tensor functions 11 | 12 | import numpy as np 13 | 14 | import blosc2 15 | 16 | a = np.arange(1_000_000) 17 | 18 | file_size = blosc2.save_tensor(a, "save_tensor.bl2", mode="w") 19 | print("Length of saved tensor in file (bytes):", file_size) 20 | 21 | a2 = blosc2.load_tensor("save_tensor.bl2") 22 | assert np.all(a == a2) 23 | -------------------------------------------------------------------------------- /examples/schunk.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 10 14 | # Set the compression and decompression parameters 15 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) 16 | dparams = blosc2.DParams() 17 | contiguous = True 18 | urlpath = "filename" 19 | 20 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") 21 | blosc2.remove_urlpath(urlpath) 22 | numpy_meta = {b"dtype": str(np.dtype("int32"))} 23 | test_meta = {b"lorem": 1234} 24 | meta = {"numpy": numpy_meta, "test": test_meta} 25 | 26 | # Create the empty SChunk 27 | schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, meta=meta, cparams=cparams, dparams=dparams) 28 | # Append some chunks 29 | for i in range(nchunks): 30 | buffer = i * np.arange(200 * 1000, dtype="int32") 31 | nchunks_ = 
schunk.append_data(buffer) 32 | assert nchunks_ == (i + 1) 33 | 34 | # Decompress the second chunk in different ways 35 | buffer = 1 * np.arange(200 * 1000, dtype="int32") 36 | bytes_obj = buffer.tobytes() 37 | res = schunk.decompress_chunk(1) 38 | assert res == bytes_obj 39 | 40 | dest = np.empty(buffer.shape, buffer.dtype) 41 | schunk.decompress_chunk(1, dest) 42 | assert np.array_equal(buffer, dest) 43 | 44 | schunk.decompress_chunk(1, memoryview(dest)) 45 | assert np.array_equal(buffer, dest) 46 | 47 | dest = bytearray(buffer) 48 | schunk.decompress_chunk(1, dest) 49 | assert dest == bytes_obj 50 | 51 | # Insert a chunk in the 5th position 52 | buffer = 10 * np.arange(200 * 1000, dtype="int32") 53 | schunk.insert_data(5, buffer, False) 54 | 55 | # Update a chunk compressing the data first 56 | buffer = 11 * np.arange(200 * 1000, dtype="int32") 57 | chunk = blosc2.compress2(buffer, cparams=cparams) 58 | schunk.update_chunk(7, chunk) 59 | 60 | # Delete the 4th chunk 61 | schunk.delete_chunk(4) 62 | 63 | # Get the compressed chunk 64 | schunk.get_chunk(1) 65 | 66 | # Set a slice from the SChunk 67 | start = 5 * 200 * 1000 + 47 68 | stop = start + 200 * 1000 + 4 69 | val = nchunks * np.arange(stop - start, dtype="int32") 70 | schunk[start:stop] = val 71 | 72 | # Get the modified slice 73 | out = np.empty(val.shape, dtype="int32") 74 | schunk.get_slice(start, stop, out) 75 | assert np.array_equal(val, out) 76 | 77 | # Expand the SChunk with __setitem__ 78 | # When a part of the slice section overflows the SChunk size, 79 | # the remaining data is appended until stop is reached 80 | start = nchunks * 200 * 1000 - 40 81 | stop = start + 200 * 1000 82 | val = nchunks * np.arange(stop - start, dtype="int32") 83 | schunk[start:stop] = val 84 | 85 | blosc2.remove_urlpath(urlpath) 86 | -------------------------------------------------------------------------------- /examples/schunk_roundtrip.py: 
-------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 10 14 | # Set the compression and decompression parameters 15 | cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) 16 | dparams = blosc2.DParams() 17 | contiguous = True 18 | urlpath = "filename" 19 | 20 | storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) 21 | blosc2.remove_urlpath(urlpath) 22 | 23 | # Create the SChunk 24 | data = np.arange(200 * 1000 * nchunks) 25 | schunk = blosc2.SChunk( 26 | chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage 27 | ) 28 | 29 | cframe = schunk.to_cframe() 30 | 31 | schunk2 = blosc2.schunk_from_cframe(cframe, False) 32 | data2 = np.empty(data.shape, dtype=data.dtype) 33 | schunk2.get_slice(out=data2) 34 | assert np.array_equal(data, data2) 35 | 36 | blosc2.remove_urlpath(urlpath) 37 | -------------------------------------------------------------------------------- /examples/ucodecs.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to implement an user defined codec in pure Python 10 | 11 | import sys 12 | 13 | import numpy as np 14 | 15 | import blosc2 16 | 17 | nchunks = 2 18 | chunk_len = 20 * 1000 19 | dtype = np.dtype(np.int32) 20 | 21 | 22 | # Define encoder and decoder functions 23 | def encoder1(input, output, meta, schunk): 24 | # Check whether the data is an arange 25 | nd_input = input.view(dtype) 26 | step = int(nd_input[1] - nd_input[0]) 27 | res = nd_input[1:] - nd_input[:-1] 28 | if np.min(res) == np.max(res): 29 | output[0:4] = input[0:4] # start 30 | n = step.to_bytes(4, sys.byteorder) 31 | output[4:8] = [n[i] for i in range(4)] 32 | return 8 33 | else: 34 | # Not compressible, tell Blosc2 to do a memcpy 35 | return 0 36 | 37 | 38 | def decoder1(input, output, meta, schunk): 39 | # For decoding we only have to worry about the arange case 40 | # (other cases are handled by Blosc2) 41 | nd_input = input.view(dtype) 42 | nd_output = output.view(dtype) 43 | nd_output[:] = [nd_input[0] + i * nd_input[1] for i in range(nd_output.size)] 44 | 45 | return nd_output.size * schunk.typesize 46 | 47 | 48 | # Register codec 49 | codec_name = "codec" 50 | id = 180 51 | blosc2.register_codec(codec_name, id, encoder1, decoder1) 52 | 53 | # Set the compression and decompression parameters 54 | cparams = blosc2.CParams( 55 | typesize=dtype.itemsize, codec=id, nthreads=1, filters=[blosc2.Filter.NOFILTER], filters_meta=[0] 56 | ) 57 | dparams = blosc2.DParams(nthreads=1) 58 | 59 | # Create SChunk and fill it with data 60 | data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) 61 | schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, data=data, cparams=cparams, dparams=dparams) 62 | 63 | # Check data can be decompressed correctly 64 | out = 
np.empty(chunk_len * nchunks, dtype=dtype) 65 | schunk.get_slice(0, chunk_len * nchunks, out=out) 66 | assert np.array_equal(data, out) 67 | -------------------------------------------------------------------------------- /examples/ufilters.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | # This shows how to implement an user defined filter in pure Python 10 | 11 | import numpy as np 12 | 13 | import blosc2 14 | 15 | nchunks = 2 16 | chunk_len = 20 * 1000 17 | dtype = np.dtype(np.int32) 18 | 19 | 20 | # Define forward and backward functions 21 | def forward(input, output, meta, schunk): 22 | nd_input = input.view(dtype) 23 | nd_output = output.view(dtype) 24 | 25 | nd_output[:] = nd_input + 1 26 | 27 | 28 | def backward(input, output, meta, schunk): 29 | nd_input = input.view(dtype) 30 | nd_output = output.view(dtype) 31 | 32 | nd_output[:] = nd_input - 1 33 | 34 | 35 | # Register filter 36 | id = 160 37 | blosc2.register_filter(id, forward, backward) 38 | 39 | # Set the compression and decompression parameters 40 | cparams = blosc2.CParams( 41 | typesize=dtype.itemsize, nthreads=1, filters=[blosc2.Filter.NOFILTER, id], filters_meta=[0, 0] 42 | ) 43 | dparams = blosc2.DParams(nthreads=1) 44 | 45 | # Create SChunk and fill it with data 46 | data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) 47 | schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, data=data, cparams=cparams, dparams=dparams) 48 | 49 | # Check data can be decompressed correctly 50 | out = np.empty(chunk_len * nchunks, dtype=dtype) 51 | schunk.get_slice(0, chunk_len * 
nchunks, out=out) 52 | assert np.array_equal(data, out) 53 | -------------------------------------------------------------------------------- /examples/vlmeta.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | 11 | import blosc2 12 | 13 | nchunks = 10 14 | schunk = blosc2.SChunk(chunksize=200 * 1000 * 4) 15 | for i in range(nchunks): 16 | buffer = i * np.arange(200 * 1000, dtype="int32") 17 | nchunks_ = schunk.append_data(buffer) 18 | assert nchunks_ == (i + 1) 19 | 20 | # Initially the vlmeta is empty 21 | print(len(schunk.vlmeta)) 22 | # Add a vlmeta 23 | schunk.vlmeta["meta1"] = "first vlmetalayer" 24 | print(schunk.vlmeta.getall()) 25 | # Update the vlmeta 26 | schunk.vlmeta["meta1"] = "new vlmetalayer" 27 | print(schunk.vlmeta.getall()) 28 | # Add another vlmeta 29 | schunk.vlmeta["vlmeta2"] = "second vlmeta" 30 | # Check that it has been added 31 | assert "vlmeta2" in schunk.vlmeta 32 | 33 | # Delete a vlmeta 34 | del schunk.vlmeta["vlmeta2"] 35 | assert "vlmeta2" not in schunk.vlmeta 36 | -------------------------------------------------------------------------------- /generate_version.py: -------------------------------------------------------------------------------- 1 | import tomllib as toml 2 | 3 | with open("pyproject.toml", "rb") as f: 4 | pyproject = toml.load(f) 5 | 6 | version = pyproject["project"]["version"] 7 | 8 | with open("src/blosc2/version.py", "w") as f: 9 | f.write(f'__version__ = "{version}"\n') 10 | -------------------------------------------------------------------------------- /guix.scm: 
-------------------------------------------------------------------------------- 1 | .guix/modules/python-blosc2-package.scm -------------------------------------------------------------------------------- /images/Complete-Write-Read-B2ND.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/Complete-Write-Read-B2ND.png -------------------------------------------------------------------------------- /images/M1-i386-vs-arm64-pack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/M1-i386-vs-arm64-pack.png -------------------------------------------------------------------------------- /images/M1-i386-vs-arm64-unpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/M1-i386-vs-arm64-unpack.png -------------------------------------------------------------------------------- /images/Read-Partial-Slices-B2ND.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/Read-Partial-Slices-B2ND.png -------------------------------------------------------------------------------- /images/b2nd-2level-parts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/b2nd-2level-parts.png -------------------------------------------------------------------------------- /images/eval-expr-full-mem-M2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/eval-expr-full-mem-M2.png -------------------------------------------------------------------------------- /images/eval-expr-scarce-mem-M2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/eval-expr-scarce-mem-M2.png -------------------------------------------------------------------------------- /images/lazyarray-dask-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/lazyarray-dask-large.png -------------------------------------------------------------------------------- /images/lazyarray-dask-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/lazyarray-dask-small.png -------------------------------------------------------------------------------- /images/lazyarray-expr-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/lazyarray-expr-large.png -------------------------------------------------------------------------------- /images/lazyarray-expr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/lazyarray-expr.png -------------------------------------------------------------------------------- /images/linspace-compress.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/linspace-compress.png -------------------------------------------------------------------------------- /images/linspace-decompress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/linspace-decompress.png -------------------------------------------------------------------------------- /images/pack-array-cratios.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/pack-array-cratios.png -------------------------------------------------------------------------------- /images/reduc-float64-amd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/reduc-float64-amd.png -------------------------------------------------------------------------------- /images/reduc-float64-log-amd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Blosc/python-blosc2/8bc8ce2618388c9bd788f68e0c3caf671becc63e/images/reduc-float64-log-amd.png -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --doctest-modules -m "not network and not heavy" 3 | testpaths = 4 | tests 5 | blosc2/core.py 6 | blosc2/ndarray.py 7 | blosc2/schunk.py 8 | 9 | markers = 10 | heavy: tests that take long time to complete. 11 | network: tests that require network access. 
def _inherit_doc_parameter(parent_func, parameter, replacements=None):
    """Build a decorator that copies one parameter description between docstrings.

    The description of ``parameter`` is looked up in the docstring of
    ``parent_func`` and substituted for the occurrence(s) of ``parameter`` in
    the docstring of the decorated function, re-indented to match the child
    (see blosc2.open for an example).

    Parameters
    ----------
    parent_func: callable
        Function whose docstring contains the description to copy.
    parameter: str
        Literal text that starts the parameter entry (e.g. ``"mode:"``). It is
        matched literally, so regex metacharacters in it are harmless.
    replacements: dict or None
        Optional mapping of ``regex -> replacement`` applied to the resulting
        child docstring. Every rule must change the docstring.

    Returns
    -------
    callable
        A decorator that updates ``child_func.__doc__`` in place and returns
        the very same function object.

    Raises
    ------
    AssertionError
        If the parameter cannot be found in either docstring, or if a
        replacement rule does not change anything.
    """

    def wrapper(child_func):
        parent_doc = parent_func.__doc__
        child_doc = child_func.__doc__
        if parent_doc is None or child_doc is None:
            # Docstrings are stripped (e.g. under ``python -OO``); nothing to copy
            return child_func

        # Match the parameter name literally, even if it holds regex metacharacters
        parameter_re = re.escape(parameter)

        # Copy relevant lines from parent function
        matching_lines = []
        indent_parent = None
        for line in parent_doc.splitlines():
            if parameter in line:
                match = re.search(rf"(\s*){parameter_re}", line)
                assert match is not None, (
                    f"Parameter {parameter} not found in the docstring of {parent_func.__name__}"
                )
                indent_parent = match.group(1)

                # The first line should be without the indentation because it will be
                # placed at the correct location in the child function
                matching_lines.append(line.lstrip())
            elif indent_parent is not None:
                if re.search(rf"^{indent_parent}\w+", line) is not None:
                    # Next parameter starts, stop copying lines
                    break
                matching_lines.append(line)
        assert len(matching_lines) > 0, (
            f"Could not extract the parameter {parameter} from the docstring of {parent_func.__name__}"
        )

        # Replace the indentation of the parent with the indentation used in the child function
        match = re.search(rf"([ \t]+){parameter_re}", child_doc)
        assert match is not None, (
            f"Parameter {parameter} not found in the docstring of {child_func.__name__}"
        )
        indent_child = match.group(1)

        # First line contains the parameter name itself which should not be indented
        matching_lines = [matching_lines[0].lstrip()] + [
            ml.replace(indent_parent, indent_child, 1) for ml in matching_lines[1:]
        ]

        child_func.__doc__ = child_doc.replace(parameter, "\n".join(matching_lines))

        if replacements is not None:
            for regex, repl in replacements.items():
                new_doc = re.sub(regex, repl, child_func.__doc__)
                assert new_doc != child_func.__doc__, (
                    f"The replacement rule {regex}: {repl} did not change the docstring of {child_func.__name__}"
                )
                child_func.__doc__ = new_doc

        return child_func

    return wrapper
def info_text_report(items: list) -> str:
    """Render ``(key, value)`` pairs as an aligned plain-text report.

    Keys are left-justified to the width of the longest key and followed by
    `` : ``. Dict values are formatted with :func:`pprint.pformat`; any other
    value is converted with ``str`` and wrapped at 96 columns, continuation
    lines being indented under the value column.

    Parameters
    ----------
    items: list
        Sequence of ``(key, value)`` tuples; keys must be strings.

    Returns
    -------
    str
        One newline-terminated entry per item, or an empty string when
        ``items`` is empty.
    """
    # ``default=0`` keeps an empty report from raising ValueError in max()
    max_key_len = max((len(k) for k, _ in items), default=0)
    entries = []
    for k, v in items:
        label = k.ljust(max_key_len) + " : "
        if isinstance(v, dict):
            # pprint is used on purpose: rich output doesn't work well in the notebooks
            text = label + pprint.pformat(v)
        else:
            wrapper = TextWrapper(
                width=96,
                initial_indent=label,
                subsequent_indent=" " * max_key_len + " : ",
            )
            text = wrapper.fill(str(v))
        entries.append(text + "\n")
    return "".join(entries)
{k}{v}
" 52 | return report 53 | 54 | 55 | class InfoReporter: 56 | def __init__(self, obj): 57 | self.obj = obj 58 | 59 | def __repr__(self): 60 | items = self.obj.info_items 61 | return info_text_report(items) 62 | 63 | def _repr_html_(self): 64 | items = self.obj.info_items 65 | return info_html_report(items) 66 | -------------------------------------------------------------------------------- /src/blosc2/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.3.5.dev0" 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | import os 9 | 10 | import pytest 11 | 12 | import blosc2 13 | 14 | 15 | def pytest_configure(config): 16 | blosc2.print_versions() 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def c2sub_context(): 21 | # You may use the URL and credentials for an already existing user 22 | # in a different Caterva2 subscriber. 23 | urlbase = os.environ.get("BLOSC_C2URLBASE", "https://demo.caterva2.net/") 24 | c2params = {"urlbase": urlbase, "username": None, "password": None} 25 | with blosc2.c2context(**c2params): 26 | yield c2params 27 | 28 | 29 | # This is to avoid sporadic failures in the CI when reaching network, 30 | # but this makes the tests to stuck in local. Perhaps move this to 31 | # every test module that needs it? 
32 | # def pytest_runtest_call(item): 33 | # try: 34 | # item.runtest() 35 | # except requests.ConnectTimeout: 36 | # pytest.skip("Skipping test due to sporadic requests.ConnectTimeout") 37 | # except requests.ReadTimeout: 38 | # pytest.skip("Skipping test due to sporadic requests.ReadTimeout") 39 | # except requests.Timeout: 40 | # pytest.skip("Skipping test due to sporadic requests.Timeout") 41 | -------------------------------------------------------------------------------- /tests/ndarray/test_buffer.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import pytest 11 | 12 | import blosc2 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("shape", "chunks", "blocks", "dtype", "urlpath", "contiguous", "meta"), 17 | [ 18 | ([450], [128], [25], "|S8", "frombuffer.b2nd", True, None), 19 | ([20, 134, 13], [3, 13, 5], [3, 10, 5], np.complex128, "frombuffer.b2nd", False, {"123": 123}), 20 | ([45], [12], [6], "|S4", None, True, None), 21 | ([30, 29], [15, 28], [5, 27], np.int16, None, False, {"2": 123, "meta2": "abcdef"}), 22 | ], 23 | ) 24 | def test_buffer(shape, chunks, blocks, dtype, urlpath, contiguous, meta): 25 | blosc2.remove_urlpath(urlpath) 26 | 27 | dtype = np.dtype(dtype) 28 | typesize = dtype.itemsize 29 | size = int(np.prod(shape)) 30 | buffer = bytes(size * typesize) 31 | a = blosc2.frombuffer( 32 | buffer, 33 | shape, 34 | chunks=chunks, 35 | blocks=blocks, 36 | dtype=dtype, 37 | urlpath=urlpath, 38 | contiguous=contiguous, 39 | meta=meta, 40 | ) 41 | buffer2 = a.tobytes() 42 | assert buffer == buffer2 43 | 44 | 
argnames = "shape, chunks, blocks, slices, dtype"
argvalues = [
    ([456], [258], [73], slice(0, 1), np.int32),
    ([456, 200], [258, 100], [73, 25], (slice(0), slice(0)), np.int64),
    ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64),
    ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32),
]


@pytest.mark.parametrize(argnames, argvalues)
def test_getitem(shape, chunks, blocks, slices, dtype):
    """Check that get_slice_nchunks reports the chunks touched by a slice.

    Every chunk is overwritten with its own index, so the unique values read
    back through ``a[slices]`` are the indexes of the chunks the slice touches.
    """
    a = blosc2.zeros(shape, dtype, chunks=chunks, blocks=blocks)
    schunk = a.schunk
    for i in range(schunk.nchunks):
        # Fill chunk ``i`` with the value ``i`` so that data identifies its chunk
        chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=dtype)
        schunk.update_data(i, chunk, True)

    # np.array_equal only returns a bool; without ``assert`` nothing is verified
    assert np.array_equal(np.unique(a[slices]), blosc2.get_slice_nchunks(a, slices))
@pytest.mark.parametrize(
    ("shape", "dtype", "cparams", "urlpath", "contiguous"),
    [
        (
            (32, 18),
            np.float32,
            blosc2.CParams(codec=blosc2.Codec.NDLZ, codec_meta=4),
            None,
            False,
        ),
        (
            # For some reason, ZFP needs to always split buffers in this test
            (100, 1230),
            np.float64,
            {"codec": blosc2.Codec.ZFP_ACC, "codec_meta": 37, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT},
            None,
            False,
        ),
        (
            (23, 34),
            np.float64,
            {"codec": blosc2.Codec.ZFP_PREC, "codec_meta": 37},
            "lossy.b2nd",
            True,
        ),
        (
            # For some reason, ZFP needs to always split buffers in this test
            (80, 51, 60),
            np.float32,
            {"codec": blosc2.Codec.ZFP_RATE, "codec_meta": 37, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT},
            "lossy.b2nd",
            False,
        ),
        (
            (13, 13),
            np.int32,
            {"filters": [blosc2.Filter.NDMEAN], "filters_meta": [4]},
            None,
            True,
        ),
        (
            (10, 10),
            np.int64,
            {"filters": [blosc2.Filter.NDCELL], "filters_meta": [4]},
            None,
            False,
        ),
    ],
)
def test_lossy(shape, cparams, dtype, urlpath, contiguous):
    """Round-trip an array through the NDLZ/ZFP codecs and the NDMEAN/NDCELL filters.

    For the ZFP codecs and the NDMEAN filter the data is only decompressed
    (no value comparison); for the remaining cases the decompressed data is
    compared against the source array.
    """
    cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams)
    if cparams_dict.get("codec") == blosc2.Codec.NDLZ:
        # The NDLZ case is exercised with 8-bit data instead of the parametrized dtype
        dtype = np.uint8
    array = np.linspace(0, np.prod(shape), np.prod(shape), dtype=dtype).reshape(shape)
    a = blosc2.asarray(array, cparams=cparams, urlpath=urlpath, contiguous=contiguous, mode="w")

    if (
        a.schunk.cparams.codec in (blosc2.Codec.ZFP_RATE, blosc2.Codec.ZFP_PREC, blosc2.Codec.ZFP_ACC)
        or a.schunk.cparams.filters[0] == blosc2.Filter.NDMEAN
    ):
        # Only check that decompression works
        _ = a[...]
    else:
        tol = 1e-5
        if dtype in (np.float32, np.float64):
            np.testing.assert_allclose(a[...], array, rtol=tol, atol=tol)
        else:
            # np.array_equal only returns a bool; it must be asserted to check anything
            assert np.array_equal(a[...], array)

    blosc2.remove_urlpath(urlpath)
not in a.schunk.meta 52 | assert a.schunk.meta["numpy"] == numpy_meta 53 | assert "test" in a.schunk.meta 54 | assert a.schunk.meta["test"] == test_meta 55 | 56 | test_meta = {b"lorem": 4231} 57 | a.schunk.meta["test"] = test_meta 58 | assert a.schunk.meta["test"] == test_meta 59 | 60 | # Remove file on disk 61 | blosc2.remove_urlpath(urlpath) 62 | -------------------------------------------------------------------------------- /tests/ndarray/test_mode.py: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Copyright (c) 2019-present, Blosc Development Team 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under a BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) 7 | ####################################################################### 8 | 9 | import numpy as np 10 | import pytest 11 | 12 | import blosc2 13 | 14 | 15 | @pytest.mark.parametrize("mode", ["r", "w", "a"]) 16 | @pytest.mark.parametrize("urlpath", ["test_mode.b2nd"]) 17 | @pytest.mark.parametrize( 18 | ("shape", "fill_value", "dtype", "cparams", "dparams", "contiguous"), 19 | [ 20 | ( 21 | (80, 51, 60), 22 | 3.14, 23 | np.float64, 24 | {"codec": blosc2.Codec.ZLIB, "clevel": 5, "use_dict": False, "nthreads": 2}, 25 | blosc2.DParams(nthreads=1), 26 | False, 27 | ), 28 | ( 29 | (13, 13), 30 | 123456789, 31 | None, 32 | blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), 33 | {"nthreads": 2}, 34 | True, 35 | ), 36 | ], 37 | ) 38 | def test_mode(shape, fill_value, cparams, dparams, dtype, urlpath, contiguous, mode): 39 | blosc2.remove_urlpath(urlpath) 40 | if mode == "r": 41 | with pytest.raises(ValueError): 42 | blosc2.full( 43 | shape, 44 | fill_value, 45 | dtype=dtype, 46 | cparams=cparams, 47 | dparams=dparams, 48 | storage={"urlpath": urlpath, "contiguous": contiguous, "mode": mode}, 49 | ) 50 | _ = 
@pytest.mark.parametrize(
    ("shape", "dtype"),
    [
        ((100, 1230), np.float64),
        ((23, 34), np.float32),
        ((80, 51, 60), "f4"),
        ((13, 13), None),
    ],
)
def test_nans_simple(shape, dtype):
    """Check that blosc2.nans() yields an all-NaN array of the requested dtype.

    When ``dtype`` is None the result is expected to be float64.
    """
    a = blosc2.nans(shape, dtype=dtype)
    # Compute the expectation first: ``assert x == y if c else z`` parses as
    # ``assert ((x == y) if c else z)``, which made the dtype=None case vacuous
    expected_dtype = np.dtype(np.float64) if dtype is None else np.dtype(dtype)
    assert a.dtype == expected_dtype

    b = np.full(shape=shape, fill_value=np.nan, dtype=a.dtype)
    np.testing.assert_allclose(a[...], b)
@pytest.mark.parametrize(
    ("shape", "chunks", "blocks", "dtype"),
    [
        ([931], [223], [45], np.int32),
        ([134, 121, 78], [12, 13, 18], [4, 4, 9], np.float64),
    ],
)
def test_numpy(shape, chunks, blocks, dtype):
    """Round-trip a NumPy arange through blosc2.asarray and compare the contents."""
    nitems = int(np.prod(shape))
    expected = np.arange(nitems, dtype=dtype).reshape(shape)
    compressed = blosc2.asarray(expected, chunks=chunks, blocks=blocks)
    # Decompress everything and compare against the source array
    restored = compressed[...]
    np.testing.assert_almost_equal(expected, restored)
@pytest.mark.parametrize("contiguous", [True, False])
@pytest.mark.parametrize(
    ("shape", "chunks", "blocks", "urlpath", "dtype"),
    [
        ([634], [156], [33], "test00.b2nd", np.float64),
        ([20, 134, 13], [7, 22, 5], [3, 5, 3], "test01.b2nd", np.int32),
        ([12, 13, 14, 15, 16], [4, 6, 4, 7, 5], [2, 4, 2, 3, 3], "test02.b2nd", np.float32),
    ],
)
def test_persistency(shape, chunks, blocks, urlpath, contiguous, dtype):
    """Store an array on disk, reopen it and check that the data is unchanged."""
    # Start from a clean slate in case a previous run left the file behind
    blosc2.remove_urlpath(urlpath)

    expected = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
    _ = blosc2.asarray(expected, chunks=chunks, blocks=blocks, urlpath=urlpath, contiguous=contiguous)

    reopened = blosc2.open(urlpath)
    loaded = np.asarray(reopened[:]).view(dtype)
    np.testing.assert_almost_equal(expected, loaded)

    blosc2.remove_urlpath(urlpath)
@pytest.mark.parametrize(
    "chunks_blocks",
    [(True, True), (True, False), (False, True), (False, False)],
)
@pytest.mark.parametrize(
    ("urlpath", "slices"),
    [
        (None, (slice(0, 23), slice(None))),
        ("proxy", (slice(None), slice(None))),
        (None, (slice(0, 5), slice(0, 60))),
        ("proxy", (slice(37, 53), slice(19, 233))),
    ],
)
def test_simple(chunks_blocks, c2sub_context, urlpath, slices):
    """Compare a Proxy (sliced, partially fetched and fully fetched) with its source."""
    source = get_array((60, 60), chunks_blocks)
    proxy = blosc2.Proxy(source, urlpath=urlpath, mode="w")

    # Plain slicing through the proxy
    np.testing.assert_allclose(proxy[slices], source[slices])

    # Fetching only the requested region into the cache
    partial = proxy.fetch(slices)
    assert partial.schunk.urlpath == urlpath
    np.testing.assert_allclose(partial[slices], source[slices])

    # Fetching the whole array into the cache
    complete = proxy.fetch()
    assert complete.schunk.urlpath == urlpath
    np.testing.assert_allclose(complete[...], source[...])

    blosc2.remove_urlpath(urlpath)
def test_open(c2sub_context):
    """Persist a Proxy on disk, reopen it with blosc2.open and compare with the source."""
    urlpath = "proxy.b2nd"
    shape = (NITEMS_SMALL,)
    chunks_blocks = "default"

    source = get_array(shape, chunks_blocks)
    proxy = blosc2.Proxy(source, urlpath=urlpath, mode="w")
    # Drop both references before reopening the proxy from disk
    del source
    del proxy

    reopened = blosc2.open(urlpath)
    source = get_array(shape, chunks_blocks)
    np.testing.assert_allclose(reopened[...], source[...])

    blosc2.remove_urlpath(urlpath)
@pytest.mark.parametrize(
    "chunks_blocks",
    [
        (True, True),
        (True, False),
        (False, True),
        (False, False),
    ],
)
def test_expr_proxy_operands(chunks_blocks, c2sub_context):
    """Evaluate a lazy expression over the operands returned by get_arrays.

    The expression is computed on a slice, saved to disk, reopened as a
    LazyExpr and computed in full; both results are compared with the
    reference obtained from the NumPy operands.
    """
    shape = (60, 60)
    a1, a2, a3, a4, na1, na2, na3, na4, cleanup_paths = get_arrays(shape, chunks_blocks)

    # Slice
    sl = slice(10)
    expr = a1 + a2 + a3 + a4
    expr += 3
    # NOTE(review): the expression string names the locals na1..na4; presumably
    # ne_evaluate resolves them from this frame, so do not rename them -- confirm
    nres = ne_evaluate("na1 + na2 + na3 + na4 + 3")
    res = expr.compute(item=sl)
    np.testing.assert_allclose(res[:], nres[sl])

    # Save
    urlpath = "expr_proxies.b2nd"
    expr.save(urlpath=urlpath, mode="w")
    del expr
    expr_opened = blosc2.open("expr_proxies.b2nd")
    assert isinstance(expr_opened, blosc2.LazyExpr)

    # All
    res = expr_opened.compute()
    np.testing.assert_allclose(res[:], nres)

    # Cleanup
    blosc2.remove_urlpath(urlpath)
    for path in cleanup_paths:
        blosc2.remove_urlpath(path)
@pytest.mark.parametrize(
    ("shape", "new_shape", "chunks", "blocks", "fill_value"),
    [
        ((100, 1230), (200, 1230), (200, 100), (55, 3), b"0123"),
        ((23, 34), (23, 120), (20, 20), (10, 10), 1234),
        ((80, 51, 60), (80, 100, 100), (20, 10, 33), (6, 6, 26), 3.333),
    ],
)
def test_resize(shape, new_shape, chunks, blocks, fill_value):
    """Grow an array and check that the original region keeps its fill value."""
    arr = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)

    arr.resize(new_shape)
    assert arr.shape == new_shape

    # Region covering the array as it was before the resize
    old_region = tuple(map(slice, shape))
    for item in np.nditer(arr[old_region]):
        assert item == fill_value
@pytest.mark.parametrize(
    ("shape", "chunks", "blocks", "fill_value"),
    [
        ((1, 1230), (1, 100), (1, 3), b"0123"),
        ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234),
        ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333),
        ((1, 1, 1), None, None, True),
    ],
)
def test_squeeze(shape, chunks, blocks, fill_value):
    """Call squeeze() and check that the reported shape matches the decompressed data."""
    arr = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks)

    arr.squeeze()
    decompressed = arr[...]

    assert arr.shape == decompressed.shape
import pytest

import blosc2


@pytest.mark.parametrize("arr", [b"", b"1" * 7])
@pytest.mark.parametrize("gil", [True, False])
def test_bytes_array(arr, gil):
    """Round-trip small ``bytes`` objects for both values passed to ``set_releasegil``."""
    blosc2.set_releasegil(gil)

    compressed = blosc2.compress(arr, 1)
    restored = blosc2.decompress(compressed)

    assert arr == restored


@pytest.mark.parametrize("data", [bytearray(7241), bytearray(7241) * 7])
def test_bytearray(data):
    """Round-trip zero-filled ``bytearray`` buffers of two different sizes."""
    restored = blosc2.decompress(blosc2.compress(data, typesize=1))
    assert data == restored
import numpy as np
import pytest

import blosc2


@pytest.mark.parametrize("codec", blosc2.compressor_list())
def test_comp_info(codec):
    """Select each available codec and inspect the sizes of a compressed buffer.

    For every codec reported by ``blosc2.compressor_list()`` the test queries
    its library info, makes it the current compressor, and then compresses an
    all-zero buffer to check the values returned by
    ``blosc2.get_cbuffer_sizes``.
    """
    blosc2.clib_info(codec)
    blosc2.set_compressor(codec)
    assert codec.name.lower() == blosc2.get_compressor()

    zeros = np.zeros(1_000_000, dtype="V8")
    compressed = blosc2.compress2(zeros)
    nbytes, cbytes, blocksize = blosc2.get_cbuffer_sizes(compressed)

    # The uncompressed size must be the full size of the input buffer ...
    assert nbytes == zeros.nbytes
    # ... while the all-zero input is expected to take exactly MAX_OVERHEAD bytes.
    assert cbytes == blosc2.MAX_OVERHEAD
    # If this upper bound ever has to be raised because the check starts
    # failing, the size of the input array should be raised as well.
    assert blocksize <= 2**23

    blosc2.print_versions()
import pytest

import blosc2


@pytest.mark.parametrize(("nthreads", "blocksize"), [(2, 0), (1, 30), (4, 5)])
def test_compression_parameters(nthreads, blocksize):
    """Smoke-test the global thread-count and blocksize setters.

    Both setters change process-wide state, so the previous thread count and
    the automatic blocksize (0) are restored afterwards; otherwise the last
    case would leave a forced 5-byte blocksize active for every test that
    runs later in the same process.
    """
    # set_nthreads() hands back the value that was active before the call.
    previous_nthreads = blosc2.set_nthreads(nthreads)
    try:
        blosc2.set_blocksize(blocksize)
    finally:
        blosc2.set_blocksize(0)  # 0 selects the automatic blocksize again
        blosc2.set_nthreads(previous_nthreads)


@pytest.mark.parametrize("gil", [True, False])
@pytest.mark.parametrize(
    ("clevel", "codec"),
    [
        (8, blosc2.Codec.BLOSCLZ),
        (9, blosc2.Codec.LZ4),
        (3, blosc2.Codec.LZ4HC),
        (5, blosc2.Codec.ZLIB),
        (2, blosc2.Codec.ZSTD),
    ],
)
@pytest.mark.parametrize("filt", list(blosc2.Filter))
def test_compressors(clevel, filt, codec, gil):
    """Round-trip a small buffer for every codec/filter pair and check the reported library."""
    blosc2.set_releasegil(gil)

    src = b"Something to be compressed" * 100
    dest = blosc2.compress(src, 1, clevel, filt, codec)
    assert src == blosc2.decompress(dest)

    # LZ4HC buffers are reported as produced by the "lz4" library; every
    # other codec reports a library named after the codec itself.
    expected_clib = "lz4" if codec == blosc2.Codec.LZ4HC else codec.name.lower()
    assert blosc2.get_clib(dest).lower() == expected_clib

    blosc2.free_resources()
import numpy as np
import pytest

import blosc2


@pytest.mark.parametrize("contiguous", [True, False])
@pytest.mark.parametrize("urlpath", [None, "b2frame"])
@pytest.mark.parametrize(
    ("cparams", "dparams", "nchunks"),
    [
        ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {"nthreads": 1}, 0),
        ({"typesize": 4}, {"nthreads": 1}, 1),
        ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {"nthreads": 1}, 5),
        ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {"nthreads": 1}, 10),
    ],
)
def test_iterchunks(contiguous, urlpath, cparams, dparams, nchunks):
    """Compare every chunk yielded by ``iterchunks`` with ``decompress_chunk``."""
    items_per_chunk = 200 * 1000
    blosc2.remove_urlpath(urlpath)

    schunk = blosc2.SChunk(
        chunksize=items_per_chunk * 4,
        contiguous=contiguous,
        urlpath=urlpath,
        cparams=cparams,
        dparams=dparams,
    )

    # Chunk number i holds i * [0, 1, 2, ...]; append_data must report the
    # running number of chunks after each append.
    for index in range(nchunks):
        appended = schunk.append_data(index * np.arange(items_per_chunk, dtype="int32"))
        assert appended == (index + 1)

    reference = np.empty(items_per_chunk, np.int32)
    for index, chunk in enumerate(schunk.iterchunks(np.int32)):
        schunk.decompress_chunk(index, reference)
        assert np.array_equal(chunk, reference)

    blosc2.remove_urlpath(urlpath)


@pytest.mark.parametrize("contiguous", [True, False])
@pytest.mark.parametrize("urlpath", [None, "b2frame"])
@pytest.mark.parametrize(
    ("cparams", "dparams", "nchunks"),
    [
        ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {"nthreads": 1}, 2),
        ({"typesize": 4}, {"nthreads": 1}, 1),
        ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {"nthreads": 1}, 5),
        ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {"nthreads": 1}, 3),
    ],
)
def test_iterchunks_pf(contiguous, urlpath, cparams, dparams, nchunks):
    """Check that ``iterchunks`` yields data that went through a postfilter."""
    blosc2.remove_urlpath(urlpath)

    chunkshape = 200 * 1000
    data = np.arange(0, nchunks * chunkshape, dtype=np.int32)
    schunk = blosc2.SChunk(
        chunksize=chunkshape * 4,
        data=data,
        contiguous=contiguous,
        urlpath=urlpath,
        cparams=cparams,
        dparams=dparams,
    )

    @schunk.postfilter(np.int32, np.int32)
    def postf1(input, output, offset):
        output[:] = input - 1

    # Apply the same transformation to the reference data, in place.
    np.subtract(data, 1, out=data)

    for index, chunk in enumerate(schunk.iterchunks(np.int32)):
        first = index * chunkshape
        assert np.array_equal(chunk, data[first : first + chunkshape])

    blosc2.remove_urlpath(urlpath)
import numpy as np
import pytest

import blosc2


@pytest.mark.parametrize(
    ("contiguous", "urlpath", "cparams", "nchunks", "start", "stop"),
    [
        (True, None, {"typesize": 4}, 10, 0, 100),
        (True, "b2frame", {"typesize": 4}, 1, 7, 23),
        (
            False,
            None,
            {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4},
            5,
            21,
            200 * 2 * 100,
        ),
        (False, "b2frame", {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, 7, None, None),
        (True, None, {"blocksize": 200 * 100, "typesize": 4}, 5, -2456, -234),
        (True, "b2frame", {"blocksize": 200 * 100, "typesize": 4}, 4, 2456, -234),
        (False, None, {"blocksize": 100 * 100, "typesize": 4}, 2, -200 * 100 + 234, 40000),
        (True, None, {"blocksize": 100 * 100, "typesize": 4}, 2, 0, None),
    ],
)
def test_schunk_get_slice(contiguous, urlpath, cparams, nchunks, start, stop):
    """Check ``blosc2.get_slice_nchunks`` against a known chunk layout.

    Chunk ``i`` of the super-chunk is filled with the value ``i``, so the
    value stored at item ``k`` is the index of the chunk that owns ``k``.
    The chunk indexes expected for ``[start:stop]`` are therefore the unique
    values found in that range of the fill pattern.

    The comparisons are asserted (their results used to be discarded), and
    the expected values come from the fill pattern rather than from the
    output buffer of ``get_slice``, which is only filled from its beginning.
    """
    kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams}
    schunk = blosc2.SChunk(chunksize=200 * 100 * 4, mode="w", **kwargs)
    try:
        chunk_nitems = schunk.chunksize // schunk.typesize
        for i in range(nchunks):
            schunk.append_data(np.full(chunk_nitems, i, dtype=np.int32))

        # owner[k] is the index of the chunk that holds item k.
        owner = np.repeat(np.arange(nchunks, dtype=np.int32), chunk_nitems)

        # Kept from the original test: exercises writing a slice into a
        # caller-provided buffer.
        aux = np.empty(200 * 100 * nchunks, dtype=np.int32)
        schunk.get_slice(start, stop, aux)

        if stop is None and start is not None:
            # Single-item key.  np.array_equal is shape-sensitive, so the
            # scalar is promoted to a 1-d array before comparing.
            expected = np.atleast_1d(owner[start])
            assert np.array_equal(expected, blosc2.get_slice_nchunks(schunk, start))
        else:
            expected = np.unique(owner[start:stop])
            assert np.array_equal(expected, blosc2.get_slice_nchunks(schunk, (start, stop)))
            # slice variant
            assert np.array_equal(expected, blosc2.get_slice_nchunks(schunk, slice(start, stop)))
    finally:
        # Remove the on-disk frame even when an assertion fails.
        blosc2.remove_urlpath(urlpath)
import re
import sys


def _rewrite_assignment(path, name, new_version):
    """Rewrite the first top-level ``name = "..."`` line in *path*.

    The match is anchored at the start of a line so that keys which merely
    end in *name* (for instance ``target-version``) are left untouched, and
    it stops at the first closing quote so trailing text on the same line is
    preserved.  A file without a matching line is written back unchanged.
    """
    pattern = re.compile(rf'^{re.escape(name)} = "[^"\n]*"', flags=re.MULTILINE)
    with open(path, encoding="utf-8") as file:
        content = file.read()
    # A callable replacement inserts new_version literally; a plain string
    # would have its backslashes interpreted as regex escapes.
    content = pattern.sub(lambda _match: f'{name} = "{new_version}"', content, count=1)
    with open(path, "w", encoding="utf-8") as file:
        file.write(content)


def update_version(new_version):
    """Write *new_version* into ``pyproject.toml`` and ``src/blosc2/version.py``.

    Both paths are relative to the current working directory.

    Parameters
    ----------
    new_version: str
        The version string to store, e.g. ``"3.2.1"``.
    """
    # Update version in pyproject.toml
    _rewrite_assignment("pyproject.toml", "version", new_version)

    # Update version in src/blosc2/version.py
    _rewrite_assignment("src/blosc2/version.py", "__version__", new_version)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python update_version.py <new_version>")
        sys.exit(1)
    new_version = sys.argv[1]
    update_version(new_version)
    print(f"Version updated to {new_version}")