├── .gitignore
├── .readthedocs.yaml
├── LICENSE
├── README.md
└── docs
    ├── Makefile
    ├── _static
        ├── css
        │   └── custom.css
        ├── draft-watermark.png
        └── logo.png
    ├── conf.py
    ├── index.rst
    ├── make.bat
    ├── requirements.txt
    ├── specs.rst
    ├── v1
        └── v1.0.rst
    ├── v2
        └── v2.0.rst
    └── v3
        ├── chunk-grids
            ├── index.rst
            └── regular-grid
            │   └── index.rst
        ├── chunk-key-encodings
            ├── default
            │   └── index.rst
            ├── index.rst
            └── v2
            │   └── index.rst
        ├── codecs
            ├── blosc
            │   └── index.rst
            ├── bytes
            │   └── index.rst
            ├── crc32c
            │   └── index.rst
            ├── gzip
            │   └── index.rst
            ├── index.rst
            ├── sharding-indexed
            │   ├── index.rst
            │   └── sharding.png
            └── transpose
            │   └── index.rst
        ├── core
            ├── index.rst
            ├── terminology-hierarchy.excalidraw.png
            └── terminology-read.excalidraw.png
        ├── data-types
            └── index.rst
        ├── storage-transformers
            └── index.rst
        └── stores
            ├── filesystem
                └── index.rst
            └── index.rst


/.gitignore:
--------------------------------------------------------------------------------
 1 | # emacs temp files
 2 | *~
 3 | 
 4 | # sphinx build files
 5 | docs/_build
 6 | 
 7 | # pycharm
 8 | .idea
 9 | 
10 | # virtual environments
11 | .venv
12 | 
13 | # visual studio code
14 | .vscode


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | build:
 4 |   os: ubuntu-22.04
 5 |   tools:
 6 |     python: "3.11"
 7 | 
 8 | sphinx:
 9 |   configuration: docs/conf.py
10 | 
11 | python:
12 |   install:
13 |     - requirements: docs/requirements.txt
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |     wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More considerations
 52 |      for the public:
 53 |     wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution 4.0 International Public License
 58 | 
 59 | By exercising the Licensed Rights (defined below), You accept and agree
 60 | to be bound by the terms and conditions of this Creative Commons
 61 | Attribution 4.0 International Public License ("Public License"). To the
 62 | extent this Public License may be interpreted as a contract, You are
 63 | granted the Licensed Rights in consideration of Your acceptance of
 64 | these terms and conditions, and the Licensor grants You such rights in
 65 | consideration of benefits the Licensor receives from making the
 66 | Licensed Material available under these terms and conditions.
 67 | 
 68 | 
 69 | Section 1 -- Definitions.
 70 | 
 71 |   a. Adapted Material means material subject to Copyright and Similar
 72 |      Rights that is derived from or based upon the Licensed Material
 73 |      and in which the Licensed Material is translated, altered,
 74 |      arranged, transformed, or otherwise modified in a manner requiring
 75 |      permission under the Copyright and Similar Rights held by the
 76 |      Licensor. For purposes of this Public License, where the Licensed
 77 |      Material is a musical work, performance, or sound recording,
 78 |      Adapted Material is always produced where the Licensed Material is
 79 |      synched in timed relation with a moving image.
 80 | 
 81 |   b. Adapter's License means the license You apply to Your Copyright
 82 |      and Similar Rights in Your contributions to Adapted Material in
 83 |      accordance with the terms and conditions of this Public License.
 84 | 
 85 |   c. Copyright and Similar Rights means copyright and/or similar rights
 86 |      closely related to copyright including, without limitation,
 87 |      performance, broadcast, sound recording, and Sui Generis Database
 88 |      Rights, without regard to how the rights are labeled or
 89 |      categorized. For purposes of this Public License, the rights
 90 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 91 |      Rights.
 92 | 
 93 |   d. Effective Technological Measures means those measures that, in the
 94 |      absence of proper authority, may not be circumvented under laws
 95 |      fulfilling obligations under Article 11 of the WIPO Copyright
 96 |      Treaty adopted on December 20, 1996, and/or similar international
 97 |      agreements.
 98 | 
 99 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
100 |      any other exception or limitation to Copyright and Similar Rights
101 |      that applies to Your use of the Licensed Material.
102 | 
103 |   f. Licensed Material means the artistic or literary work, database,
104 |      or other material to which the Licensor applied this Public
105 |      License.
106 | 
107 |   g. Licensed Rights means the rights granted to You subject to the
108 |      terms and conditions of this Public License, which are limited to
109 |      all Copyright and Similar Rights that apply to Your use of the
110 |      Licensed Material and that the Licensor has authority to license.
111 | 
112 |   h. Licensor means the individual(s) or entity(ies) granting rights
113 |      under this Public License.
114 | 
115 |   i. Share means to provide material to the public by any means or
116 |      process that requires permission under the Licensed Rights, such
117 |      as reproduction, public display, public performance, distribution,
118 |      dissemination, communication, or importation, and to make material
119 |      available to the public including in ways that members of the
120 |      public may access the material from a place and at a time
121 |      individually chosen by them.
122 | 
123 |   j. Sui Generis Database Rights means rights other than copyright
124 |      resulting from Directive 96/9/EC of the European Parliament and of
125 |      the Council of 11 March 1996 on the legal protection of databases,
126 |      as amended and/or succeeded, as well as other essentially
127 |      equivalent rights anywhere in the world.
128 | 
129 |   k. You means the individual or entity exercising the Licensed Rights
130 |      under this Public License. Your has a corresponding meaning.
131 | 
132 | 
133 | Section 2 -- Scope.
134 | 
135 |   a. License grant.
136 | 
137 |        1. Subject to the terms and conditions of this Public License,
138 |           the Licensor hereby grants You a worldwide, royalty-free,
139 |           non-sublicensable, non-exclusive, irrevocable license to
140 |           exercise the Licensed Rights in the Licensed Material to:
141 | 
142 |             a. reproduce and Share the Licensed Material, in whole or
143 |                in part; and
144 | 
145 |             b. produce, reproduce, and Share Adapted Material.
146 | 
147 |        2. Exceptions and Limitations. For the avoidance of doubt, where
148 |           Exceptions and Limitations apply to Your use, this Public
149 |           License does not apply, and You do not need to comply with
150 |           its terms and conditions.
151 | 
152 |        3. Term. The term of this Public License is specified in Section
153 |           6(a).
154 | 
155 |        4. Media and formats; technical modifications allowed. The
156 |           Licensor authorizes You to exercise the Licensed Rights in
157 |           all media and formats whether now known or hereafter created,
158 |           and to make technical modifications necessary to do so. The
159 |           Licensor waives and/or agrees not to assert any right or
160 |           authority to forbid You from making technical modifications
161 |           necessary to exercise the Licensed Rights, including
162 |           technical modifications necessary to circumvent Effective
163 |           Technological Measures. For purposes of this Public License,
164 |           simply making modifications authorized by this Section 2(a)
165 |           (4) never produces Adapted Material.
166 | 
167 |        5. Downstream recipients.
168 | 
169 |             a. Offer from the Licensor -- Licensed Material. Every
170 |                recipient of the Licensed Material automatically
171 |                receives an offer from the Licensor to exercise the
172 |                Licensed Rights under the terms and conditions of this
173 |                Public License.
174 | 
175 |             b. No downstream restrictions. You may not offer or impose
176 |                any additional or different terms or conditions on, or
177 |                apply any Effective Technological Measures to, the
178 |                Licensed Material if doing so restricts exercise of the
179 |                Licensed Rights by any recipient of the Licensed
180 |                Material.
181 | 
182 |        6. No endorsement. Nothing in this Public License constitutes or
183 |           may be construed as permission to assert or imply that You
184 |           are, or that Your use of the Licensed Material is, connected
185 |           with, or sponsored, endorsed, or granted official status by,
186 |           the Licensor or others designated to receive attribution as
187 |           provided in Section 3(a)(1)(A)(i).
188 | 
189 |   b. Other rights.
190 | 
191 |        1. Moral rights, such as the right of integrity, are not
192 |           licensed under this Public License, nor are publicity,
193 |           privacy, and/or other similar personality rights; however, to
194 |           the extent possible, the Licensor waives and/or agrees not to
195 |           assert any such rights held by the Licensor to the limited
196 |           extent necessary to allow You to exercise the Licensed
197 |           Rights, but not otherwise.
198 | 
199 |        2. Patent and trademark rights are not licensed under this
200 |           Public License.
201 | 
202 |        3. To the extent possible, the Licensor waives any right to
203 |           collect royalties from You for the exercise of the Licensed
204 |           Rights, whether directly or through a collecting society
205 |           under any voluntary or waivable statutory or compulsory
206 |           licensing scheme. In all other cases the Licensor expressly
207 |           reserves any right to collect such royalties.
208 | 
209 | 
210 | Section 3 -- License Conditions.
211 | 
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 | 
215 |   a. Attribution.
216 | 
217 |        1. If You Share the Licensed Material (including in modified
218 |           form), You must:
219 | 
220 |             a. retain the following if it is supplied by the Licensor
221 |                with the Licensed Material:
222 | 
223 |                  i. identification of the creator(s) of the Licensed
224 |                     Material and any others designated to receive
225 |                     attribution, in any reasonable manner requested by
226 |                     the Licensor (including by pseudonym if
227 |                     designated);
228 | 
229 |                 ii. a copyright notice;
230 | 
231 |                iii. a notice that refers to this Public License;
232 | 
233 |                 iv. a notice that refers to the disclaimer of
234 |                     warranties;
235 | 
236 |                  v. a URI or hyperlink to the Licensed Material to the
237 |                     extent reasonably practicable;
238 | 
239 |             b. indicate if You modified the Licensed Material and
240 |                retain an indication of any previous modifications; and
241 | 
242 |             c. indicate the Licensed Material is licensed under this
243 |                Public License, and include the text of, or the URI or
244 |                hyperlink to, this Public License.
245 | 
246 |        2. You may satisfy the conditions in Section 3(a)(1) in any
247 |           reasonable manner based on the medium, means, and context in
248 |           which You Share the Licensed Material. For example, it may be
249 |           reasonable to satisfy the conditions by providing a URI or
250 |           hyperlink to a resource that includes the required
251 |           information.
252 | 
253 |        3. If requested by the Licensor, You must remove any of the
254 |           information required by Section 3(a)(1)(A) to the extent
255 |           reasonably practicable.
256 | 
257 |        4. If You Share Adapted Material You produce, the Adapter's
258 |           License You apply must not prevent recipients of the Adapted
259 |           Material from complying with this Public License.
260 | 
261 | 
262 | Section 4 -- Sui Generis Database Rights.
263 | 
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 | 
267 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 |      to extract, reuse, reproduce, and Share all or a substantial
269 |      portion of the contents of the database;
270 | 
271 |   b. if You include all or a substantial portion of the database
272 |      contents in a database in which You have Sui Generis Database
273 |      Rights, then the database in which You have Sui Generis Database
274 |      Rights (but not its individual contents) is Adapted Material; and
275 | 
276 |   c. You must comply with the conditions in Section 3(a) if You Share
277 |      all or a substantial portion of the contents of the database.
278 | 
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 | 
283 | 
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 | 
286 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 | 
297 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 | 
307 |   c. The disclaimer of warranties and limitation of liability provided
308 |      above shall be interpreted in a manner that, to the extent
309 |      possible, most closely approximates an absolute disclaimer and
310 |      waiver of all liability.
311 | 
312 | 
313 | Section 6 -- Term and Termination.
314 | 
315 |   a. This Public License applies for the term of the Copyright and
316 |      Similar Rights licensed here. However, if You fail to comply with
317 |      this Public License, then Your rights under this Public License
318 |      terminate automatically.
319 | 
320 |   b. Where Your right to use the Licensed Material has terminated under
321 |      Section 6(a), it reinstates:
322 | 
323 |        1. automatically as of the date the violation is cured, provided
324 |           it is cured within 30 days of Your discovery of the
325 |           violation; or
326 | 
327 |        2. upon express reinstatement by the Licensor.
328 | 
329 |      For the avoidance of doubt, this Section 6(b) does not affect any
330 |      right the Licensor may have to seek remedies for Your violations
331 |      of this Public License.
332 | 
333 |   c. For the avoidance of doubt, the Licensor may also offer the
334 |      Licensed Material under separate terms or conditions or stop
335 |      distributing the Licensed Material at any time; however, doing so
336 |      will not terminate this Public License.
337 | 
338 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 |      License.
340 | 
341 | 
342 | Section 7 -- Other Terms and Conditions.
343 | 
344 |   a. The Licensor shall not be bound by any additional or different
345 |      terms or conditions communicated by You unless expressly agreed.
346 | 
347 |   b. Any arrangements, understandings, or agreements regarding the
348 |      Licensed Material not stated herein are separate from and
349 |      independent of the terms and conditions of this Public License.
350 | 
351 | 
352 | Section 8 -- Interpretation.
353 | 
354 |   a. For the avoidance of doubt, this Public License does not, and
355 |      shall not be interpreted to, reduce, limit, restrict, or impose
356 |      conditions on any use of the Licensed Material that could lawfully
357 |      be made without permission under this Public License.
358 | 
359 |   b. To the extent possible, if any provision of this Public License is
360 |      deemed unenforceable, it shall be automatically reformed to the
361 |      minimum extent necessary to make it enforceable. If the provision
362 |      cannot be reformed, it shall be severed from this Public License
363 |      without affecting the enforceability of the remaining terms and
364 |      conditions.
365 | 
366 |   c. No term or condition of this Public License will be waived and no
367 |      failure to comply consented to unless expressly agreed to by the
368 |      Licensor.
369 | 
370 |   d. Nothing in this Public License constitutes or may be interpreted
371 |      as a limitation upon, or waiver of, any privileges and immunities
372 |      that apply to the Licensor or You, including from the legal
373 |      processes of any jurisdiction or authority.
374 | 
375 | 
376 | =======================================================================
377 | 
378 | Creative Commons is not a party to its public
379 | licenses. Notwithstanding, Creative Commons may elect to apply one of
380 | its public licenses to material it publishes and in those instances
381 | will be considered the “Licensor.” The text of the Creative Commons
382 | public licenses is dedicated to the public domain under the CC0 Public
383 | Domain Dedication. Except for the limited purpose of indicating that
384 | material is shared under a Creative Commons public license or as
385 | otherwise permitted by the Creative Commons policies published at
386 | creativecommons.org/policies, Creative Commons does not authorize the
387 | use of the trademark "Creative Commons" or any other trademark or logo
388 | of Creative Commons without its prior written consent including,
389 | without limitation, in connection with any unauthorized modifications
390 | to any of its public licenses or any other arrangements,
391 | understandings, or agreements concerning use of licensed material. For
392 | the avoidance of doubt, this paragraph does not form part of the
393 | public licenses.
394 | 
395 | Creative Commons may be contacted at creativecommons.org.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Zarr Specification
 2 | 
 3 | **Zarr core protocol for storage and retrieval of N-dimensional typed arrays**
 4 | 
 5 | <img src="https://github.com/zarr-developers/zarr-logo/releases/download/2022-04-28/zarr-pink-stacked-transparent.png" alt="Zarr logo" height="200"/>
 6 | 
 7 | The v1 and v2 specs are included in this repository under `docs/v1` and
 8 | `docs/v2`; see also https://github.com/zarr-developers/zarr-python/tree/main/docs/spec.
 9 | 
10 | The rendered docs of the `main` branch are available at https://zarr-specs.readthedocs.io
11 | 
12 | ## Usage
13 | 
14 | The following steps install the necessary packages to render the specs with
15 | automatic updating and reloading of changes:
16 | 
17 | ```shell
18 | ## optionally set up a venv
19 | # python3 -m venv .venv
20 | # . .venv/bin/activate
21 | pip install -r docs/requirements.txt
22 | pip install sphinx-autobuild
23 | sphinx-autobuild -a docs docs/_build/html
24 | ```
25 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = .
 8 | BUILDDIR      = _build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
 1 | .bd-main .bd-content .bd-article-container {
 2 |     flex-grow: 1;
 3 |     max-width: 100%;
 4 | }
 5 | 
 6 | @media (min-width:960px) {
 7 |     .bd-page-width {
 8 |         max-width: 100rem;
 9 |     }
10 | }
11 | 
12 | footer {
13 |     display: none;
14 | }
15 | 
16 | .sidebar-end-items {
17 |     margin-top: 0% !important;
18 | }
19 | 
20 | /* Remove ↗ for external links in top bar menu */
21 | .nav-link.nav-external:after {
22 |     display: none;
23 | }
24 | 
25 | div.bd-article-container:has(.draft) {
26 |     background-image: url(../draft-watermark.png) !important;
27 |     background-repeat: repeat-y !important;
28 |     background-position: center top !important;
29 |     background-attachment: scroll !important;
30 | }
31 | 


--------------------------------------------------------------------------------
/docs/_static/draft-watermark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zarr-developers/zarr-specs/b880fb385bedb18dd78ffef1bd683e7e93270c74/docs/_static/draft-watermark.png


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zarr-developers/zarr-specs/b880fb385bedb18dd78ffef1bd683e7e93270c74/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | #
 13 | # import os
 14 | # import sys
 15 | # sys.path.insert(0, os.path.abspath('.'))
 16 | 
 17 | 
 18 | # -- Project information -----------------------------------------------------
 19 | 
 20 | project = 'Zarr specs'
 21 | copyright = '2024, Zarr Developers'
 22 | author = 'Zarr Developers'
 23 | 
 24 | 
 25 | # -- General configuration ---------------------------------------------------
 26 | 
 27 | # Add any Sphinx extension module names here, as strings. They can be
 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 29 | # ones.
 30 | extensions = [
 31 |   'sphinx.ext.todo',
 32 |   'sphinxcontrib.mermaid',
 33 |   'sphinx_reredirects',
 34 | ]
 35 | 
 36 | # Display todos by setting to True
 37 | todo_include_todos = True
 38 | 
 39 | # Add any paths that contain templates here, relative to this directory.
 40 | templates_path = ['_templates']
 41 | 
 42 | # List of patterns, relative to source directory, that match files and
 43 | # directories to ignore when looking for source files.
 44 | # This pattern also affects html_static_path and html_extra_path.
 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 46 | 
 47 | 
 48 | # -- Options for HTML output -------------------------------------------------
 49 | 
 50 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 51 | # a list of builtin themes.
 52 | #
 53 | html_theme = "pydata_sphinx_theme"
 54 | html_logo = '_static/logo.png'
 55 | 
 56 | html_theme_options = {
 57 |   "github_url": "https://github.com/zarr-developers/zarr-specs",
 58 |   "icon_links": [
 59 |     {
 60 |       "name": "Bluesky",
 61 |       "url": "https://bsky.app/profile/zarr.dev",
 62 |       "icon": "fa-brands fa-bluesky",
 63 |     },
 64 |     {
 65 |       "name": "Zulip",
 66 |       "url": "https://ossci.zulipchat.com/",
 67 |       "icon": "fas fa-comments",
 68 |     },
 69 |   ],
 70 |   "show_prev_next": False,
 71 |   "secondary_sidebar_items": ["page-toc"],
 72 | }
 73 | 
 74 | # Add any paths that contain custom static files (such as style sheets) here,
 75 | # relative to this directory. They are copied after the builtin static files,
 76 | # so a file named "default.css" will overwrite the builtin "default.css".
 77 | html_static_path = ['_static']
 78 | 
 79 | html_css_files = [
 80 |     'css/custom.css',
 81 | ]
 82 | 
 83 | suppress_warnings = [
 84 |     # suppress "duplicate citation" warnings
 85 |     'ref.citation',
 86 | ]
 87 | 
 88 | redirects = {
 89 |     "index": "specs.html",
 90 |     "v3/core/v3.0.html": "./index.html",
 91 |     "v3/codecs/blosc/v1.0.rst": "./index.html",
 92 |     "v3/codecs/bytes/v1.0.rst": "./index.html",
 93 |     "v3/codecs/crc32c/v1.0.rst": "./index.html",
 94 |     "v3/codecs/gzip/v1.0.rst": "./index.html",
 95 |     "v3/codecs/sharding-indexed/v1.0.rst": "./index.html",
 96 |     "v3/codecs/transpose/v1.0.rst": "./index.html",
 97 |     "v3/stores/filesystem/v1.0.rst": "./index.html",
 98 |     "v3/chunk-grid.rst": "chunk-grids/index.html",
 99 |     "v3/chunk-key-encoding.rst": "chunk-key-encodings/index.html",
100 |     "v3/codecs.rst": "codecs/index.html",
101 |     "v3/data-types.rst": "data-types/index.html",
102 |     "v3/array-storage-transformers.rst": "storage-transformers/index.html",
103 |     "v3/stores.rst": "stores/index.html",
104 | }
105 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | =====
 2 | Specs
 3 | =====
 4 | 
 5 | A good starting point is the :ref:`zarr-core-specification-v3`.
 6 | 
 7 | .. toctree::
 8 | 
 9 |    Home <https://zarr.dev>
10 |    specs
11 |    ZEPs <https://zarr.dev/zeps>
12 |    Implementations <https://github.com/zarr-developers/zarr_implementations>
13 | 
14 | 
15 | Indices and tables
16 | ==================
17 | 
18 | * :ref:`genindex`
19 | * :ref:`modindex`
20 | * :ref:`search`
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.https://www.sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | pydata-sphinx-theme
3 | sphinxcontrib-mermaid
4 | sphinx-reredirects
5 | 


--------------------------------------------------------------------------------
/docs/specs.rst:
--------------------------------------------------------------------------------
 1 | ==============
 2 | Specifications
 3 | ==============
 4 | 
 5 | .. _zarr-specs:
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 1
 9 |    :caption: v3
10 | 
11 |    Core <v3/core/index>
12 |    v3/codecs/index
13 |    v3/chunk-grids/index
14 |    v3/chunk-key-encodings/index
15 |    v3/data-types/index
16 |    v3/stores/index
17 |    v3/storage-transformers/index
18 | 
19 | .. toctree::
20 |    :maxdepth: 1
21 |    :caption: v2
22 | 
23 |    Zarr spec v2 <v2/v2.0.rst>
24 | 
25 | .. toctree::
26 |    :maxdepth: 1
27 |    :caption: v1
28 | 
29 |    Zarr spec v1 <v1/v1.0.rst>
30 | 


--------------------------------------------------------------------------------
/docs/v1/v1.0.rst:
--------------------------------------------------------------------------------
  1 | .. _spec_v1:
  2 | 
  3 | Zarr Storage Specification Version 1
  4 | ====================================
  5 | 
  6 | This document provides a technical specification of the protocol and
  7 | format used for storing a Zarr array. The key words "MUST", "MUST
  8 | NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT",
  9 | "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 10 | interpreted as described in `RFC 2119
 11 | <https://www.ietf.org/rfc/rfc2119.txt>`_.
 12 | 
 13 | Status
 14 | ------
 15 | 
 16 | This specification is deprecated. See :ref:`zarr-specs` for the latest version.
 17 | 
 18 | Storage
 19 | -------
 20 | 
 21 | A Zarr array can be stored in any storage system that provides a
 22 | key/value interface, where a key is an ASCII string and a value is an
 23 | arbitrary sequence of bytes, and the supported operations are read
 24 | (get the sequence of bytes associated with a given key), write (set
 25 | the sequence of bytes associated with a given key) and delete (remove
 26 | a key/value pair).
 27 | 
 28 | For example, a directory in a file system can provide this interface,
 29 | where keys are file names, values are file contents, and files can be
 30 | read, written or deleted via the operating system. Equally, an S3
 31 | bucket can provide this interface, where keys are resource names,
 32 | values are resource contents, and resources can be read, written or
 33 | deleted via HTTP.
 34 | 
 35 | Below an "array store" refers to any system implementing this
 36 | interface.
 37 | 
 38 | Metadata
 39 | --------
 40 | 
 41 | Each array requires essential configuration metadata to be stored,
 42 | enabling correct interpretation of the stored data. This metadata is
 43 | encoded using JSON and stored as the value of the 'meta' key within an
 44 | array store.
 45 | 
 46 | The metadata resource is a JSON object. The following keys MUST be
 47 | present within the object:
 48 | 
 49 | zarr_format
 50 |     An integer defining the version of the storage specification to which the
 51 |     array store adheres.
 52 | shape
 53 |     A list of integers defining the length of each dimension of the array.
 54 | chunks
 55 |     A list of integers defining the length of each dimension of a chunk of the
 56 |     array. Note that all chunks within a Zarr array have the same shape.
 57 | dtype
 58 |     A string or list defining a valid data type for the array. See also
 59 |     the subsection below on data type encoding.
 60 | compression
 61 |     A string identifying the primary compression library used to compress
 62 |     each chunk of the array.
 63 | compression_opts
 64 |     An integer, string or dictionary providing options to the primary
 65 |     compression library.
 66 | fill_value
 67 |     A scalar value providing the default value to use for uninitialized
 68 |     portions of the array.
 69 | order
 70 |     Either 'C' or 'F', defining the layout of bytes within each chunk of the
 71 |     array. 'C' means row-major order, i.e., the last dimension varies fastest;
 72 |     'F' means column-major order, i.e., the first dimension varies fastest.
 73 | 
 74 | Other keys MAY be present within the metadata object; however, they MUST
 75 | NOT alter the interpretation of the required fields defined above.
 76 | 
 77 | For example, the JSON object below defines a 2-dimensional array of
 78 | 64-bit little-endian floating point numbers with 10000 rows and 10000
 79 | columns, divided into chunks of 1000 rows and 1000 columns (so there
 80 | will be 100 chunks in total arranged in a 10 by 10 grid). Within each
 81 | chunk the data are laid out in C contiguous order, and each chunk is
 82 | compressed using the Blosc compression library::
 83 | 
 84 |     {
 85 |         "chunks": [
 86 |             1000,
 87 |             1000
 88 |         ],
 89 |         "compression": "blosc",
 90 |         "compression_opts": {
 91 |             "clevel": 5,
 92 |             "cname": "lz4",
 93 |             "shuffle": 1
 94 |         },
 95 |         "dtype": "<f8",
 96 |         "fill_value": null,
 97 |         "order": "C",
 98 |         "shape": [
 99 |             10000,
100 |             10000
101 |         ],
102 |         "zarr_format": 1
103 |     }
104 | 
105 | Data type encoding
106 | ~~~~~~~~~~~~~~~~~~
107 | 
108 | Simple data types are encoded within the array metadata resource as a
109 | string, following the `NumPy array protocol type string (typestr)
110 | format
111 | <https://numpy.org/doc/stable/reference/arrays.interface.html>`_. The
112 | format consists of 3 parts: a character describing the byteorder of
113 | the data (``<``: little-endian, ``>``: big-endian, ``|``:
114 | not-relevant), a character code giving the basic type of the array,
115 | and an integer providing the number of bytes the type uses. The byte
116 | order MUST be specified. E.g., ``"<f8"``, ``">i4"``, ``"|b1"`` and
117 | ``"|S12"`` are valid data types.
118 | 
119 | Structured data types (i.e., with multiple named fields) are encoded as
120 | a list of two-element lists, following `NumPy array protocol type
121 | descriptions (descr)
122 | <https://numpy.org/doc/stable/reference/arrays.interface.html>`_.
123 | For example, the JSON list ``[["r", "|u1"], ["g", "|u1"], ["b",
124 | "|u1"]]`` defines a data type composed of three single-byte unsigned
125 | integers labelled 'r', 'g' and 'b'.
126 | 
127 | Chunks
128 | ------
129 | 
130 | Each chunk of the array is compressed by passing the raw bytes for the
131 | chunk through the primary compression library to obtain a new sequence
132 | of bytes comprising the compressed chunk data. No header is added to
133 | the compressed bytes or any other modification made. The internal
134 | structure of the compressed bytes will depend on which primary
135 | compressor was used. For example, the `Blosc compressor
136 | <https://github.com/Blosc/c-blosc/blob/main/README_CHUNK_FORMAT.rst>`_
137 | produces a sequence of bytes that begins with a 16-byte header
138 | followed by compressed data.
139 | 
140 | The compressed sequence of bytes for each chunk is stored under a key
141 | formed from the index of the chunk within the grid of chunks
142 | representing the array. To form a string key for a chunk, the indices
143 | are converted to strings and concatenated with the period character
144 | ('.') separating each index. For example, given an array with shape
145 | (10000, 10000) and chunk shape (1000, 1000) there will be 100 chunks
146 | laid out in a 10 by 10 grid. The chunk with indices (0, 0) provides
147 | data for rows 0-999 and columns 0-999 and is stored under the key
148 | '0.0'; the chunk with indices (2, 4) provides data for rows 2000-2999
149 | and columns 4000-4999 and is stored under the key '2.4'; etc.
150 | 
151 | There is no need for all chunks to be present within an array
152 | store. If a chunk is not present then it is considered to be in an
153 | uninitialized state.  An uninitialized chunk MUST be treated as if it
154 | was uniformly filled with the value of the 'fill_value' field in the
155 | array metadata. If the 'fill_value' field is ``null`` then the
156 | contents of the chunk are undefined.
157 | 
158 | Note that all chunks in an array have the same shape. If the length of
159 | any array dimension is not exactly divisible by the length of the
160 | corresponding chunk dimension then some chunks will overhang the edge
161 | of the array. The contents of any chunk region falling outside the
162 | array are undefined.
163 | 
164 | Attributes
165 | ----------
166 | 
167 | Each array can also be associated with custom attributes, which are
168 | simple key/value items with application-specific meaning. Custom
169 | attributes are encoded as a JSON object and stored under the 'attrs'
170 | key within an array store. Even if the attributes are empty, the
171 | 'attrs' key MUST be present within an array store.
172 | 
173 | For example, the JSON object below encodes three attributes named
174 | 'foo', 'bar' and 'baz'::
175 | 
176 |     {
177 |         "foo": 42,
178 |         "bar": "apples",
179 |         "baz": [1, 2, 3, 4]
180 |     }
181 | 
182 | Example
183 | -------
184 | 
185 | Below is an example of storing a Zarr array, using a directory on the
186 | local file system as storage.
187 | 
188 | Initialize the store::
189 | 
190 |     >>> import zarr
191 |     >>> store = zarr.DirectoryStore('example.zarr')
192 |     >>> zarr.init_store(store, shape=(20, 20), chunks=(10, 10),
193 |     ...                 dtype='i4', fill_value=42, compression='zlib',
194 |     ...                 compression_opts=1, overwrite=True)
195 | 
196 | No chunks are initialized yet, so only the 'meta' and 'attrs' keys
197 | have been set::
198 | 
199 |     >>> import os
200 |     >>> sorted(os.listdir('example.zarr'))
201 |     ['attrs', 'meta']
202 | 
203 | Inspect the array metadata::
204 | 
205 |     >>> print(open('example.zarr/meta').read())
206 |     {
207 |         "chunks": [
208 |             10,
209 |             10
210 |         ],
211 |         "compression": "zlib",
212 |         "compression_opts": 1,
213 |         "dtype": "<i4",
214 |         "fill_value": 42,
215 |         "order": "C",
216 |         "shape": [
217 |             20,
218 |             20
219 |         ],
220 |         "zarr_format": 1
221 |     }
222 | 
223 | Inspect the array attributes::
224 | 
225 |     >>> print(open('example.zarr/attrs').read())
226 |     {}
227 | 
228 | Set some data::
229 | 
230 |     >>> z = zarr.Array(store)
231 |     >>> z[0:10, 0:10] = 1
232 |     >>> sorted(os.listdir('example.zarr'))
233 |     ['0.0', 'attrs', 'meta']
234 | 
235 | Set some more data::
236 | 
237 |     >>> z[0:10, 10:20] = 2
238 |     >>> z[10:20, :] = 3
239 |     >>> sorted(os.listdir('example.zarr'))
240 |     ['0.0', '0.1', '1.0', '1.1', 'attrs', 'meta']
241 | 
242 | Manually decompress a single chunk for illustration::
243 | 
244 |     >>> import zlib
245 |     >>> b = zlib.decompress(open('example.zarr/0.0', 'rb').read())
246 |     >>> import numpy as np
247 |     >>> a = np.frombuffer(b, dtype='<i4')
248 |     >>> a
249 |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
250 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
251 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
252 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
253 |            1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)
254 | 
255 | Modify the array attributes::
256 | 
257 |     >>> z.attrs['foo'] = 42
258 |     >>> z.attrs['bar'] = 'apples'
259 |     >>> z.attrs['baz'] = [1, 2, 3, 4]
260 |     >>> print(open('example.zarr/attrs').read())
261 |     {
262 |         "bar": "apples",
263 |         "baz": [
264 |             1,
265 |             2,
266 |             3,
267 |             4
268 |         ],
269 |         "foo": 42
270 |     }
271 | 


--------------------------------------------------------------------------------
/docs/v2/v2.0.rst:
--------------------------------------------------------------------------------
  1 | .. _spec_v2:
  2 | 
  3 | Zarr Storage Specification Version 2
  4 | ====================================
  5 | 
  6 | This document provides a technical specification of the protocol and format
  7 | used for storing Zarr arrays. The key words "MUST", "MUST NOT", "REQUIRED",
  8 | "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and
  9 | "OPTIONAL" in this document are to be interpreted as described in `RFC 2119
 10 | <https://www.ietf.org/rfc/rfc2119.txt>`_.
 11 | 
 12 | Status
 13 | ------
 14 | 
 15 | This specification has been superseded. See :ref:`zarr-specs` for the latest
 16 | version.
 17 | 
 18 | .. _spec_v2_storage:
 19 | 
 20 | Storage
 21 | -------
 22 | 
 23 | A Zarr array can be stored in any storage system that provides a key/value
 24 | interface, where a key is an ASCII string and a value is an arbitrary sequence
 25 | of bytes, and the supported operations are read (get the sequence of bytes
 26 | associated with a given key), write (set the sequence of bytes associated with
 27 | a given key) and delete (remove a key/value pair).
 28 | 
 29 | For example, a directory in a file system can provide this interface, where
 30 | keys are file names, values are file contents, and files can be read, written
 31 | or deleted via the operating system. Equally, an S3 bucket can provide this
 32 | interface, where keys are resource names, values are resource contents, and
 33 | resources can be read, written or deleted via HTTP.
 34 | 
 35 | Below an "array store" refers to any system implementing this interface.
 36 | 
 37 | .. _spec_v2_array:
 38 | 
 39 | Arrays
 40 | ------
 41 | 
 42 | .. _spec_v2_array_metadata:
 43 | 
 44 | Metadata
 45 | ~~~~~~~~
 46 | 
 47 | Each array requires essential configuration metadata to be stored, enabling
 48 | correct interpretation of the stored data. This metadata is encoded using JSON
 49 | and stored as the value of the ".zarray" key within an array store.
 50 | 
 51 | The metadata resource is a JSON object. The following keys MUST be present
 52 | within the object:
 53 | 
 54 | zarr_format
 55 |     An integer defining the version of the storage specification to which the
 56 |     array store adheres.
 57 | shape
 58 |     A list of integers defining the length of each dimension of the array.
 59 | chunks
 60 |     A list of integers defining the length of each dimension of a chunk of the
 61 |     array. Note that all chunks within a Zarr array have the same shape.
 62 | dtype
 63 |     A string or list defining a valid data type for the array. See also
 64 |     the subsection below on data type encoding.
 65 | compressor
 66 |     A JSON object identifying the primary compression codec and providing
 67 |     configuration parameters, or ``null`` if no compressor is to be used.
 68 |     The object MUST contain an ``"id"`` key identifying the codec to be used.
 69 | fill_value
 70 |     A scalar value providing the default value to use for uninitialized
 71 |     portions of the array, or ``null`` if no fill_value is to be used.
 72 | order
 73 |     Either "C" or "F", defining the layout of bytes within each chunk of the
 74 |     array. "C" means row-major order, i.e., the last dimension varies fastest;
 75 |     "F" means column-major order, i.e., the first dimension varies fastest.
 76 | filters
 77 |     A list of JSON objects providing codec configurations, or ``null`` if no
 78 |     filters are to be applied. Each codec configuration object MUST contain an
 79 |     ``"id"`` key identifying the codec to be used.
 80 | 
 81 | The following keys MAY be present within the object:
 82 | 
 83 | dimension_separator
 84 |     If present, either the string ``"."`` or ``"/"`` defining the separator placed
 85 |     between the dimensions of a chunk. If the value is not set, then the
 86 |     default MUST be assumed to be ``"."``, leading to chunk keys of the form "0.0".
 87 |     Arrays defined with ``"/"`` as the dimension separator can be considered to have
 88 |     nested, or hierarchical, keys of the form "0/0" that SHOULD where possible
 89 |     produce a directory-like structure.
 90 | 
 91 | Other keys SHOULD NOT be present within the metadata object and SHOULD be
 92 | ignored by implementations.
 93 | 
 94 | For example, the JSON object below defines a 2-dimensional array of 64-bit
 95 | little-endian floating point numbers with 10000 rows and 10000 columns, divided
 96 | into chunks of 1000 rows and 1000 columns (so there will be 100 chunks in total
 97 | arranged in a 10 by 10 grid). Within each chunk the data are laid out in C
 98 | contiguous order. Each chunk is encoded using a delta filter and compressed
 99 | using the Blosc compression library prior to storage::
100 | 
101 |     {
102 |         "chunks": [
103 |             1000,
104 |             1000
105 |         ],
106 |         "compressor": {
107 |             "id": "blosc",
108 |             "cname": "lz4",
109 |             "clevel": 5,
110 |             "shuffle": 1
111 |         },
112 |         "dtype": "<f8",
113 |         "fill_value": "NaN",
114 |         "filters": [
115 |             {"id": "delta", "dtype": "<f8", "astype": "<f4"}
116 |         ],
117 |         "order": "C",
118 |         "shape": [
119 |             10000,
120 |             10000
121 |         ],
122 |         "zarr_format": 2
123 |     }
124 | 
125 | .. _spec_v2_array_dtype:
126 | 
127 | Data type encoding
128 | ~~~~~~~~~~~~~~~~~~
129 | 
130 | Simple data types are encoded within the array metadata as a string,
131 | following the `NumPy array protocol type string (typestr) format <https://numpy.org/doc/stable/reference/arrays.interface.html#the-array-interface-protocol>`_. The format
132 | consists of 3 parts:
133 | 
134 | * One character describing the byteorder of the data (``"<"``: little-endian;
135 |   ``">"``: big-endian; ``"|"``: not-relevant)
136 | * One character code giving the basic type of the array (``"b"``: Boolean (integer
137 |   type where all values are only True or False); ``"i"``: integer; ``"u"``: unsigned
138 |   integer; ``"f"``: floating point; ``"c"``: complex floating point; ``"m"``: timedelta;
139 |   ``"M"``: datetime; ``"S"``: string (fixed-length sequence of char); ``"U"``: unicode
140 |   (fixed-length sequence of Py_UNICODE); ``"V"``: other (void * – each item is a
141 |   fixed-size chunk of memory))
142 | * An integer specifying the number of bytes the type uses.
143 | 
144 | The byte order MUST be specified. E.g., ``"<f8"``, ``">i4"``, ``"|b1"`` and
145 | ``"|S12"`` are valid data type encodings.
146 | 
147 | For datetime64 ("M") and timedelta64 ("m") data types, these MUST also include the
148 | units within square brackets. A list of valid units and their definitions is given in
149 | the `NumPy documentation on Datetimes and Timedeltas <https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units>`_.
150 | For example, ``"<M8[ns]"`` specifies a datetime64 data type with nanosecond time units.
151 | 
152 | Structured data types (i.e., with multiple named fields) are encoded
153 | as a list of lists, following `NumPy array protocol type descriptions
154 | (descr) <https://numpy.org/doc/stable/reference/arrays.interface.html#the-array-interface-protocol>`_. Each
155 | sub-list has the form ``[fieldname, datatype, shape]`` where ``shape``
156 | is optional. ``fieldname`` is a string, ``datatype`` is a string
157 | specifying a simple data type (see above), and ``shape`` is a list of
158 | integers specifying subarray shape. For example, the JSON list below
159 | defines a data type composed of three single-byte unsigned integer
160 | fields named "r", "g" and "b"::
161 | 
162 |     [["r", "|u1"], ["g", "|u1"], ["b", "|u1"]]
163 | 
164 | For example, the JSON list below defines a data type composed of three
165 | fields named "x", "y" and "z", where "x" and "y" each contain 32-bit
166 | floats, and each item in "z" is a 2 by 2 array of floats::
167 | 
168 |     [["x", "<f4"], ["y", "<f4"], ["z", "<f4", [2, 2]]]
169 | 
170 | Structured data types may also be nested, e.g., the following JSON
171 | list defines a data type with two fields "foo" and "bar", where "bar"
172 | has two sub-fields "baz" and "qux"::
173 | 
174 |     [["foo", "<f4"], ["bar", [["baz", "<f4"], ["qux", "<i4"]]]]
175 | 
176 | .. _spec_v2_array_fill_value:
177 | 
178 | Fill value encoding
179 | ~~~~~~~~~~~~~~~~~~~
180 | 
181 | For simple floating point data types, the following table MUST be used to
182 | encode values of the "fill_value" field:
183 | 
184 | =================  ===============
185 | Value              JSON encoding
186 | =================  ===============
187 | Not a Number       ``"NaN"``
188 | Positive Infinity  ``"Infinity"``
189 | Negative Infinity  ``"-Infinity"``
190 | =================  ===============
191 | 
192 | If an array has a fixed length byte string data type (e.g., ``"|S12"``), or a
193 | structured data type, and if the fill value is not null, then the fill value
194 | MUST be encoded as an ASCII string using the standard Base64 alphabet.
195 | 
196 | .. _spec_v2_array_chunks:
197 | 
198 | Chunks
199 | ~~~~~~
200 | 
201 | Each chunk of the array is compressed by passing the raw bytes for the chunk
202 | through the primary compression library to obtain a new sequence of bytes
203 | comprising the compressed chunk data. No header is added to the compressed
204 | bytes or any other modification made. The internal structure of the compressed
205 | bytes will depend on which primary compressor was used. For example, the `Blosc
206 | compressor <https://github.com/Blosc/c-blosc/blob/main/README_CHUNK_FORMAT.rst>`_
207 | produces a sequence of bytes that begins with a 16-byte header followed by
208 | compressed data.
209 | 
210 | The compressed sequence of bytes for each chunk is stored under a key formed
211 | from the index of the chunk within the grid of chunks representing the array.
212 | To form a string key for a chunk, the indices are converted to strings and
213 | concatenated with the period character (".") separating each index. For
214 | example, given an array with shape (10000, 10000) and chunk shape (1000, 1000)
215 | there will be 100 chunks laid out in a 10 by 10 grid. The chunk with indices
216 | (0, 0) provides data for rows 0-999 and columns 0-999 and is stored under the
217 | key "0.0"; the chunk with indices (2, 4) provides data for rows 2000-2999 and
218 | columns 4000-4999 and is stored under the key "2.4"; etc.
219 | 
220 | There is no need for all chunks to be present within an array store. If a chunk
221 | is not present then it is considered to be in an uninitialized state.  An
222 | uninitialized chunk MUST be treated as if it was uniformly filled with the value
223 | of the "fill_value" field in the array metadata. If the "fill_value" field is
224 | ``null`` then the contents of the chunk are undefined.
225 | 
226 | Note that all chunks in an array have the same shape. If the length of any
227 | array dimension is not exactly divisible by the length of the corresponding
228 | chunk dimension then some chunks will overhang the edge of the array. The
229 | contents of any chunk region falling outside the array are undefined.
230 | 
231 | .. _spec_v2_array_filters:
232 | 
233 | Filters
234 | ~~~~~~~
235 | 
236 | Optionally a sequence of one or more filters can be used to transform chunk
237 | data prior to compression. When storing data, filters are applied in the order
238 | specified in array metadata to encode data, then the encoded data are passed to
239 | the primary compressor. When retrieving data, stored chunk data are
240 | decompressed by the primary compressor then decoded using filters in the
241 | reverse order.
242 | 
243 | .. _spec_v2_hierarchy:
244 | 
245 | Hierarchies
246 | -----------
247 | 
248 | .. _spec_v2_hierarchy_paths:
249 | 
250 | Logical storage paths
251 | ~~~~~~~~~~~~~~~~~~~~~
252 | 
253 | Multiple arrays can be stored in the same array store by associating each array
254 | with a different logical path. A logical path is simply an ASCII string. The
255 | logical path is used to form a prefix for keys used by the array. For example,
256 | if an array is stored at logical path "foo/bar" then the array metadata will be
257 | stored under the key "foo/bar/.zarray", the user-defined attributes will be
258 | stored under the key "foo/bar/.zattrs", and the chunks will be stored under
259 | keys like "foo/bar/0.0", "foo/bar/0.1", etc.
260 | 
261 | To ensure consistent behaviour across different storage systems, logical paths
262 | MUST be normalized as follows:
263 | 
264 | * Replace all backward slash characters ("\\") with forward slash characters
265 |   ("/")
266 | * Strip any leading "/" characters
267 | * Strip any trailing "/" characters
268 | * Collapse any sequence of more than one "/" character into a single "/"
269 |   character
270 | 
271 | The key prefix is then obtained by appending a single "/" character to the
272 | normalized logical path.
273 | 
274 | After normalization, if splitting a logical path by the "/" character results
275 | in any path segment equal to the string "." or the string ".." then an error
276 | MUST be raised.
277 | 
278 | N.B., how the underlying array store processes requests to store values under
279 | keys containing the "/" character is entirely up to the store implementation
280 | and is not constrained by this specification. E.g., an array store could simply
281 | treat all keys as opaque ASCII strings; equally, an array store could map
282 | logical paths onto some kind of hierarchical storage (e.g., directories on a
283 | file system).
284 | 
285 | .. _spec_v2_hierarchy_groups:
286 | 
287 | Groups
288 | ~~~~~~
289 | 
290 | Arrays can be organized into groups which can also contain other groups. A
291 | group is created by storing group metadata under the ".zgroup" key under some
292 | logical path. E.g., a group exists at the root of an array store if the
293 | ".zgroup" key exists in the store, and a group exists at logical path "foo/bar"
294 | if the "foo/bar/.zgroup" key exists in the store.
295 | 
296 | If the user requests a group to be created under some logical path, then groups
297 | MUST also be created at all ancestor paths. E.g., if the user requests group
298 | creation at path "foo/bar" then groups MUST be created at path "foo" and the
299 | root of the store, if they don't already exist.
300 | 
301 | If the user requests an array to be created under some logical path, then
302 | groups MUST also be created at all ancestor paths. E.g., if the user requests
303 | array creation at path "foo/bar/baz" then groups MUST be created at path
304 | "foo/bar", path "foo", and the root of the store, if they don't already exist.
305 | 
306 | The group metadata resource is a JSON object. The following keys MUST be present
307 | within the object:
308 | 
309 | zarr_format
310 |     An integer defining the version of the storage specification to which the
311 |     array store adheres.
312 | 
313 | Other keys MUST NOT be present within the metadata object.
314 | 
315 | The members of a group are arrays and groups stored under logical paths that
316 | are direct children of the parent group's logical path. E.g., if groups exist
317 | under the logical paths "foo" and "foo/bar" and an array exists at logical path
318 | "foo/baz" then the members of the group at path "foo" are the group at path
319 | "foo/bar" and the array at path "foo/baz".
320 | 
321 | .. _spec_v2_attrs:
322 | 
323 | Attributes
324 | ----------
325 | 
326 | An array or group can be associated with custom attributes, which are arbitrary
327 | key/value pairs with application-specific meaning. Custom attributes are encoded
328 | as a JSON object and stored under the ".zattrs" key within an array store. The
329 | ".zattrs" key does not have to be present, and if it is absent the attributes
330 | should be treated as empty.
331 | 
332 | For example, the JSON object below encodes three attributes named
333 | "foo", "bar" and "baz"::
334 | 
335 |     {
336 |         "foo": 42,
337 |         "bar": "apples",
338 |         "baz": [1, 2, 3, 4]
339 |     }
340 | 
341 | .. _spec_v2_examples:
342 | 
343 | Examples
344 | --------
345 | 
346 | Storing a single array
347 | ~~~~~~~~~~~~~~~~~~~~~~
348 | 
349 | Below is an example of storing a Zarr array, using a directory on the
350 | local file system as storage.
351 | 
352 | Create an array::
353 | 
354 |     >>> import zarr
355 |     >>> store = zarr.DirectoryStore('data/example.zarr')
356 |     >>> a = zarr.create(shape=(20, 20), chunks=(10, 10), dtype='i4',
357 |     ...                 fill_value=42, compressor=zarr.Zlib(level=1),
358 |     ...                 store=store, overwrite=True)
359 | 
360 | No chunks are initialized yet, so only the ".zarray" key has been set in the
361 | store::
362 | 
363 |     >>> import os
364 |     >>> sorted(os.listdir('data/example.zarr'))
365 |     ['.zarray']
366 | 
367 | Inspect the array metadata::
368 | 
369 |     >>> print(open('data/example.zarr/.zarray').read())
370 |     {
371 |         "chunks": [
372 |             10,
373 |             10
374 |         ],
375 |         "compressor": {
376 |             "id": "zlib",
377 |             "level": 1
378 |         },
379 |         "dtype": "<i4",
380 |         "fill_value": 42,
381 |         "filters": null,
382 |         "order": "C",
383 |         "shape": [
384 |             20,
385 |             20
386 |         ],
387 |         "zarr_format": 2
388 |     }
389 | 
390 | Chunks are initialized on demand. E.g., set some data::
391 | 
392 |     >>> a[0:10, 0:10] = 1
393 |     >>> sorted(os.listdir('data/example.zarr'))
394 |     ['.zarray', '0.0']
395 | 
396 | Set some more data::
397 | 
398 |     >>> a[0:10, 10:20] = 2
399 |     >>> a[10:20, :] = 3
400 |     >>> sorted(os.listdir('data/example.zarr'))
401 |     ['.zarray', '0.0', '0.1', '1.0', '1.1']
402 | 
403 | Manually decompress a single chunk for illustration::
404 | 
405 |     >>> import zlib
406 |     >>> buf = zlib.decompress(open('data/example.zarr/0.0', 'rb').read())
407 |     >>> import numpy as np
408 |     >>> chunk = np.frombuffer(buf, dtype='<i4')
409 |     >>> chunk
410 |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
411 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
412 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
413 |            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
414 |            1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)
415 | 
416 | Modify the array attributes::
417 | 
418 |     >>> a.attrs['foo'] = 42
419 |     >>> a.attrs['bar'] = 'apples'
420 |     >>> a.attrs['baz'] = [1, 2, 3, 4]
421 |     >>> sorted(os.listdir('data/example.zarr'))
422 |     ['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
423 |     >>> print(open('data/example.zarr/.zattrs').read())
424 |     {
425 |         "bar": "apples",
426 |         "baz": [
427 |             1,
428 |             2,
429 |             3,
430 |             4
431 |         ],
432 |         "foo": 42
433 |     }
434 | 
435 | Storing multiple arrays in a hierarchy
436 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
437 | 
438 | Below is an example of storing multiple Zarr arrays organized into a group
439 | hierarchy, using a directory on the local file system as storage. This storage
440 | implementation maps logical paths onto directory paths on the file system,
441 | however this is an implementation choice and is not required.
442 | 
443 | Setup the store::
444 | 
445 |     >>> import zarr
446 |     >>> store = zarr.DirectoryStore('data/group.zarr')
447 | 
448 | Create the root group::
449 | 
450 |     >>> root_grp = zarr.group(store, overwrite=True)
451 | 
452 | The metadata resource for the root group has been created::
453 | 
454 |     >>> import os
455 |     >>> sorted(os.listdir('data/group.zarr'))
456 |     ['.zgroup']
457 | 
458 | Inspect the group metadata::
459 | 
460 |     >>> print(open('data/group.zarr/.zgroup').read())
461 |     {
462 |         "zarr_format": 2
463 |     }
464 | 
465 | Create a sub-group::
466 | 
467 |     >>> sub_grp = root_grp.create_group('foo')
468 | 
469 | What has been stored::
470 | 
471 |     >>> sorted(os.listdir('data/group.zarr'))
472 |     ['.zgroup', 'foo']
473 |     >>> sorted(os.listdir('data/group.zarr/foo'))
474 |     ['.zgroup']
475 | 
476 | Create an array within the sub-group::
477 | 
478 |     >>> a = sub_grp.create_dataset('bar', shape=(20, 20), chunks=(10, 10))
479 |     >>> a[:] = 42
480 | 
481 | Set a custom attribute::
482 | 
483 |     >>> a.attrs['comment'] = 'answer to life, the universe and everything'
484 | 
485 | What has been stored::
486 | 
487 |     >>> sorted(os.listdir('data/group.zarr'))
488 |     ['.zgroup', 'foo']
489 |     >>> sorted(os.listdir('data/group.zarr/foo'))
490 |     ['.zgroup', 'bar']
491 |     >>> sorted(os.listdir('data/group.zarr/foo/bar'))
492 |     ['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
493 | 
494 | Here is the same example using a Zip file as storage::
495 | 
496 |     >>> store = zarr.ZipStore('data/group.zip', mode='w')
497 |     >>> root_grp = zarr.group(store)
498 |     >>> sub_grp = root_grp.create_group('foo')
499 |     >>> a = sub_grp.create_dataset('bar', shape=(20, 20), chunks=(10, 10))
500 |     >>> a[:] = 42
501 |     >>> a.attrs['comment'] = 'answer to life, the universe and everything'
502 |     >>> store.close()
503 | 
504 | What has been stored::
505 | 
506 |     >>> import zipfile
507 |     >>> zf = zipfile.ZipFile('data/group.zip', mode='r')
508 |     >>> for name in sorted(zf.namelist()):
509 |     ...     print(name)
510 |     .zgroup
511 |     foo/.zgroup
512 |     foo/bar/.zarray
513 |     foo/bar/.zattrs
514 |     foo/bar/0.0
515 |     foo/bar/0.1
516 |     foo/bar/1.0
517 |     foo/bar/1.1
518 | 
519 | .. _spec_v2_changes:
520 | 
521 | Changes
522 | -------
523 | 
524 | Version 2 clarifications
525 | ~~~~~~~~~~~~~~~~~~~~~~~~
526 | 
527 | The following changes have been made to the version 2 specification since it was
528 | initially published to clarify ambiguities and add some missing information.
529 | 
530 | * The specification now describes how bytes fill values should be encoded and
531 |   decoded for arrays with a fixed-length byte string data type (`#165
532 |   <https://github.com/zarr-developers/zarr-python/issues/165/>`_, `#176
533 |   <https://github.com/zarr-developers/zarr-python/issues/176/>`_).
534 | 
535 | * The specification now clarifies that units must be specified for datetime64 and
536 |   timedelta64 data types (`#85
537 |   <https://github.com/zarr-developers/zarr-python/issues/85/>`_, `#215
538 |   <https://github.com/zarr-developers/zarr-python/issues/215/>`_).
539 | 
540 | * The specification now clarifies that the '.zattrs' key does not have to be present for
541 |   either arrays or groups, and if absent then custom attributes should be treated as
542 |   empty.
543 | 
544 | * The specification now describes how structured datatypes with
545 |   subarray shapes and/or with nested structured data types are encoded
546 |   in array metadata (`#111
547 |   <https://github.com/zarr-developers/zarr-python/issues/111/>`_, `#296
548 |   <https://github.com/zarr-developers/zarr-python/issues/296/>`_).
549 | 
550 | * Clarified the key/value pairs of custom attributes as "arbitrary" rather than
551 |   "simple".
552 | 
553 | Changes from version 1 to version 2
554 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
555 | 
556 | The following changes were made between version 1 and version 2 of this specification:
557 | 
558 | * Added support for storing multiple arrays in the same store and organising
559 |   arrays into hierarchies using groups.
560 | * Array metadata is now stored under the ".zarray" key instead of the "meta"
561 |   key.
562 | * Custom attributes are now stored under the ".zattrs" key instead of the
563 |   "attrs" key.
564 | * Added support for filters.
565 | * Changed encoding of "fill_value" field within array metadata.
566 | * Changed encoding of compressor information within array metadata to be
567 |   consistent with representation of filter information.
568 | 


--------------------------------------------------------------------------------
/docs/v3/chunk-grids/index.rst:
--------------------------------------------------------------------------------
 1 | .. _chunk-grid-list:
 2 | 
 3 | ===========
 4 | Chunk Grids
 5 | ===========
 6 | 
 7 | The following documents specify chunk grids which SHOULD
 8 | be implemented by all implementations.
 9 | 
10 | .. toctree::
11 |    :glob:
12 |    :maxdepth: 1
13 |    :titlesonly:
14 |    :caption: Contents:
15 | 
16 |    */*
17 | 
18 | Extensions
19 | ----------
20 | 
21 | Registered chunk grid extensions can be found under
22 | `zarr-extensions::chunk-grids <https://github.com/zarr-developers/zarr-extensions/tree/main/chunk-grids>`_.
23 | 


--------------------------------------------------------------------------------
/docs/v3/chunk-grids/regular-grid/index.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | .. _regular-chunkgrid:
  3 | 
  4 | ==================
  5 | Regular chunk grid
  6 | ==================
  7 | 
  8 | Version:
  9 |     1.0
 10 | Specification URI:
 11 |     https://zarr-specs.readthedocs.io/en/latest/v3/chunk-grids/regular-grid/
 12 | Corresponding ZEP:
 13 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 14 | Issue tracking:
 15 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/chunk-grid>`_
 16 | Suggest an edit for this spec:
 17 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/chunk-grids/regular-grid/index.rst>`_
 18 | 
 19 | Copyright 2020-Present Zarr core development team. This work
 20 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 21 | <https://creativecommons.org/licenses/by/3.0/>`_.
 22 | 
 23 | ----
 24 | 
 25 | Abstract
 26 | ========
 27 | 
 28 | A regular grid is a type of grid where an array is divided into chunks
 29 | such that each chunk is a hyperrectangle of the same shape. The
 30 | dimensionality of the grid is the same as the dimensionality of the
 31 | array. Each chunk in the grid can be addressed by a tuple of non-negative
 32 | integers (`k`, `j`, `i`, ...) corresponding to the indices of the
 33 | chunk along each dimension.
 34 | 
 35 | Description
 36 | ===========
 37 | 
 38 | The origin element of a chunk has coordinates in the array space (`k` *
 39 | `dz`, `j` * `dy`, `i` * `dx`, ...) where (`dz`, `dy`, `dx`, ...) are
 40 | the chunk sizes along each dimension.
 41 | Thus the origin element of the chunk at grid index (0, 0, 0,
 42 | ...) is at coordinate (0, 0, 0, ...) in the array space, i.e., the
 43 | grid is aligned with the origin of the array. If the length of any
 44 | array dimension is not perfectly divisible by the chunk length along
 45 | the same dimension, then the grid will overhang the edge of the array
 46 | space.
 47 | 
 48 | The shape of the chunk grid will be (ceil(`z` / `dz`), ceil(`y` /
 49 | `dy`), ceil(`x` / `dx`), ...)  where (`z`, `y`, `x`, ...) is the array
 50 | shape, "/" is the division operator and "ceil" is the ceiling
 51 | function. For example, if a 3 dimensional array has shape (10, 200,
 52 | 3000), and has chunk shape (5, 20, 400), then the shape of the chunk
 53 | grid will be (2, 10, 8), meaning that there will be 2 chunks along the
 54 | first dimension, 10 along the second dimension, and 8 along the third
 55 | dimension.
 56 | 
 57 | .. list-table:: Regular Grid Example
 58 |     :header-rows: 1
 59 | 
 60 |     * - Array Shape
 61 |       - Chunk Shape
 62 |       - Chunk Grid Shape
 63 |       - Notes
 64 |     * - (10, 200, 3000)
 65 |       - (5, 20, 400)
 66 |       - (2, 10, 8)
 67 |       - The grid does overhang the edge of the array on the 3rd dimension.
 68 | 
 69 | An element of an array with coordinates (`c`, `b`, `a`, ...) will
 70 | occur within the chunk at grid index (`c` // `dz`, `b` // `dy`, `a` //
 71 | `dx`, ...), where "//" is the floor division operator. The element
 72 | will have coordinates (`c` % `dz`, `b` % `dy`, `a` % `dx`, ...) within
 73 | that chunk, where "%" is the modulo operator. For example, if a
 74 | 3 dimensional array has shape (10, 200, 3000), and has chunk shape
 75 | (5, 20, 400), then the element of the array with coordinates (7, 150, 900)
 76 | is contained within the chunk at grid index (1, 7, 2) and has coordinates
 77 | (2, 10, 100) within that chunk.
 78 | 
 79 | The store key corresponding to a given grid cell is determined based on the
 80 | :ref:`array-metadata-chunk-key-encoding` member of the :ref:`array-metadata`.
 81 | 
 82 | Note that this specification does not consider the case where the
 83 | chunk grid and the array space are not aligned at the origin vertices
 84 | of the array and the chunk at grid index (0, 0, 0, ...). However,
 85 | extensions may define variations on the regular grid type
 86 | such that the grid indices may include negative integers, and the
 87 | origin element of the array may occur at an arbitrary position within
 88 | any chunk, which is required to allow arrays to be extended by an
 89 | arbitrary length in a "negative" direction along any dimension.
 90 | 
 91 | .. note:: Chunks at the border of an array always have the full chunk size, even when
 92 |    the array only covers parts of it. For example, having an array with ``"shape": [30, 30]`` and
 93 |    ``"chunk_shape": [16, 16]``, the chunk ``0,1`` would also contain unused values for the indices
 94 |    ``0-15, 30-31``. When writing such chunks it is recommended to use the current fill value
 95 |    for elements outside the bounds of the array.
 96 | 
 97 | 
 98 | 
 99 | Status of this document
100 | =======================
101 | 
102 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
103 | 
104 | 
105 | Document conventions
106 | ====================
107 | 
108 | Conformance requirements are expressed with a combination of
109 | descriptive assertions and [RFC2119]_ terminology. The key words
110 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
111 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
112 | parts of this document are to be interpreted as described in
113 | [RFC2119]_. However, for readability, these words do not appear in all
114 | uppercase letters in this specification.
115 | 
116 | All of the text of this specification is normative except sections
117 | explicitly marked as non-normative, examples, and notes. Examples in
118 | this specification are introduced with the words "for example".


--------------------------------------------------------------------------------
/docs/v3/chunk-key-encodings/default/index.rst:
--------------------------------------------------------------------------------
 1 | .. _default-chunkkeyencoding:
 2 | 
 3 | ==========================
 4 | Default chunk key encoding
 5 | ==========================
 6 | 
 7 | Version:
 8 |     1.0
 9 | Specification URI:
10 |     https://zarr-specs.readthedocs.io/en/latest/v3/chunk-key-encodings/default/
11 | Corresponding ZEP:
12 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
13 | Issue tracking:
14 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/chunk-grid>`_
15 | Suggest an edit for this spec:
16 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/chunk-key-encodings/default/index.rst>`_
17 | 
18 | Copyright 2020-Present Zarr core development team. This work
19 | is licensed under a `Creative Commons Attribution 3.0 Unported License
20 | <https://creativecommons.org/licenses/by/3.0/>`_.
21 | 
22 | ----
23 | 
24 | Description
25 | ===========
26 | 
27 | The ``configuration`` object may contain one optional member,
28 | ``separator``, which must be either ``"/"`` or ``"."``.  If not specified,
29 | ``separator`` defaults to ``"/"``.
30 | 
31 | The key for a chunk with grid index (``k``, ``j``, ``i``, ...) is
32 | formed by taking the initial prefix ``c``, and appending for each dimension:
33 | 
34 | - the ``separator`` character, followed by,
35 | 
36 | - the ASCII decimal string representation of the chunk index within that dimension.
37 | 
38 | For example, in a 3 dimensional array, with a separator of ``/`` the identifier
39 | for the chunk at grid index (1, 23, 45) is the string ``"c/1/23/45"``.  With a
40 | separator of ``.``, the identifier is the string ``"c.1.23.45"``. The initial prefix 
41 | ``c`` ensures that metadata documents and chunks have separate prefixes.
42 | 
43 | .. note:: A main difference with spec v2 is that the default chunk separator
44 |     changed from ``.`` to ``/``, as in N5.  This decreases the maximum number of
45 |     items in hierarchical stores like directory stores.
46 | 
47 | .. note:: Arrays may have 0 dimensions (when for example representing scalars),
48 |     in which case the coordinate of a chunk is the empty tuple, and the chunk key
49 |     will consist of the string ``c``.
50 | 
51 | 
52 | Status of this document
53 | =======================
54 | 
55 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
56 | 
57 | 
58 | Document conventions
59 | ====================
60 | 
61 | Conformance requirements are expressed with a combination of
62 | descriptive assertions and [RFC2119]_ terminology. The key words
63 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
64 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
65 | parts of this document are to be interpreted as described in
66 | [RFC2119]_. However, for readability, these words do not appear in all
67 | uppercase letters in this specification.
68 | 
69 | All of the text of this specification is normative except sections
70 | explicitly marked as non-normative, examples, and notes. Examples in
71 | this specification are introduced with the words "for example".


--------------------------------------------------------------------------------
/docs/v3/chunk-key-encodings/index.rst:
--------------------------------------------------------------------------------
 1 | .. _chunk-key-encoding-list:
 2 | 
 3 | ===================
 4 | Chunk Key Encodings
 5 | ===================
 6 | 
 7 | The following documents specify chunk key encodings which SHOULD
 8 | be implemented by all implementations.
 9 | 
10 | .. toctree::
11 |    :glob:
12 |    :maxdepth: 1
13 |    :titlesonly:
14 |    :caption: Contents:
15 | 
16 |    */*
17 | 
18 | Extensions
19 | ----------
20 | 
21 | Registered chunk key encoding extensions can be found under
22 | `zarr-extensions::chunk-key-encodings <https://github.com/zarr-developers/zarr-extensions/tree/main/chunk-key-encodings>`_.
23 | 


--------------------------------------------------------------------------------
/docs/v3/chunk-key-encodings/v2/index.rst:
--------------------------------------------------------------------------------
 1 | .. _v2-chunkkeyencoding:
 2 | 
 3 | =====================
 4 | v2 chunk key encoding
 5 | =====================
 6 | 
 7 | Version:
 8 |     1.0
 9 | Specification URI:
10 |     https://zarr-specs.readthedocs.io/en/latest/v3/chunk-key-encodings/v2/
11 | Corresponding ZEP:
12 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
13 | Issue tracking:
14 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/chunk-grid>`_
15 | Suggest an edit for this spec:
16 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/chunk-key-encodings/v2/index.rst>`_
17 | 
18 | Copyright 2020-Present Zarr core development team. This work
19 | is licensed under a `Creative Commons Attribution 3.0 Unported License
20 | <https://creativecommons.org/licenses/by/3.0/>`_.
21 | 
22 | ----
23 | 
24 | Description
25 | ===========
26 | 
27 | The ``configuration`` object may contain one optional member,
28 | ``separator``, which must be either ``"/"`` or ``"."``.  If not specified,
29 | ``separator`` defaults to ``"."``.
30 | 
31 | The identifier for a chunk with at least one dimension is formed by
32 | concatenating for each dimension:
33 | 
34 |  - the ASCII decimal string representation of the chunk index within that
35 |    dimension, followed by
36 | 
37 |  - the ``separator`` character, except that it is omitted for the last
38 |    dimension.
39 | 
40 | For example, in a 3 dimensional array, with a separator of ``.`` the identifier
41 | for the chunk at grid index (1, 23, 45) is the string ``"1.23.45"``.  With a
42 | separator of ``/``, the identifier is the string ``"1/23/45"``.
43 | 
44 | For chunk grids with 0 dimensions, the single chunk has the key ``"0"``.
45 | 
46 | .. warning::
47 | 
48 |     This encoding is intended only to allow existing v2 arrays to be
49 |     converted to v3 without having to rename chunks.  It is not recommended
50 |     to be used when writing new arrays.
51 | 
52 | 
53 | Status of this document
54 | =======================
55 | 
56 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
57 | 
58 | 
59 | Document conventions
60 | ====================
61 | 
62 | Conformance requirements are expressed with a combination of
63 | descriptive assertions and [RFC2119]_ terminology. The key words
64 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
65 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
66 | parts of this document are to be interpreted as described in
67 | [RFC2119]_. However, for readability, these words do not appear in all
68 | uppercase letters in this specification.
69 | 
70 | All of the text of this specification is normative except sections
71 | explicitly marked as non-normative, examples, and notes. Examples in
72 | this specification are introduced with the words "for example".


--------------------------------------------------------------------------------
/docs/v3/codecs/blosc/index.rst:
--------------------------------------------------------------------------------
  1 | ===========
  2 | Blosc codec
  3 | ===========
  4 | 
  5 | Version:
  6 |     1.0
  7 | Specification URI:
  8 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/
  9 | Corresponding ZEP:
 10 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 11 | Issue tracking:
 12 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/codec>`_
 13 | Suggest an edit for this spec:
 14 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/blosc/index.rst>`_
 15 | 
 16 | Copyright 2020-Present Zarr core development team. This work
 17 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 18 | <https://creativecommons.org/licenses/by/3.0/>`_.
 19 | 
 20 | ----
 21 | 
 22 | 
 23 | Abstract
 24 | ========
 25 | 
 26 | Defines a ``bytes -> bytes`` codec that uses the blosc container format.
 27 | 
 28 | 
 29 | Status of this document
 30 | =======================
 31 | 
 32 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
 33 | 
 34 | 
 35 | Document conventions
 36 | ====================
 37 | 
 38 | Conformance requirements are expressed with a combination of
 39 | descriptive assertions and [RFC2119]_ terminology. The key words
 40 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 41 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 42 | parts of this document are to be interpreted as described in
 43 | [RFC2119]_. However, for readability, these words do not appear in all
 44 | uppercase letters in this specification.
 45 | 
 46 | All of the text of this specification is normative except sections
 47 | explicitly marked as non-normative, examples, and notes. Examples in
 48 | this specification are introduced with the words "for example".
 49 | 
 50 | 
 51 | Codec name
 52 | ==========
 53 | 
 54 | The value of the ``name`` member in the codec object MUST be ``blosc``.
 55 | 
 56 | 
 57 | Configuration parameters
 58 | ========================
 59 | 
 60 | cname:
 61 |     A string identifying the internal compression algorithm to be
 62 |     used. At the time of writing, the following values are supported
 63 |     by the c-blosc library: "lz4", "lz4hc", "blosclz", "zstd",
 64 |     "snappy", "zlib".
 65 |     
 66 | clevel:
 67 |     An integer from 0 to 9 which controls the speed and level of
 68 |     compression. A level of 1 is the fastest compression method and
 69 |     produces the least compression, while 9 is slowest and produces
 70 |     the most compression. Compression is turned off completely when
 71 |     level is 0.
 72 | 
 73 | shuffle:
 74 |     Specifies the type of shuffling to perform, if any, prior to compression.
 75 |     Must be one of:
 76 | 
 77 |     - ``"noshuffle"``, to indicate no shuffling;
 78 |     - ``"shuffle"``, to indicate byte-wise shuffling;
 79 |     - ``"bitshuffle"``, to indicate bit-wise shuffling.
 80 | 
 81 |     Zarr implementations MAY provide users an option to choose a shuffle mode
 82 |     automatically based on the typesize or other information, but MUST record in
 83 |     the metadata the mode that is chosen.
 84 | 
 85 | typesize:
 86 |     Positive integer specifying the stride in bytes over which shuffling is
 87 |     performed.  Required unless ``shuffle`` is ``"noshuffle"``, in which case the value
 88 |     is ignored.
 89 | 
 90 |     Zarr implementations MAY allow users to leave this unspecified and have the
 91 |     implementation choose a value automatically based on the array data type and
 92 |     previous codecs in the chain, but MUST record in the metadata the value that
 93 |     is chosen.
 94 | 
 95 | blocksize:
 96 |     An integer giving the size in bytes of blocks into which a
 97 |     buffer is divided before compression. A value of 0
 98 |     indicates that an automatic size will be used.
 99 | 
100 | For example, the array metadata document below specifies that the compressor is
101 | the Blosc codec configured with a compression level of 1, byte-wise shuffling
102 | with a stride of 4, the ``lz4`` compression algorithm and the default block
103 | size::
104 | 
105 |     {
106 |         "codecs": [{
107 |             "name": "blosc",
108 |             "configuration": {
109 |                 "cname": "lz4",
110 |                 "clevel": 1,
111 |                 "shuffle": "shuffle",
112 |                 "typesize": 4,
113 |                 "blocksize": 0
114 |             }
115 |         }]
116 |     }
117 | 
118 | 
119 | Format and algorithm
120 | ====================
121 | 
122 | This is a ``bytes -> bytes`` codec.
123 | 
124 | Blosc is a meta-compressor, which divides an input buffer into blocks,
125 | then applies an internal compression algorithm to each block, then
126 | packs the encoded blocks together into a single output buffer with a
127 | header. The format of the encoded buffer is defined in [BLOSC]_. The
128 | reference implementation is provided by the `c-blosc library
129 | <https://github.com/Blosc/c-blosc>`_.
130 | 
131 | 
132 | Comparison to Zarr v2
133 | =====================
134 | 
135 | While the binary format is identical, the JSON metadata differs from that used
136 | by the Zarr v2 ``blosc`` codec in the following ways:
137 | 
138 | - The ``shuffle`` mode is now specified more clearly as ``"noshuffle"`` (0 in Zarr v2),
139 |   ``"bitshuffle"`` (2 in Zarr v2), or ``"shuffle"`` (1 in Zarr v2).  Using these constants
140 |   rather than numbers makes it much easier to know what shuffle mode will be
141 |   used from manual inspection of the metadata.
142 | 
143 | - When shuffling is enabled, the ``typesize`` must now be specified explicitly in
144 |   the metadata, rather than determined implicitly from the input data.  This
145 |   allows Blosc to function as a pure "bytes -> bytes" codec rather than an
146 |   "array -> bytes" codec.
147 | 
148 | - There is no option to choose between bit-wise and byte-wise shuffling
149 |   automatically, as supported in Zarr v2 via a ``shuffle`` value of ``-1``.
150 | 
151 | References
152 | ==========
153 | 
154 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
155 |    Requirement Levels. March 1997. Best Current Practice. URL:
156 |    https://tools.ietf.org/html/rfc2119
157 | 
158 | .. [BLOSC] F. Alted. Blosc Chunk Format. URL:
159 |    https://github.com/Blosc/c-blosc/blob/HEAD/README_CHUNK_FORMAT.rst
160 | 
161 | 
162 | Change log
163 | ==========
164 | 
165 | No changes yet.
166 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/bytes/index.rst:
--------------------------------------------------------------------------------
  1 | .. _bytes-codec-v1:
  2 | 
  3 | ===========
  4 | Bytes codec
  5 | ===========
  6 | 
  7 | Version:
  8 |     1.0
  9 | Specification URI:
 10 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/
 11 | Corresponding ZEP:
 12 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 13 | Issue tracking:
 14 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/codec>`_
 15 | Suggest an edit for this spec:
 16 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/bytes/index.rst>`_
 17 | 
 18 | Copyright 2020-Present Zarr core development team. This work
 19 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 20 | <https://creativecommons.org/licenses/by/3.0/>`_.
 21 | 
 22 | ----
 23 | 
 24 | 
 25 | Abstract
 26 | ========
 27 | 
 28 | Defines an ``array -> bytes`` codec that encodes arrays of fixed-size numeric
 29 | data types as a sequence of bytes in lexicographical order. For multi-byte data
 30 | types, it encodes the array either in little endian or big endian.
 31 | 
 32 | 
 33 | Status of this document
 34 | =======================
 35 | 
 36 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
 37 | 
 38 | 
 39 | Document conventions
 40 | ====================
 41 | 
 42 | Conformance requirements are expressed with a combination of
 43 | descriptive assertions and [RFC2119]_ terminology. The key words
 44 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 45 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 46 | parts of this document are to be interpreted as described in
 47 | [RFC2119]_. However, for readability, these words do not appear in all
 48 | uppercase letters in this specification.
 49 | 
 50 | All of the text of this specification is normative except sections
 51 | explicitly marked as non-normative, examples, and notes. Examples in
 52 | this specification are introduced with the words "for example".
 53 | 
 54 | 
 55 | Codec name
 56 | ==========
 57 | 
 58 | The value of the ``name`` member in the codec object MUST be ``bytes``.
 59 | 
 60 | 
 61 | Configuration parameters
 62 | ========================
 63 | 
 64 | endian:
 65 |     Required for data types for which endianness is applicable. For example, 
 66 |     this includes multi-byte data types, such as ``uint16`` and ``int32``, 
 67 |     but not single-byte data types, such as ``uint8`` or ``bool``. 
 68 |     If present, the value MUST be a string equal to either ``"big"`` or 
 69 |     ``"little"``.
 70 | 
 71 | 
 72 | Format and algorithm
 73 | ====================
 74 | 
 75 | This is an ``array -> bytes`` codec.
 76 | 
 77 | Each element of the array is encoded using the specified endian variant of its
 78 | binary representation listed below.  Array elements are encoded in
 79 | lexicographical order.  For example, with ``endian`` specified as ``big``, the
 80 | ``int32`` data type is encoded as a 4-byte big endian two's complement integer,
 81 | and the ``complex128`` data type is encoded as two consecutive 8-byte big endian
 82 | IEEE 754 binary64 values.
 83 | 
 84 | .. list-table:: Supported data types
 85 |    :header-rows: 1
 86 | 
 87 |    * - Identifier
 88 |      - Binary representation
 89 |    * - ``bool``
 90 |      - Single byte, with false encoded as ``\x00`` and true encoded as
 91 |        ``\x01``.  Does not depend on ``endian`` parameter.
 92 |    * - ``int8``
 93 |      - 1 byte two's complement.  Does not depend on ``endian`` parameter.
 94 |    * - ``int16``
 95 |      - 2-byte two's complement
 96 |    * - ``int32``
 97 |      - 4-byte two's complement
 98 |    * - ``int64``
 99 |      - 8-byte two's complement
100 |    * - ``uint8``
101 |      - 1 byte.  Does not depend on ``endian`` parameter.
102 |    * - ``uint16``
103 |      - 2-byte
104 |    * - ``uint32``
105 |      - 4-byte
106 |    * - ``uint64``
107 |      - 8-byte
108 |    * - ``float16`` (optionally supported)
109 |      - 2-byte IEEE 754 binary16
110 |    * - ``float32``
111 |      - 4-byte IEEE 754 binary32
112 |    * - ``float64``
113 |      - 8-byte IEEE 754 binary64
114 |    * - ``complex64``
115 |      - 2 consecutive 4-byte IEEE 754 binary32 values (real component followed by imaginary component)
116 |    * - ``complex128``
117 |      - 2 consecutive 8-byte IEEE 754 binary64 values (real component followed by imaginary component)
118 |    * - ``r*``
119 |      - number of bits, which must be a multiple of 8, given by ``*``.
120 | 
121 | .. note::
122 | 
123 |    To encode elements in a different order than lexicographical order (C
124 |    order/row major), the :ref:`transpose codec<transpose-codec-v1>` may be
125 |    specified.
126 | 
127 | References
128 | ==========
129 | 
130 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
131 |    Requirement Levels. March 1997. Best Current Practice. URL:
132 |    https://tools.ietf.org/html/rfc2119
133 | 
134 | 
135 | Change log
136 | ==========
137 | 
138 | - ``endian`` codec was renamed to ``bytes`` codec.  `PR #263
139 |   <https://github.com/zarr-developers/zarr-specs/pull/263/>`_
140 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/crc32c/index.rst:
--------------------------------------------------------------------------------
 1 | .. _crc32c-codec:
 2 | 
 3 | =====================
 4 | CRC32C checksum codec
 5 | =====================
 6 | 
 7 | Version:
 8 |     1.0
 9 | Specification URI:
10 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/crc32c/
11 | Editors:
12 |     * Jonathan Striebel (`@jstriebel <https://github.com/jstriebel>`_), Scalable Minds
13 |     * Norman Rzepka (`@normanrz <https://github.com/normanrz>`_), Scalable Minds
14 |     * Jeremy Maitin-Shepard (`@jbms <https://github.com/jbms>`_), Google
15 | Corresponding ZEP:
16 |     `ZEP0002 — Sharding codec <https://zarr.dev/zeps/accepted/ZEP0002.html>`_
17 | Issue tracking:
18 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/codec>`_
19 | Suggest an edit for this spec:
20 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/crc32c/index.rst>`_
21 | 
22 | Copyright 2022-Present `Zarr core development team
23 | <https://github.com/orgs/zarr-developers/teams/core-devs>`_. This work
24 | is licensed under a `Creative Commons Attribution 3.0 Unported License
25 | <https://creativecommons.org/licenses/by/3.0/>`_.
26 | 
27 | ----
28 | 
29 | 
30 | Abstract
31 | ========
32 | 
33 | Defines a ``bytes -> bytes`` codec that appends a CRC32C checksum of the input bytestream.
34 | 
35 | 
36 | Status of this document
37 | =======================
38 | 
39 | ZEP0002 was accepted on November 1st, 2023 via https://github.com/zarr-developers/zarr-specs/issues/254.
40 | 
41 | Document conventions
42 | ====================
43 | 
44 | Conformance requirements are expressed with a combination of
45 | descriptive assertions and [RFC2119]_ terminology. The key words
46 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
47 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
48 | parts of this document are to be interpreted as described in
49 | [RFC2119]_. However, for readability, these words do not appear in all
50 | uppercase letters in this specification.
51 | 
52 | All of the text of this specification is normative except sections
53 | explicitly marked as non-normative, examples, and notes. Examples in
54 | this specification are introduced with the words "for example".
55 | 
56 | 
57 | Codec name
58 | ==========
59 | 
60 | The value of the ``name`` member in the codec object MUST be ``crc32c``.
61 | 
62 | 
63 | Configuration parameters
64 | ========================
65 | 
66 | None.
67 | 
68 | 
69 | Format and algorithm
70 | ====================
71 | 
72 | This is a ``bytes -> bytes`` codec.
73 | 
74 | The codec computes the CRC32C checksum as defined in [RFC3720]_ of the input
75 | bytestream. The output bytestream is composed of the unchanged input
76 | bytestream with the appended checksum. The checksum is stored as a 32-bit
77 | unsigned integer in little-endian byte order.
78 | 
79 | 
80 | References
81 | ==========
82 | 
83 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
84 |    Requirement Levels. March 1997. Best Current Practice. URL:
85 |    https://tools.ietf.org/html/rfc2119
86 | 
87 | .. [RFC3720] J. Satran et al. Internet Small Computer Systems 
88 |    Interface (iSCSI). April 2004. Proposed Standard. URL:
89 |    https://tools.ietf.org/html/rfc3720
90 | 
91 | 
92 | Change log
93 | ==========
94 | 
95 | No changes yet.
96 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/gzip/index.rst:
--------------------------------------------------------------------------------
  1 | ==========
  2 | Gzip codec
  3 | ==========
  4 | 
  5 | Version:
  6 |     1.0
  7 | Specification URI:
  8 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/
  9 | Corresponding ZEP:
 10 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 11 | Issue tracking:
 12 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/codec>`_
 13 | Suggest an edit for this spec:
 14 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/gzip/index.rst>`_
 15 | 
 16 | Copyright 2020-Present Zarr core development team. This work
 17 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 18 | <https://creativecommons.org/licenses/by/3.0/>`_.
 19 | 
 20 | ----
 21 | 
 22 | 
 23 | Abstract
 24 | ========
 25 | 
 26 | Defines a ``bytes -> bytes`` codec that applies gzip compression.
 27 | 
 28 | 
 29 | Status of this document
 30 | =======================
 31 | 
 32 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
 33 | 
 34 | 
 35 | Document conventions
 36 | ====================
 37 | 
 38 | Conformance requirements are expressed with a combination of
 39 | descriptive assertions and [RFC2119]_ terminology. The key words
 40 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 41 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 42 | parts of this document are to be interpreted as described in
 43 | [RFC2119]_. However, for readability, these words do not appear in all
 44 | uppercase letters in this specification.
 45 | 
 46 | All of the text of this specification is normative except sections
 47 | explicitly marked as non-normative, examples, and notes. Examples in
 48 | this specification are introduced with the words "for example".
 49 | 
 50 | 
 51 | Codec name
 52 | ==========
 53 | 
 54 | The value of the ``name`` member in the codec object MUST be ``gzip``.
 55 | 
 56 | 
 57 | Configuration parameters
 58 | ========================
 59 | 
 60 | level:
 61 |     An integer from 0 to 9 which controls the speed and level of
 62 |     compression. A level of 1 is the fastest compression method and
 63 |     produces the least compression, while 9 is slowest and produces
 64 |     the most compression. Compression is turned off completely when
 65 |     level is 0.
 66 | 
 67 | For example, the array metadata below specifies that the compressor is
 68 | the Gzip codec configured with a compression level of 1::
 69 | 
 70 |     {
 71 |         "codecs": [{
 72 |             "name": "gzip",
 73 |             "configuration": {
 74 |                 "level": 1
 75 |             }
 76 |         }]
 77 |     }
 78 | 
 79 | 
 80 | Format and algorithm
 81 | ====================
 82 | 
 83 | This is a ``bytes -> bytes`` codec.
 84 | 
 85 | Encoding and decoding is performed using the algorithm defined in
 86 | [RFC1951]_.
 87 | 
 88 | Encoded data should conform to the Gzip file format [RFC1952]_.
 89 | 
 90 | 
 91 | References
 92 | ==========
 93 | 
 94 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
 95 |    Requirement Levels. March 1997. Best Current Practice. URL:
 96 |    https://tools.ietf.org/html/rfc2119
 97 | 
 98 | .. [RFC1951] P. Deutsch. DEFLATE Compressed Data Format Specification version
 99 |    1.3. May 1996. Informational. URL:
100 |    https://tools.ietf.org/html/rfc1951
101 | 
102 | .. [RFC1952] P. Deutsch. GZIP file format specification version 4.3.
103 |    May 1996. Informational. URL:
104 |    https://tools.ietf.org/html/rfc1952
105 | 
106 | 
107 | Change log
108 | ==========
109 | 
110 | No changes yet.
111 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/index.rst:
--------------------------------------------------------------------------------
 1 | .. _codec-list:
 2 | 
 3 | ======
 4 | Codecs
 5 | ======
 6 | 
 7 | The following documents specify codecs which SHOULD
 8 | be implemented by all implementations.
 9 | 
10 | .. toctree::
11 |    :glob:
12 |    :maxdepth: 1
13 |    :titlesonly:
14 |    :caption: Contents:
15 | 
16 |    */*
17 | 
18 | Extensions
19 | ----------
20 | 
21 | Registered codec extensions can be found under
22 | `zarr-extensions::codecs <https://github.com/zarr-developers/zarr-extensions/tree/main/codecs>`_.
23 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/sharding-indexed/index.rst:
--------------------------------------------------------------------------------
  1 | .. _sharding-indexed-codec:
  2 | 
  3 | ==============
  4 | Sharding codec
  5 | ==============
  6 | 
  7 | Version:
  8 |     1.0
  9 | Specification URI:
 10 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/
 11 | Editors:
 12 |     * Jonathan Striebel (`@jstriebel <https://github.com/jstriebel>`_), Scalable Minds
 13 |     * Norman Rzepka (`@normanrz <https://github.com/normanrz>`_), Scalable Minds
 14 |     * Jeremy Maitin-Shepard (`@jbms <https://github.com/jbms>`_), Google
 15 | Corresponding ZEP:
 16 |     `ZEP0002 — Sharding codec <https://zarr.dev/zeps/accepted/ZEP0002.html>`_
 17 | Issue tracking:
 18 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/sharding-indexed-codec-v1.0>`_
 19 | Suggest an edit for this spec:
 20 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/sharding-indexed/index.rst>`_
 21 | 
 22 | Copyright 2022-Present `Zarr core development team
 23 | <https://github.com/orgs/zarr-developers/teams/core-devs>`_. This work
 24 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 25 | <https://creativecommons.org/licenses/by/3.0/>`_.
 26 | 
 27 | ----
 28 | 
 29 | 
 30 | Abstract
 31 | ========
 32 | 
 33 | This specification defines a Zarr ``array -> bytes`` codec for sharding.
 34 | 
 35 | Sharding logically splits chunks ("shards") into sub-chunks ("inner chunks")
 36 | that can be individually compressed and accessed. This allows multiple chunks
 37 | to be colocated within one storage object, bundling them in shards.
 38 | 
 39 | Status of this document
 40 | =======================
 41 | 
 42 | ZEP0002 was accepted on November 1st, 2023 via https://github.com/zarr-developers/zarr-specs/issues/254.
 43 | 
 44 | Motivation
 45 | ==========
 46 | 
 47 | In many cases, it becomes inefficient or impractical to store a large number of
 48 | chunks as separate files or objects due to the design constraints of the
 49 | underlying storage. For example, the file block size and maximum inode number
 50 | restrict the usage of numerous small files for typical file systems. Cloud
 51 | storage such as S3, GCS, and various distributed filesystems also do not
 52 | efficiently handle large numbers of small files or objects.
 53 | 
 54 | Increasing the chunk size works only up to a certain point, as chunk sizes need
 55 | to be small for read efficiency requirements, for example to stream data in 
 56 | browser-based visualization software.
 57 | 
 58 | Therefore, chunks may need to be smaller than the minimum size of one storage
 59 | key. In those cases, it is efficient to store objects at a more coarse
 60 | granularity than reading chunks.
 61 | 
 62 | **Sharding solves this by allowing multiple chunks to be stored in one storage
 63 | key, which is called a shard**:
 64 | 
 65 | .. image:: sharding.png
 66 | 
 67 | 
 68 | Document conventions
 69 | ====================
 70 | 
 71 | Conformance requirements are expressed with a combination of descriptive
 72 | assertions and [RFC2119]_ terminology. The key words "MUST", "MUST NOT",
 73 | "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY",
 74 | and "OPTIONAL" in the normative parts of this document are to be interpreted as
 75 | described in [RFC2119]_. However, for readability, these words do not appear in
 76 | all uppercase letters in this specification.
 77 | 
 78 | All of the text of this specification is normative except sections explicitly
 79 | marked as non-normative, examples, and notes. Examples in this specification are
 80 | introduced with the words "for example".
 81 | 
 82 | 
 83 | Codec name
 84 | ==========
 85 | 
 86 | The value of the ``name`` member in the codec object MUST be ``sharding_indexed``.
 87 | 
 88 | 
 89 | Configuration parameters
 90 | ========================
 91 | 
 92 | Sharding can be configured per array in the :ref:`array-metadata` as follows::
 93 | 
 94 |     {
 95 |       "codecs": [
 96 |         {
 97 |           "name": "sharding_indexed",
 98 |           "configuration": {
 99 |             "chunk_shape": [32, 32],
100 |             "codecs": [
101 |               { 
102 |                 "name": "bytes",
103 |                 "configuration": {
104 |                   "endian": "little"
105 |                 }
106 |               },
107 |               {
108 |                 "name": "gzip",
109 |                 "configuration": {
110 |                   "level": 1
111 |                 }
112 |               }
113 |             ],
114 |             "index_codecs": [
115 |               { 
116 |                 "name": "bytes",
117 |                 "configuration": {
118 |                   "endian": "little"
119 |                 }
120 |               },
121 |               { "name": "crc32c" } 
122 |             ],
123 |             "index_location": "end"
124 |           }
125 |         }
126 |       ]
127 |     }
128 | 
129 | ``chunk_shape``
130 | 
131 |     An array of integers specifying the shape of the inner chunks in a shard
132 |     along each dimension of the outer array. The length of the ``chunk_shape``
133 |     array must match the number of dimensions of the shard shape to which this
134 |     sharding codec is applied, and the inner chunk shape along each dimension must
135 |     evenly divide the size of the shard shape. For example, an inner chunk
136 |     shape of ``[32, 2]`` with a shard shape ``[64, 64]`` indicates that
137 |     64 inner chunks are combined in one shard, 2 along the first dimension, and for
138 |     each of those 32 along the second dimension.
139 | 
140 | ``codecs``
141 | 
142 |     Specifies a list of codecs to be used for encoding and decoding inner chunks. 
143 |     The value must be an array of objects, as specified in the 
144 |     :ref:`array-metadata`. The ``codecs`` member is required and needs to contain
145 |     exactly one ``array -> bytes`` codec.
146 | 
147 | ``index_codecs``
148 | 
149 |     Specifies a list of codecs to be used for encoding and decoding the shard index.
150 |     The value must be an array of objects, as specified in the 
151 |     :ref:`array-metadata`. The ``index_codecs`` member is required and needs to 
152 |     contain exactly one ``array -> bytes`` codec. Codecs that produce 
153 |     variable-sized encoded representation, such as compression codecs, MUST NOT
154 |     be used for index codecs. It is RECOMMENDED to use a little-endian codec 
155 |     followed by a crc32c checksum as index codecs.
156 | 
157 | ``index_location``
158 | 
159 |     Specifies whether the shard index is located at the beginning or end of the 
160 |     file. The parameter value must be either the string ``start`` or ``end``. 
161 |     If the parameter is not present, the value defaults to ``end``.
162 |     
163 | Definitions
164 | ===========
165 | 
166 | * **Shard** is a chunk of the outer array that corresponds to one storage object. 
167 |   As described in this document, shards MAY have multiple inner chunks.
168 | * **Inner chunk** is a chunk within the shard.
169 | * **Shard shape** is the chunk shape of the outer array.
170 | * **Inner chunk shape** is defined by the ``chunk_shape`` configuration of the codec.
171 |   The inner chunk shape needs to have the same number of dimensions as the shard shape and the
172 |   inner chunk shape along each dimension must evenly divide the size of the shard shape.
173 | * **Chunks per shard** is the element-wise division of the shard shape by the 
174 |   inner chunk shape.
175 | 
176 | 
177 | Binary shard format
178 | ===================
179 | 
180 | This is an ``array -> bytes`` codec.
181 | 
182 | In the ``sharding_indexed`` binary format, inner chunks are written successively in a 
183 | shard, where unused space between them is allowed, followed by an index 
184 | referencing them.
185 | 
186 | The index is an array of 64-bit unsigned integers with a shape that matches the
187 | chunks per shard tuple with an appended dimension of size 2.
188 | For example, given a shard shape of ``[128, 128]`` and chunk shape of ``[32, 32]``,
189 | there are ``[4, 4]`` inner chunks in a shard. The corresponding shard index has a 
190 | shape of ``[4, 4, 2]``.
191 | 
192 | The index contains the ``offset`` and ``nbytes`` values for each inner chunk.
193 | The ``offset[i]`` specifies the byte offset within the shard at which the
194 | encoded representation of chunk ``i`` begins, and ``nbytes[i]`` specifies the
195 | encoded length in bytes.
196 | 
197 | Empty inner chunks are denoted by setting both offset and nbytes to ``2^64 - 1``. 
198 | Empty inner chunks are interpreted as being filled with the fill value. The index 
199 | always has the full shape of all possible inner chunks per shard, even if they extend
200 | beyond the array shape.
201 | 
202 | The index is either placed at the end of the file or at the beginning of the file,
203 | as configured by the ``index_location`` parameter. The index is encoded into binary 
204 | representations using the specified index codecs. The byte size of the index is 
205 | determined by the number of inner chunks in the shard ``n``, i.e. the product of 
206 | chunks per shard, and the choice of index codecs.
207 | 
208 | For example, consider a shard shape of ``[64, 64]``, an inner chunk shape of
209 | ``[32, 32]`` and an index codec combination of a little-endian codec followed by 
210 | a crc32c checksum codec. The size of the corresponding index is 
211 | ``16 (2x uint64) * 4 (chunks per shard) + 4 (crc32c checksum) = 68 bytes``.
212 | The index would look like::
213 | 
214 |     | chunk (0, 0)    | chunk (0, 1)    | chunk (1, 0)    | chunk (1, 1)    |          |
215 |     | offset | nbytes | offset | nbytes | offset | nbytes | offset | nbytes | checksum |
216 |     | uint64 | uint64 | uint64 | uint64 | uint64 | uint64 | uint64 | uint64 | uint32   |
217 | 
218 | 
219 | The actual order of the chunk content is not fixed and may be chosen by the
220 | implementation. All possible write orders are valid according to this
221 | specification and therefore can be read by any other implementation. When
222 | writing partial inner chunks into an existing shard, no specific order of the existing
223 | inner chunks may be expected. Some writing strategies might be
224 | 
225 | * **Fixed order**: Specify a fixed order (e.g. row-, column-major, or Morton
226 |   order). When replacing existing inner chunks, larger or equal-sized inner chunks may be
227 |   replaced in-place, leaving unused space up to an upper limit that might
228 |   possibly be specified. Please note that, for regular-sized uncompressed data,
229 |   all inner chunks have the same size and can therefore be replaced in-place.
230 | * **Append-only**: Any chunk to write is appended to the existing shard,
231 |   followed by an updated index. If previous inner chunks are updated, their storage
232 |   space becomes unused, as well as the previous index. This might be useful for
233 |   storage that only allows append-only updates.
234 | * **Other formats**: Other formats that accept additional bytes at the end of
235 |   the file (such as HDF) could be used for storing shards, by writing the inner chunks
236 |   in the order the format prescribes and appending a binary index derived from
237 |   the byte offsets and lengths at the end of the file.
238 | 
239 | Any configuration parameters for the write strategy must not be part of the
240 | metadata document; instead they need to be configured at runtime, as this is
241 | implementation specific.
242 | 
243 | 
244 | Implementation notes
245 | ====================
246 | 
247 | This section suggests a non-normative implementation of the codec including
248 | common optimizations.
249 | 
250 | * **Decoding**: A simple implementation to decode inner chunks in a shard would (a) 
251 |   read the entire value from the store into a byte buffer, (b) parse the shard
252 |   index as specified above from the beginning or end (according to the 
253 |   ``index_location``) of the buffer and (c) cut out the relevant bytes that belong 
254 |   to the requested chunk. The relevant bytes are determined by the 
255 |   ``offset,nbytes`` pair in the shard index. This bytestream then needs to be 
256 |   decoded with the inner codecs as specified in the sharding configuration applying 
257 |   the :ref:`decoding_procedure`. This is similar to how an implementation would 
258 |   access a sub-slice of a chunk.
259 | 
260 |   The size of the index can be determined by applying ``c.compute_encoded_size``
261 |   for each index codec recursively. The initial size is the byte size of the index 
262 |   array, i.e. ``16 * chunks per shard``.
263 | 
264 |   When reading all inner chunks of a shard at once, a useful optimization would be to 
265 |   read the entire shard once into a byte buffer and then cut out and decode all 
266 |   inner chunks from that buffer in one pass.
267 | 
268 |   If the underlying store supports partial reads, the decoding of single inner
269 |   chunks can be optimized. In that case, the shard index can be read from the
270 |   store by requesting the ``n`` first or last bytes (according to the 
271 |   ``index_location``), where ``n`` is the size of the index as determined by 
272 |   the number of inner chunks in the shard and choice of index codecs. After 
273 |   parsing the shard index, single inner chunks can be requested from the store 
274 |   by specifying the byte range. The bytestream, then, needs to be decoded as above. 
275 | 
276 | * **Encoding**: A simple implementation to encode a chunk in a shard would (a)
277 |   encode the new chunk per :ref:`encoding_procedure` in a byte buffer using the 
278 |   shard's inner codecs, (b) read an existing shard from the store, (c) create a 
279 |   new bytestream with all encoded inner chunks of that shard including the overwritten 
280 |   chunk, (d) generate a new shard index that is prepended or appended (according 
281 |   to the ``index_location``) to the chunk bytestream and (e) write the shard to
282 |   the store. If there was no existing shard, an empty shard is assumed. When 
283 |   writing entire inner chunks, reading the existing shard first may be skipped.
284 | 
285 |   When working with inner chunks that have a fixed byte size (e.g., uncompressed) and 
286 |   a store that supports partial writes, an optimization would be to replace the
287 |   new chunk by writing to the store at the specified byte range.
288 | 
289 |   On stores with random-write capabilities, it may be useful to (a) place the shard 
290 |   index at the beginning of the file, (b) write out inner chunks in 
291 |   application-specific order, and (c) update the shard index accordingly. 
292 |   Synchronization of inner chunks written in parallel needs to be handled by the
293 |   application.
294 | 
295 |   Other use case-specific optimizations may be available, e.g., for append-only
296 |   workloads.
297 | 
298 | 
299 | References
300 | ==========
301 | 
302 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
303 |    Requirement Levels. March 1997. Best Current Practice. URL:
304 |    https://tools.ietf.org/html/rfc2119
305 | 
306 | Change log
307 | ==========
308 | 
309 | * Adds ``index_location`` parameter. `PR 280 <https://github.com/zarr-developers/zarr-specs/pull/280>`_
310 | 
311 | * ZEP0002 was accepted. `Issue 254 <https://github.com/zarr-developers/zarr-specs/issues/254>`_
312 | 


--------------------------------------------------------------------------------
/docs/v3/codecs/sharding-indexed/sharding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zarr-developers/zarr-specs/b880fb385bedb18dd78ffef1bd683e7e93270c74/docs/v3/codecs/sharding-indexed/sharding.png


--------------------------------------------------------------------------------
/docs/v3/codecs/transpose/index.rst:
--------------------------------------------------------------------------------
  1 | .. _transpose-codec-v1:
  2 | 
  3 | ===============
  4 | Transpose codec
  5 | ===============
  6 | 
  7 | Version:
  8 |     1.0
  9 | Specification URI:
 10 |     https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/
 11 | Corresponding ZEP:
 12 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 13 | Issue tracking:
 14 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/codec>`_
 15 | Suggest an edit for this spec:
 16 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/codecs/transpose/index.rst>`_
 17 | 
 18 | Copyright 2020-Present Zarr core development team. This work
 19 | is licensed under a `Creative Commons Attribution 3.0 Unported License
 20 | <https://creativecommons.org/licenses/by/3.0/>`_.
 21 | 
 22 | ----
 23 | 
 24 | 
 25 | Abstract
 26 | ========
 27 | 
 28 | Defines an ``array -> array`` codec that permutes the dimensions of the chunk
 29 | array.
 30 | 
 31 | 
 32 | Status of this document
 33 | =======================
 34 | 
 35 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
 36 | 
 37 | 
 38 | Document conventions
 39 | ====================
 40 | 
 41 | Conformance requirements are expressed with a combination of
 42 | descriptive assertions and [RFC2119]_ terminology. The key words
 43 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 44 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 45 | parts of this document are to be interpreted as described in
 46 | [RFC2119]_. However, for readability, these words do not appear in all
 47 | uppercase letters in this specification.
 48 | 
 49 | All of the text of this specification is normative except sections
 50 | explicitly marked as non-normative, examples, and notes. Examples in
 51 | this specification are introduced with the words "for example".
 52 | 
 53 | 
 54 | Codec name
 55 | ==========
 56 | 
 57 | The value of the ``name`` member in the codec object MUST be ``transpose``.
 58 | 
 59 | 
 60 | Configuration parameters
 61 | ========================
 62 | 
 63 | order:
 64 |     Required.  Must be an array of integers specifying a permutation of ``0``, ``1``, ...,
 65 |     ``n-1``, where ``n`` is the number of dimensions in the decoded chunk
 66 |     representation provided as input to this codec.
 67 | 
 68 | Format and algorithm
 69 | ====================
 70 | 
 71 | This is an ``array -> array`` codec.
 72 | 
 73 | Given a chunk array ``A`` with shape ``A_shape`` as the decoded representation,
 74 | the encoded representation is an array ``B`` with the same data type as ``A``
 75 | and shape ``B_shape``, where:
 76 | 
 77 | - ``B_shape[i] = A_shape[order[i]]`` for all dimension indices ``i``, and
 78 | - ``B[B_pos] = A[A_pos]``, where ``B_pos[i] = A_pos[order[i]]``, for all chunk
 79 |   positions ``A_pos`` and dimension indices ``i``.
 80 | 
 81 | .. note::
 82 | 
 83 |    Implementations of this codec may simply construct a virtual view that
 84 |    represents the transposed result, and avoid physically transposing the
 85 |    in-memory representation when possible.
 86 | 
 87 | References
 88 | ==========
 89 | 
 90 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
 91 |    Requirement Levels. March 1997. Best Current Practice. URL:
 92 |    https://tools.ietf.org/html/rfc2119
 93 | 
 94 | 
 95 | Change log
 96 | ==========
 97 | 
 98 | Changes after acceptance of ZEP 1
 99 | ---------------------------------
100 | 
101 | The ``order`` configuration parameter no longer supports the constants ``"C"``
102 | or ``"F"`` and must instead always be specified as an explicit permutation.
103 | 


--------------------------------------------------------------------------------
/docs/v3/core/index.rst:
--------------------------------------------------------------------------------
   1 | .. This file is in restructured text format: https://docutils.sourceforge.io/rst.html
   2 | .. _zarr-core-specification-v3:
   3 | 
   4 | =======================
   5 | Zarr core specification
   6 | =======================
   7 | 
   8 | Version:
   9 |     3.1
  10 | Specification URI:
  11 |     https://zarr-specs.readthedocs.io/en/latest/v3/core/
  12 | 
  13 | Editors:
  14 |     * Alistair Miles (`@alimanfoo <https://github.com/alimanfoo>`_), Wellcome Sanger Institute
  15 |     * Jonathan Striebel (`@jstriebel <https://github.com/jstriebel>`_), Scalable Minds
  16 |     * Norman Rzepka (`@normanrz <https://github.com/normanrz>`_), Scalable Minds
  17 |     * Jeremy Maitin-Shepard (`@jbms <https://github.com/jbms>`_), Google
  18 |     * Josh Moore (`@joshmoore <https://github.com/joshmoore>`_), German BioImaging
  19 | 
  20 | Corresponding ZEPs:
  21 |     * `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
  22 |     * `ZEP0009 — Zarr extension naming <https://zarr.dev/zeps/draft/ZEP0009.html>`_
  23 | 
  24 | Issue tracking:
  25 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/core-protocol-v3.1>`_
  26 | 
  27 | Suggest an edit for this spec:
  28 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/core/index.rst>`_
  29 | 
  30 | Copyright 2019-Present Zarr core development team. This work
  31 | is licensed under a `Creative Commons Attribution 3.0 Unported License
  32 | <https://creativecommons.org/licenses/by/3.0/>`_.
  33 | 
  34 | ----
  35 | 
  36 | 
  37 | Abstract
  38 | ========
  39 | 
  40 | This specification defines the Zarr format for N-dimensional typed arrays.
  41 | 
  42 | 
  43 | Status of this document
  44 | =======================
  45 | 
  46 |  * ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
  47 | 
  48 | This specification is the latest version.
  49 | 
  50 | 
  51 | Introduction
  52 | ============
  53 | 
  54 | This specification defines a format for multidimensional array data. This
  55 | type of data is common in scientific and numerical computing
  56 | applications. Many domains face computational challenges as
  57 | increasingly large volumes of data are being generated, for example,
  58 | via high resolution microscopy, remote sensing imagery, genome
  59 | sequencing or numerical simulation. The primary motivation for the
  60 | development of Zarr is to address this challenge by
  61 | enabling the storage of large multidimensional arrays in a way that is
  62 | compatible with parallel and/or distributed computing applications.
  63 | 
  64 | This specification supersedes the `Zarr storage
  65 | specification version 2
  66 | <https://zarr.readthedocs.io/en/stable/spec/v2.html>`_ (Zarr v2). The
  67 | Zarr v2 specification is implemented in several programming
  68 | languages and is used to store and analyse large
  69 | scientific datasets from a variety of domains. However, it has become
  70 | clear that there are several opportunities for modest but useful
  71 | improvements to be made in the format, and for establishing a foundation
  72 | that allows for greater interoperability, whilst also enabling a variety
  73 | of more advanced and specialised features to be explored and developed.
  74 | 
  75 | This specification also draws heavily on the `N5 API and
  76 | file-system specification <https://github.com/saalfeldlab/n5>`_, which
  77 | was developed in parallel to Zarr v2 with similar
  78 | goals and features. This specification defines a core set of features
  79 | at the intersection of both Zarr v2 and N5, and so aims to provide a
  80 | common target that can be fully implemented across multiple
  81 | programming environments and serve a wide range of applications.
  82 | 
  83 | We highlight the following areas motivating the
  84 | development of this specification.
  85 | 
  86 | Extensibility
  87 | -------------
  88 | 
  89 | The development of systems for storage of very large array-like data
  90 | is a very active area of research and development, and there are many
  91 | possibilities that remain to be explored. A goal of this specification
  92 | is to define a format with a number of clear extension points and
  93 | mechanisms, in order to provide a framework for freely building on and
  94 | exploring these possibilities. We aim to make this possible, whilst
  95 | also providing pathways for a graceful degradation of functionality
  96 | where possible, in order to retain interoperability. We also aim to
  97 | provide a framework for community-defined extensions, which can be
  98 | developed and published independently without requiring centralised
  99 | coordination of all specifications.
 100 | 
 101 | See :ref:`extension points <extensions_section>` below.
 102 | 
 103 | Interoperability
 104 | ----------------
 105 | 
 106 | While the Zarr v2 and N5 specifications have each been implemented in
 107 | multiple programming languages, there is currently not feature parity
 108 | across all implementations. This is in part because the feature set
 109 | includes some features that are not easily translated or supported
 110 | across different programming languages. This specification aims to
 111 | define a set of core features that are useful and sufficient to
 112 | address a significant fraction of use cases, but are also
 113 | straightforward to implement fully across different programming
 114 | languages. Additional functionality can then be layered via
 115 | extensions, some of which may aim for wide adoption, some of which may
 116 | be more specialised and have more limited implementation.
 117 | 
 118 | 
 119 | Stability Policy
 120 | ----------------
 121 | 
 122 | This core specification adheres to a ``MAJOR.MINOR`` version
 123 | number format. When incrementing the minor version, only additional features
 124 | can be added. Breaking changes require incrementing the major version.
 125 | 
 126 | A Zarr implementation that provides the read and write API by
 127 | implementing a specification ``X.Y`` can be considered compatible with all
 128 | datasets which only use features contained in version ``X.Y``.
 129 | 
 130 | For example, spec ``X.1`` adds core feature "foo" compared to ``X.0``. Assuming
 131 | implementation A implements ``X.1`` and implementation B implements ``X.0``,
 132 | data using feature "foo" can only be read with implementation A. B fails to open
 133 | it, as the key "foo" is unknown.
 134 | 
 135 | Data not using "foo" can be used with both implementations, even if it's written
 136 | with implementation A.
 137 | 
 138 | Therefore, data is only marked with the respective major version; unknown
 139 | features are auto-discovered via the metadata document.
 140 | 
 141 | :ref:`Extensions<extensions_section>` defined in subpages of this specification
 142 | follow the same stability policy but do so with their own version number.
 143 | 
 144 | Document conventions
 145 | ====================
 146 | 
 147 | Conformance requirements are expressed with a combination of
 148 | descriptive assertions and [RFC2119]_ terminology. The key words
 149 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 150 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 151 | parts of this document are to be interpreted as described in
 152 | [RFC2119]_. However, for readability, these words do not appear in all
 153 | uppercase letters in this specification.
 154 | 
 155 | All of the text of this specification is normative except sections
 156 | explicitly marked as non-normative, examples, and notes. Examples in
 157 | this specification are introduced with the words "for example".
 158 | 
 159 | Concepts and terminology
 160 | ========================
 161 | 
 162 | This section introduces and defines some key terms and explains the
 163 | conceptual model underpinning the Zarr format.
 164 | 
 165 | The following figure illustrates the first part of the terminology:
 166 | 
 167 | ..
 168 |    The following image was produced with https://excalidraw.com/
 169 |    and can be loaded there, as the source is embedded in the png.
 170 | .. image:: terminology-hierarchy.excalidraw.png
 171 |   :width: 600
 172 | 
 173 | .. _hierarchy:
 174 | 
 175 | *Hierarchy*
 176 | 
 177 |     A Zarr hierarchy is a tree structure, where each node in the tree
 178 |     is either a group_ or an array_. Group nodes may have children but
 179 |     array nodes may not. All nodes in a hierarchy have a name_ and a
 180 |     path_. The root of a Zarr hierarchy may be either a group_ or an array_.
 181 |     In the latter case, the hierarchy consists of just the single array.
 182 | 
 183 | .. _array:
 184 | .. _arrays:
 185 | 
 186 | *Array*
 187 | 
 188 |     An array is a node in a hierarchy_. An array is a data structure
 189 |     with zero or more dimensions_ whose lengths define the shape_ of
 190 |     the array. An array contains zero or more data elements_. All
 191 |     elements_ in an array conform to the same `data type`_. An array
 192 |     may not have child nodes.
 193 | 
 194 | .. _group:
 195 | .. _groups:
 196 | 
 197 | *Group*
 198 | 
 199 |     A group is a node in a hierarchy_ that may have child nodes.
 200 | 
 201 | .. _name:
 202 | .. _names:
 203 | 
 204 | *Name*
 205 | 
 206 |     Each child node of a group has a name, which is a string of
 207 |     characters with some additional constraints defined in the section
 208 |     on `node names`_ below.  Two sibling nodes cannot have the same
 209 |     name.
 210 | 
 211 | .. _path:
 212 | .. _paths:
 213 | 
 214 | *Path*
 215 | 
 216 |     Each node in a hierarchy_ has a path, a Unicode string that uniquely
 217 |     identifies the node and defines its location within the hierarchy_. The root
 218 |     node has a path of ``/``.  The path of a non-root node is equal to the
 219 |     concatenation of:
 220 | 
 221 |     - the path of its parent node;
 222 |     - the ``/`` character, unless the parent is the root node;
 223 |     - the name_ of the node itself.
 224 | 
 225 |     For example, the path ``"/foo/bar"`` identifies a node named ``"bar"``,
 226 |     whose parent is named ``"foo"``, whose parent is the root of the hierarchy.
 227 | 
 228 |     A path always starts with ``/``, and a non-root path cannot end with ``/``,
 229 |     because node names_ must be non-empty and cannot contain ``/``.
 230 | 
 231 | .. _dimension:
 232 | .. _dimensions:
 233 | 
 234 | *Dimension*
 235 | 
 236 |     An array_ has a fixed number of zero or more dimensions. Each dimension has
 237 |     an integer length. This specification only considers the case where the
 238 |     lengths of all dimensions are finite. However,
 239 |     :ref:`extensions<extensions_section>` may be defined which allow a dimension
 240 |     to have an infinite or variable length.
 241 | 
 242 | .. _shape:
 243 | 
 244 | *Shape*
 245 | 
 246 |     The shape of an array_ is the tuple of dimension_ lengths. For
 247 |     example, if an array_ has 2 dimensions_, where the length of the
 248 |     first dimension_ is 100 and the length of the second dimension_ is
 249 |     20, then the shape of the array_ is (100, 20). A shape can be the empty
 250 |     tuple in the case of zero-dimension arrays (scalars).
 251 | 
 252 | .. _element:
 253 | .. _elements:
 254 | 
 255 | *Element*
 256 | 
 257 |     An array_ contains zero or more elements. Each element is
 258 |     identified by a tuple of integer coordinates, one for each
 259 |     dimension_ of the array_. If all dimensions_ of an array_ have
 260 |     finite length, then the number of elements in the array_ is given
 261 |     by the product of the dimension_ lengths.
 262 | 
 263 | .. _data type:
 264 | 
 265 | *Data type*
 266 | 
 267 |     A data type defines the set of possible values that an array_ may
 268 |     contain. For example, the 32-bit signed integer data type defines binary
 269 |     representations for all integers in the range −2,147,483,648 to
 270 |     2,147,483,647. This specification only defines a limited set of data types,
 271 |     but additional data types can be defined as :ref:`extensions<extensions_section>`.
 272 | 
 273 | .. _chunk:
 274 | .. _chunks:
 275 | 
 276 | *Chunk*
 277 | 
 278 |     An array_ is divided into a set of chunks, where each chunk is a
 279 |     hyperrectangle defined by a tuple of intervals, one for each
 280 |     dimension_ of the array_. The chunk shape is the tuple of interval
 281 |     lengths, and the chunk size (i.e., number of elements_ contained
 282 |     within the chunk) is the product of its interval lengths.
 283 | 
 284 |     The chunk shape elements are non-zero when the corresponding dimensions of
 285 |     the arrays have non-zero length.
 286 | 
 287 | .. _grid:
 288 | .. _grids:
 289 | 
 290 | *Grid*
 291 | 
 292 |     The chunks_ of an array_ are organised into a grid. This
 293 |     specification only considers the case where all chunks_ have the
 294 |     same chunk shape and the chunks form a regular grid. However,
 295 |     additional chunk grids can be defined as :ref:`extensions<extensions_section>`.
 296 | 
 297 | .. _codec:
 298 | .. _codecs:
 299 | 
 300 | *Codec*
 301 | 
 302 |     The list of *codecs* specified for an array_ determines the encoded byte
 303 |     representation of each chunk in the store_.
 304 | 
 305 | .. _metadata document:
 306 | .. _metadata documents:
 307 | 
 308 | *Metadata document*
 309 | 
 310 |     Each array_ or group_ in a hierarchy_ is represented by a metadata document,
 311 |     which is a machine-readable document containing essential
 312 |     processing information about the node. For example, an array_
 313 |     metadata document specifies the number of dimensions_, shape_,
 314 |     `data type`_, grid_, and codec_ for that array_.
 315 | 
 316 | .. _store:
 317 | .. _stores:
 318 | 
 319 | *Store*
 320 | 
 321 |     The `metadata documents`_ and encoded chunk_ data for all nodes in a
 322 |     hierarchy_ are held in a store as raw bytes. To enable a variety
 323 |     of different store types to be used, this specification defines an
 324 |     `Abstract store interface`_ which is a common set of operations that stores
 325 |     may provide. For example, a directory in a file system can be a Zarr store,
 326 |     where keys are file names, values are file contents, and files can be read,
 327 |     written, listed or deleted via the operating system. Equally, an S3 bucket
 328 |     can provide this interface, where keys are resource names, values are
 329 |     resource contents, and resources can be read, written or deleted via HTTP.
 330 | 
 331 | .. _storage transformer:
 332 | .. _storage transformers:
 333 | 
 334 | *Storage transformer*
 335 | 
 336 |     To provide performance enhancements or other optimizations,
 337 |     storage transformers may intercept and alter the storage keys and bytes
 338 |     of an array_ before they reach the underlying physical storage.
 339 |     Upon retrieval, the original keys and bytes are restored within the
 340 |     transformer. Any number of storage transformers can be registered and
 341 |     stacked. In contrast to codecs, storage transformers can act on the
 342 |     complete array, rather than individual chunks. See the
 343 |     `storage transformers details`_ below.
 344 | 
 345 | .. _`storage transformers details`: #storage-transformers-1
 346 | 
 347 | The following figure illustrates the codec, store and storage transformer
 348 | terminology for a use case of reading from an array:
 349 | 
 350 | ..
 351 |    The following image was produced with https://excalidraw.com/
 352 |    and can be loaded there, as the source is embedded in the png.
 353 | .. image:: terminology-read.excalidraw.png
 354 |   :width: 600
 355 | 
 356 | *Extension point*
 357 | 
 358 |     A field in a `metadata document`_ that can be extended to allow values
 359 |     not defined in this specification.
 360 |     See :ref:`extension points <extensions_section>` below.
 361 | 
 362 | *Extension*
 363 | 
 364 |     An implementation of an extension point which can be referenced
 365 |     by :ref:`name <extension-naming>`.
 366 |     See the linked lists of extensions under :ref:`extension points <extensions_section>` below.
 367 | 
 368 | *Core*
 369 | 
 370 |     Core refers to features or concepts defined within this specification. The
 371 |     designation of a feature as core does not imply that it is mandatory for
 372 |     all implementations.
 373 | 
 374 | .. _stored-representation:
 375 | 
 376 | Stored representation
 377 | =====================
 378 | 
 379 | A Zarr hierarchy_ is represented by the following set of key/value entries in an
 380 | underlying store_:
 381 | 
 382 | - The array_ or group_ metadata document for the root of a Zarr hierarchy_ is
 383 |   stored under the key ``zarr.json``.
 384 | 
 385 | - The key of the metadata document of a non-root array or group with hierarchy
 386 |   path ``P`` is obtained by stripping the leading ``/`` of the path and appending
 387 |   ``/zarr.json``.  For example, the metadata document of an array or group with
 388 |   path ``/foo/bar`` is stored under the key ``foo/bar/zarr.json``.
 389 | 
 390 | - All chunk or other data of an array is stored under the key prefix determined
 391 |   by its path.  For a root array, the key prefix is obtained from the metadata
 392 |   document key by stripping the trailing ``zarr.json``.  For example, for a root
 393 |   array, the prefix is the empty string.  For a non-root array with hierarchy
 394 |   path ``/foo/bar``, the prefix is ``foo/bar/``.
 395 | 
 396 | .. list-table:: Metadata Storage Key example
 397 |     :header-rows: 1
 398 | 
 399 |     * - Type
 400 |       - Path "P"
 401 |       - Key for Metadata at path `P`
 402 |     * - Array (Root)
 403 |       - `/`
 404 |       - `zarr.json`
 405 |     * - Group (Root)
 406 |       - `/`
 407 |       - `zarr.json`
 408 |     * - Group
 409 |       - `/foo`
 410 |       - `foo/zarr.json`
 411 |     * - Array
 412 |       - `/foo`
 413 |       - `foo/zarr.json`
 414 |     * - Group
 415 |       - `/foo/bar`
 416 |       - `foo/bar/zarr.json`
 417 |     * - Array
 418 |       - `/foo/baz`
 419 |       - `foo/baz/zarr.json`
 420 | 
 421 | 
 422 | .. list-table:: Data Storage Key example
 423 |     :header-rows: 1
 424 | 
 425 |     * - Path `P` of array
 426 |       - Chunk grid indices
 427 |       - Data key
 428 |     * - `/foo/baz`
 429 |       - `(1, 0)`
 430 |       - `foo/baz/c/1/0`
 431 | 
 432 | .. note::
 433 | 
 434 |    When storing a Zarr hierarchy in a filesystem-like store (e.g. the local
 435 |    filesystem or S3) as a sub-directory, it is recommended that the
 436 |    sub-directory name ends with ``.zarr`` to indicate the start of a hierarchy
 437 |    to users.
 438 | 
 439 | .. _metadata:
 440 | 
 441 | Metadata
 442 | ========
 443 | 
 444 | This section defines the structure of metadata documents for Zarr hierarchies,
 445 | which consists of two types of metadata documents: array metadata documents, and
 446 | group metadata documents. Both types of metadata documents are stored under the
 447 | key ``zarr.json`` within the prefix of the array or group.  Each type of
 448 | metadata document is described in the following subsections.
 449 | 
 450 | Metadata documents are defined here using the JSON
 451 | type system defined in [RFC8259]_. In this section, the terms "value",
 452 | "number", "string" and "object" are used to denote the types as
 453 | defined in [RFC8259]_. The term "array" is also used as defined in
 454 | [RFC8259]_, except where qualified as "Zarr array". Following
 455 | [RFC8259]_, this section also describes an object as a set of
 456 | name/value pairs. This section also defines how metadata documents are
 457 | encoded for storage.
 458 | 
 459 | .. _array-metadata:
 460 | 
 461 | Array metadata
 462 | --------------
 463 | 
 464 | Each Zarr array in a hierarchy must have an array metadata document, named
 465 | ``zarr.json``.
 466 | 
 467 | Mandatory
 468 | ^^^^^^^^^
 469 | 
 470 | This document must contain a single object with the following
 471 | mandatory names:
 472 | 
 473 | .. _array-metadata-zarr-format:
 474 | 
 475 | ``zarr_format``
 476 | """"""""""""""""
 477 | 
 478 |     An integer defining the version of the storage specification to which the
 479 |     array store adheres, must be ``3`` here.
 480 | 
 481 | .. _array-metadata-node-type:
 482 | 
 483 | ``node_type``
 484 | """""""""""""""
 485 | 
 486 |     A string defining the type of hierarchy node element, must be ``array``
 487 |     here.
 488 | 
 489 | .. _array-metadata-shape:
 490 | 
 491 | ``shape``
 492 | """""""""
 493 | 
 494 |     An array of integers providing the length of each dimension of the
 495 |     Zarr array. For example, a value ``[10, 20]`` indicates a
 496 |     two-dimensional Zarr array, where the first dimension has length
 497 |     10 and the second dimension has length 20.
 498 | 
 499 | .. _array-metadata-data-type:
 500 | 
 501 | ``data_type``
 502 | """""""""""""
 503 | 
 504 |     The data type of the Zarr array.
 505 | 
 506 |     ``data_type`` is an :ref:`extension point<extensions_section>`
 507 |     and MUST conform to the :ref:`extension-definition`.
 508 | 
 509 |     If the data type is defined in :ref:`this specification <data-type-list>`,
 510 |     then the value must be the data type
 511 |     identifier provided as a string. For example, ``"float64"`` for
 512 |     little-endian 64-bit floating point number.
 513 | 
 514 |     Because the ``fill_value`` metadata key is dependent on the data type, 
 515 |     extension data types SHOULD specify permitted values for the ``fill_value`` in
 516 |     their specification.
 517 | 
 518 | .. _array-metadata-chunk-grid:
 519 | 
 520 | ``chunk_grid``
 521 | """"""""""""""
 522 | 
 523 |     The chunk grid of the Zarr array.
 524 | 
 525 |     ``chunk_grid`` is an :ref:`extension point<extensions_section>`
 526 |     and MUST conform to the :ref:`extension-definition`.
 527 | 
 528 |     If the chunk grid is a regular chunk grid
 529 |     as defined in this specification, then the value must be an object with the
 530 |     names ``name`` and ``configuration``. The value of ``name`` must be the
 531 |     string ``"regular"``, and the value of ``configuration`` an object with the
 532 |     member ``chunk_shape``. ``chunk_shape`` must be an array of
 533 |     integers providing the lengths of the chunk along each dimension of the
 534 |     array.  For example,
 535 |     ``{"name": "regular", "configuration": {"chunk_shape": [2, 5]}}``
 536 |     means a regular grid where the chunks have length 2 along the first
 537 |     dimension and length 5 along the second dimension.
 538 | 
 539 | 
 540 | .. _array-metadata-chunk-key-encoding:
 541 | 
 542 | ``chunk_key_encoding``
 543 | """"""""""""""""""""""
 544 | 
 545 |     The mapping from chunk grid cell coordinates to keys in the underlying
 546 |     store.
 547 | 
 548 |     ``chunk_key_encoding`` is an :ref:`extension point<extensions_section>`
 549 |     and MUST conform to the :ref:`extension-definition`.
 550 | 
 551 | .. _array-metadata-fill-value:
 552 | 
 553 | ``fill_value``
 554 | """"""""""""""
 555 | 
 556 |     Provides an element value to use for uninitialised portions of the
 557 |     Zarr array.
 558 | 
 559 |     The permitted values depend on the data type. Fill values for core
 560 |     data types are listed in :ref:`fill-value-list`.
 561 | 
 562 |     Extension data types MUST also define the JSON fill value representation.
 563 | 
 564 |     .. note::
 565 | 
 566 |        The ``fill_value`` metadata field is required, but Zarr implementations
 567 |        may provide an interface for creating a new array with which users can
 568 |        leave the fill value unspecified, in which case a default fill value for
 569 |        the data type will be chosen.  However, the default fill value that is
 570 |        chosen MUST be recorded in the metadata.
 571 | 
 572 | .. _array-metadata-codecs:
 573 | 
 574 | ``codecs``
 575 | """"""""""
 576 | 
 577 |     Specifies a list of codecs to be used for encoding and decoding chunks.
 578 | 
 579 |     Each codec is an :ref:`extension point<extensions_section>`
 580 |     and MUST conform to the :ref:`extension-definition`.
 581 | 
 582 |     Because ``codecs`` MUST contain an ``array
 583 |     -> bytes`` codec, the list cannot be empty (See :ref:`codecs <codecs>`).
 584 | 
 585 | Optional
 586 | ^^^^^^^^
 587 | 
 588 | The following members are optional:
 589 | 
 590 | .. _array-metadata-attributes:
 591 | 
 592 | ``attributes``
 593 | """"""""""""""
 594 | 
 595 |     The value must be an object. The object may contain any key/value
 596 |     pairs, where the key must be a string and the value can be an arbitrary
 597 |     JSON literal. Intended to allow storage of arbitrary user metadata.
 598 | 
 599 | 
 600 |   .. note::
 601 |     An extension to store user attributes in a separate document is being
 602 |     discussed in https://github.com/zarr-developers/zarr-specs/issues/72.
 603 | 
 604 |   .. note::
 605 |     A proposal to specify metadata conventions (ZEP 4) is being discussed in
 606 |     https://github.com/zarr-developers/zeps/pull/28.
 607 | 
 608 | .. _array-metadata-storage-transformers:
 609 | 
 610 | ``storage_transformers``
 611 | """"""""""""""""""""""""
 612 | 
 613 |     Specifies a list of `storage transformers`_.
 614 | 
 615 |     Each storage transformer is an :ref:`extension point<extensions_section>`
 616 |     and MUST conform to the :ref:`extension-definition`.
 617 | 
 618 |     When the ``storage_transformers`` name is absent, no storage transformer is
 619 |     used; the same applies when the list is empty.
 620 | 
 621 | .. _array-metadata-dimension-names:
 622 | 
 623 | ``dimension_names``
 624 | """""""""""""""""""
 625 | 
 626 |     Specifies dimension names, e.g. ``["x", "y", "z"]``.  If specified, must be
 627 |     an array of strings or null objects with the same length as ``shape``.  An
 628 |     unnamed dimension is indicated by the null object.  If ``dimension_names`` is
 629 |     not specified, all dimensions are unnamed.
 630 | 
 631 |     For compatibility with Zarr implementations and applications that support
 632 |     using dimension names to uniquely identify dimensions, it is recommended but
 633 |     not required that all non-null dimension names are distinct (no two
 634 |     dimensions have the same non-null name).
 635 | 
 636 |     This specification also does not place any restrictions on the use of the
 637 |     same dimension name across multiple arrays within the same Zarr hierarchy,
 638 |     but extensions or specific applications may do so.
 639 | 
 640 | .. _array-metadata-extensions:
 641 | 
 642 | Unknown
 643 | ^^^^^^^
 644 | 
 645 | All other keys found in the metadata object MUST be interpreted
 646 | following the :ref:`Extensions section <extensions_section>`.
 647 | 
 648 | Example
 649 | ^^^^^^^
 650 | 
 651 | For example, the array metadata JSON document below defines a
 652 | two-dimensional array of 64-bit little-endian floating point numbers,
 653 | with 10000 rows and 1000 columns, divided into a regular chunk grid where
 654 | each chunk has 1000 rows and 100 columns, and thus there will be 100
 655 | chunks in total arranged into a 10 by 10 grid. Within each chunk the
 656 | binary values are laid out in C contiguous order::
 657 | 
 658 |     {
 659 |         "zarr_format": 3,
 660 |         "node_type": "array",
 661 |         "shape": [10000, 1000],
 662 |         "dimension_names": ["rows", "columns"],
 663 |         "data_type": "float64",
 664 |         "chunk_grid": {
 665 |             "name": "regular",
 666 |             "configuration": {
 667 |                 "chunk_shape": [1000, 100]
 668 |             }
 669 |         },
 670 |         "chunk_key_encoding": {
 671 |             "name": "default",
 672 |             "configuration": {
 673 |                 "separator": "/"
 674 |             }
 675 |         },
 676 |         "codecs": [{
 677 |             "name": "bytes",
 678 |             "configuration": {
 679 |                 "endian": "little"
 680 |             }
 681 |         }],
 682 |         "fill_value": "NaN",
 683 |         "attributes": {
 684 |             "foo": 42,
 685 |             "bar": "apples",
 686 |             "baz": [1, 2, 3, 4]
 687 |         }
 688 |     }
 689 | 
 690 | The following example illustrates an array with the same shape and chunking as
 691 | above, but using a (currently made up) extension data type::
 692 | 
 693 |     {
 694 |         "zarr_format": 3,
 695 |         "node_type": "array",
 696 |         "shape": [10000, 1000],
 697 |         "data_type": {
 698 |             "name": "urn:example:datetime",
 699 |             "configuration": {
 700 |                 "unit": "ns"
 701 |             }
 702 |         },
 703 |         "chunk_grid": {
 704 |             "name": "regular",
 705 |             "configuration": {
 706 |                 "chunk_shape": [1000, 100]
 707 |             }
 708 |         },
 709 |         "chunk_key_encoding": {
 710 |             "name": "default",
 711 |             "configuration": {
 712 |                 "separator": "/"
 713 |             }
 714 |         },
 715 |         "codecs": [{
 716 |             "name": "bytes",
 717 |             "configuration": {
 718 |                 "endian": "big"
 719 |             }
 720 |         }],
 721 |         "fill_value": null
 722 |     }
 723 | 
 724 | .. note::
 725 | 
 726 |    Comparison with Zarr spec v2:
 727 | 
 728 |    - ``dtype`` has been renamed to ``data_type``,
 729 |    - ``chunks`` has been replaced with ``chunk_grid``,
 730 |    - ``dimension_separator`` has been replaced with ``chunk_key_encoding``,
 731 |    - ``order`` has been replaced by the :ref:`transpose <transpose-codec-v1>` codec,
 732 |    - the separate ``filters`` and ``compressor`` fields have been combined into the single ``codecs`` field.
 733 | 
 734 | .. _group-metadata:
 735 | 
 736 | Group metadata
 737 | --------------
 738 | 
 739 | Mandatory
 740 | ^^^^^^^^^
 741 | 
 742 | A Zarr group metadata object must contain the following mandatory keys:
 743 | 
 744 | ``zarr_format``
 745 | """""""""""""""
 746 | 
 747 |     An integer defining the version of the storage specification to which the
 748 |     array store adheres, must be ``3`` here.
 749 | 
 750 | ``node_type``
 751 | """""""""""""""
 752 | 
 753 |     A string defining the type of hierarchy node element, must be ``group``
 754 |     here.
 755 | 
 756 | Optional
 757 | ^^^^^^^^
 758 | 
 759 | Optional keys:
 760 | 
 761 | ``attributes``
 762 | """"""""""""""
 763 | 
 764 |     The value must be an object. The object may contain any key/value
 765 |     pairs, where the key must be a string and the value can be an arbitrary
 766 |     JSON literal. Intended to allow storage of arbitrary user metadata.
 767 | 
 768 | .. _group-metadata-extensions:
 769 | 
 770 | Unknown
 771 | ^^^^^^^
 772 | 
 773 | All other keys found in the metadata object MUST be interpreted
 774 | following the :ref:`Extensions section <extensions_section>`.
 775 | 
 776 | Example
 777 | ^^^^^^^
 778 | 
 779 | For example, the JSON document below defines a group::
 780 | 
 781 |     {
 782 |         "zarr_format": 3,
 783 |         "node_type": "group",
 784 |         "attributes": {
 785 |             "spam": "ham",
 786 |             "eggs": 42
 787 |         }
 788 |     }
 789 | 
 790 | Node names
 791 | ==========
 792 | 
 793 | The root node does not have a name; its name is represented by the empty string ``""``.
 794 | Except for the root node, each node in a hierarchy must have a name,
 795 | which is a string of unicode code points. The following constraints
 796 | apply to node names:
 797 | 
 798 | * must not be the empty string (``""``)
 799 | * must not include the character ``"/"``
 800 | * must not be a string composed only of period characters, e.g. ``"."`` or ``".."``
 801 | * must not start with the reserved prefix ``"__"``
 802 | 
 803 | To ensure consistent behaviour across different storage systems and programming
 804 | languages, we recommend users to only use characters in the sets ``a-z``,
 805 | ``A-Z``, ``0-9``, ``-``, ``_``, ``.``.
 806 | 
 807 | Node names are case sensitive, e.g., the names "foo" and "FOO" are **not**
 808 | identical.
 809 | 
 810 | When using non-ASCII Unicode characters, we recommend users to use
 811 | case-folded NFKC-normalized strings following the
 812 | `General Security Profile for Identifiers of the Unicode Security Mechanisms (Unicode Technical Standard #39) <http://www.unicode.org/reports/tr39/#General_Security_Profile>`_.
 813 | This follows the
 814 | `Recommendations for Programmers (B) of the Unicode Security Considerations (Unicode Technical Report #36) <https://unicode.org/reports/tr36/#Recommendations_General>`_.
 815 | 
 816 | .. note::
 817 |     A storage transformer for unicode normalization might be added later, see
 818 |     https://github.com/zarr-developers/zarr-specs/issues/201.
 819 | 
 820 | .. note::
 821 |     The underlying store might pose additional restrictions on node names,
 822 |     such as the following:
 823 | 
 824 |     * `260 characters path length limit in Windows <https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation>`_
 825 |     * 1,024 bytes UTF8 object key limit for
 826 |       `AWS S3 <https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html>`_
 827 |       and `GCS <https://cloud.google.com/storage/docs/objects#naming>`_, with
 828 |       additional constraints.
 829 |     * `Windows paths are case-insensitive by default <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>`_
 830 |     * `MacOS paths are case-insensitive by default <https://support.apple.com/guide/disk-utility/file-system-formats-dsku19ed921c/mac>`_
 831 | 
 832 | .. note::
 833 |     If a store requires an explicit byte string representation the default
 834 |     representation is the ``UTF-8`` encoded Unicode string.
 835 | 
 836 | .. note::
 837 |     The prefix ``__zarr`` is reserved for core Zarr data, and extensions
 838 |     can use other files and folders starting with ``__``.
 839 | 
 840 | 
 841 | Data types
 842 | ==========
 843 | 
 844 | A data type describes the set of possible binary values that an array
 845 | element may take, along with some information about how the values
 846 | should be interpreted.
 847 | 
 848 | This specification defines a limited set of data types to
 849 | represent boolean values, integers, and floating point
 850 | numbers. These can be found under :ref:`Data Types<data-type-list>`.
 851 | 
 852 | All of the data types defined here have a fixed size, in the sense that all values
 853 | require the same number of bytes.
 854 | 
 855 | Additional data types may be defined as :ref:`extensions<extensions_section>`,
 856 | which MAY be variable-sized.
 857 | 
 858 | Note that the Zarr specification is intended to enable communication
 859 | of data between a variety of computing environments. The native byte
 860 | order may differ between machines used to write and read the data.
 861 | 
 862 | Each data type is associated with an identifier, which can be used in
 863 | metadata documents to refer to the data type. For the data types
 864 | defined in this specification, the identifier is a simple ASCII
 865 | string. However, extensions may use any JSON value to identify a data
 866 | type.
 867 | 
 868 | In addition to these base types, an implementation should also handle the
 869 | raw/opaque pass-through type designated by the lower-case letter ``r`` followed
 870 | by the number of bits, which must be a multiple of 8. For example, ``r8``, ``r16``, and ``r24``
 871 | should be understood as fall-back types of respectively 1, 2, and 3 byte length.
 872 | 
 873 | Zarr v3 is limited to type sizes that are a multiple of 8 bits but may support
 874 | other type sizes in later versions of this specification.
 875 | 
 876 | .. note::
 877 | 
 878 |     We are explicitly looking for more feedback on, and prototypes of code using, the
 879 |     ``r*`` (raw bits) types with various endianness, and on whether the spec could be made clearer.
 880 | 
 881 | .. note::
 882 | 
 883 |     Currently only fixed size elements are supported as a core data type.
 884 |     There are many requests for variable length element encoding. There are many
 885 |     ways to encode variable length and we want to keep flexibility. While we seem
 886 |     to agree that for random access the most likely contender is to have two
 887 |     arrays, one with the actual variable length data and one with fixed size
 888 |     (pointer + length) to the variable size data, we do not want to commit to such
 889 |     a structure.
 890 |     See https://github.com/zarr-developers/zarr-specs/issues/62.
 891 | 
 892 | 
 893 | Chunk grids
 894 | ===========
 895 | 
 896 | A chunk grid defines a set of chunks which contain the elements of an
 897 | array. The chunks of a grid form a tessellation of the array space,
 898 | which is a space defined by the dimensionality and shape of the
 899 | array. This means that every element of the array is a member of one
 900 | chunk, and there are no gaps or overlaps between chunks.
 901 | 
 902 | In general there are different possible types of grids. Those defined
 903 | under the core specification can be found under :ref:`chunk-grid-list`.
 904 | Additional grid types MAY be defined as :ref:`extensions<extensions_section>`,
 905 | such as rectilinear grids where chunks are still
 906 | hyperrectangles but do not all share the same shape.
 907 | 
 908 | A grid type must also define rules for constructing an identifier for
 909 | each chunk that is unique within the grid, which is a string of ASCII
 910 | characters that can be used to construct keys to save and retrieve
 911 | chunk data in a store, see also the `Storage`_ section.
 912 | 
 913 | Chunk encoding
 914 | ==============
 915 | 
 916 | Chunks are encoded into a binary representation for storage in a store_, using
 917 | the chain of codecs_ specified by the ``codecs`` metadata field.
 918 | 
 919 | Codecs
 920 | ------
 921 | 
 922 | An array_ has an associated list of *codecs*.  Each codec specifies a
 923 | bidirectional transform (an *encode* transform and a *decode* transform).
 924 | 
 925 | Each codec has an *encoded representation* and a *decoded representation*;
 926 | each of these two representations is defined to be either:
 927 | 
 928 | - a multi-dimensional array of some shape and data type, or
 929 | - a byte string.
 930 | 
 931 | Based on the input and output representations for the encode transform,
 932 | codecs can be classified as one of three kinds:
 933 | 
 934 | - ``array -> array``
 935 | - ``array -> bytes``
 936 | - ``bytes -> bytes``
 937 | 
 938 | .. note::
 939 | 
 940 |    ``bytes -> array`` codecs, where after encoding an array as a byte
 941 |    string, it is subsequently transformed back into an array, to then later
 942 |    be transformed back into a byte string, are not currently allowed, due to
 943 |    the lack of a clear use case.
 944 | 
 945 | If multiple codecs are specified for an array, each codec is applied
 946 | sequentially; when encoding, the encoded output of codec ``i`` serves as the
 947 | decoded input of codec ``i+1``, and similarly when decoding, the decoded output
 948 | of codec ``i+1`` serves as the encoded input to codec ``i``.  Since ``bytes ->
 949 | array`` codecs are not supported, it follows that the list of codecs must be of
 950 | the following form:
 951 | 
 952 | - zero or more ``array -> array`` codecs; followed by
 953 | - exactly one ``array -> bytes`` codec; followed by
 954 | - zero or more ``bytes -> bytes`` codecs.
 955 | 
 956 | Logically, a codec ``c`` must define three properties:
 957 | 
 958 | - ``c.compute_encoded_representation_type(decoded_representation_type)``, a
 959 |   procedure that determines the encoded representation based on the decoded
 960 |   representation and any codec parameters.  In the case of a decoded
 961 |   representation that is a multi-dimensional array, the shape and data type
 962 |   of the encoded representation must be computable based only on the shape
 963 |   and data type, but not the actual element values, of the decoded
 964 |   representation.  If the ``decoded_representation_type`` is not supported,
 965 |   this algorithm must fail with an error.
 966 | 
 967 | - ``c.encode(decoded_value)``, a procedure that computes the encoded
 968 |   representation, and is used when writing an array.
 969 | 
 970 | - ``c.decode(encoded_value, decoded_representation_type)``, a procedure that
 971 |   computes the decoded representation, and is used when reading an array.
 972 | 
 973 | Implementations MAY support partial decoding for certain codecs
 974 | (e.g. sharding, blosc).  Logically, partial decoding may be defined in terms
 975 | of the following additional operations:
 976 | 
 977 | - ``c.partial_decode(input_handle, decoded_representation_type,
 978 |   decoded_regions)``, where:
 979 | 
 980 |   - ``input_handle`` provides an interface for requesting partial reads of
 981 |     the encoded representation and itself supports the same
 982 |     ``partial_decode`` interface;
 983 |   - ``decoded_representation_type`` is the same as for ``c.decode``;
 984 |   - ``decoded_regions`` specifies the regions of the decoded representation
 985 |     that must be returned.
 986 | 
 987 |   If the decoded representation is a multi-dimensional array, then
 988 |   ``decoded_regions`` specifies a subset of the array's domain.  If the
 989 |   decoded representation is a byte string, then ``decoded_regions``
 990 |   specifies a list of byte ranges.
 991 | 
 992 | - ``c.compute_encoded_size(input_size)``, a procedure that determines the
 993 |   size of the encoded representation given a size of the decoded representation.
 994 |   This procedure cannot be implemented for codecs that produce variable-sized
 995 |   encoded representations, such as compression algorithms. Depending on the
 996 |   type of the codec, the signature could differ:
 997 | 
 998 |   - ``c.compute_encoded_size(array_size, data_type) -> (array_size, data_type)``
 999 |     for ``array -> array`` codecs, where ``array_size`` is the number of items
1000 |     in the array, i.e., the product of the components of the array's shape;
1001 |   - ``c.compute_encoded_size(array_size, data_type) -> byte_size``
1002 |     for ``array -> bytes`` codecs;
1003 |   - ``c.compute_encoded_size(byte_size) -> byte_size``
1004 |     for ``bytes -> bytes`` codecs.
1005 | 
1006 | .. note::
1007 | 
1008 |    If ``partial_decode`` is not supported by a particular codec, it can
1009 |    always be implemented in terms of ``decode`` by simply decoding in full
1010 |    and then satisfying any ``decoded_regions`` requests directly from the
1011 |    cached decoded representation.
1012 | 
1013 | Determination of encoded representations
1014 | ----------------------------------------
1015 | 
1016 | To encode or decode a chunk, the encoded and decoded representations for each
1017 | codec in the chain must first be determined as follows:
1018 | 
1019 | 1. The initial decoded representation, ``decoded_representation[0]``, is a
1020 |    multi-dimensional array with the same data type as the Zarr array, and shape
1021 |    equal to the chunk shape.
1022 | 
1023 | 2. For each codec ``i``, the encoded representation is equal to the decoded
1024 |    representation ``decoded_representation[i+1]`` of the next codec, and is
1025 |    computed from
1026 |    ``codecs[i].compute_encoded_representation_type(decoded_representation[i])``.
1027 |    If ``compute_encoded_representation_type`` fails because of an incompatible
1028 |    decoded representation, an implementation should indicate an error.
1029 | 
1030 | .. _encoding_procedure:
1031 | 
1032 | Encoding procedure
1033 | ------------------
1034 | 
1035 | Based on the computed ``decoded_representation`` list, a chunk is encoded using
1036 | the following procedure:
1037 | 
1038 | 1. The initial *encoded chunk* ``EC[0]`` of the type specified by
1039 |    ``decoded_representation[0]`` is equal to the chunk array ``A`` (with a shape
1040 |    equal to the chunk shape, and data type equal to the Zarr array data type).
1041 | 
1042 | 2. For each codec ``codecs[i]`` in ``codecs``, ``EC[i+1] :=
1043 |    codecs[i].encode(EC[i])``.
1044 | 
1045 | 3. The final encoded chunk representation ``EC_final := EC[codecs.length]``.
1046 |    This is always a byte string due to the requirement that the list of codecs
1047 |    include an ``array -> bytes`` codec.
1048 | 
1049 | 4. ``EC_final`` is written to the store_.
1050 | 
1051 | .. _decoding_procedure:
1052 | 
1053 | Decoding procedure
1054 | ------------------
1055 | 
1056 | Based on the computed ``decoded_representation`` list, a chunk is decoded using
1057 | the following procedure:
1058 | 
1059 | 1. The encoded chunk representation ``EC_final`` is read from the store_.
1060 | 
1061 | 2. ``EC[codecs.length] := EC_final``.
1062 | 
1063 | 3. For each codec ``codecs[i]`` in ``codecs``, iterating in reverse order,
1064 |    ``EC[i] := codecs[i].decode(EC[i+1], decoded_representation[i])``.
1065 | 
1066 | 4. The chunk array ``A`` is equal to ``EC[0]``.
1067 | 
1068 | .. _codec-specification:
1069 | 
1070 | Core codecs
1071 | -----------
1072 | 
1073 | This specification defines a set of codecs ("core codecs") which all Zarr implementations SHOULD implement in
1074 | order to ensure a minimal level of interoperability between Zarr implementations.
1075 | The list of core codecs is part of the Zarr v3 specification.
1076 | Changes to the list of core codecs MUST be made via the same protocol used for
1077 | changing the Zarr v3 specification. Changes to the list of core codecs SHOULD be made
1078 | in close collaboration with extant Zarr v3 implementations. A new core codec SHOULD be added to the
1079 | list when a sufficient number of Zarr implementations support or intend to support that codec.
1080 | An existing core codec SHOULD be removed from the list when a sufficient number of implementation
1081 | developers and Zarr users deem the codec worth removing, e.g. because of a technical flaw in the
1082 | algorithm underlying the codec.
1083 | 
1084 | Extension codecs
1085 | ----------------
1086 | 
1087 | To allow for flexibility to define and implement new codecs, the
1088 | list of codecs defined for an array MAY contain codecs which are
1089 | defined in separate specifications. In order to refer to codecs in array metadata
1090 | documents, each codec must have a conformant identifier as specified under
1091 | :ref:`extension naming <extension-naming>` below.
1092 | For ease of discovery, it is
1093 | recommended that codec specifications are contributed to the
1094 | registry of extensions
1095 | (`zarr-extensions`_).
1096 | 
1097 | A codec specification MUST declare the codec identifier, and describe
1098 | (or cite documents that describe) the encoding and decoding algorithms
1099 | and the format of the encoded data.
1100 | A codec MAY have configuration parameters which modify the behaviour
1101 | of the codec in some way. For example, a compression codec may have a
1102 | compression level parameter, which is an integer that affects the
1103 | resulting compression ratio of the data. Configuration parameters must
1104 | be declared in the codec specification, including a definition of how
1105 | configuration parameters are represented as JSON.
1106 | 
1107 | Further details of how codecs are configured for an array are given in the
1108 | `Array metadata`_ section.
1109 | 
1110 | Stores
1111 | ======
1112 | 
1113 | A Zarr store is a system that can be used to store and retrieve data
1114 | from a Zarr hierarchy. For a store to be compatible with this
1115 | specification, it must support a set of operations defined in the `Abstract store
1116 | interface`_ subsection. The store interface can be implemented using a
1117 | variety of underlying storage technologies, described in the
1118 | subsection on `Store implementations`_.
1119 | 
1120 | Additionally, a store should specify a canonical URI format that can be used to
1121 | identify nodes in this store. Implementations should use the specified formats
1122 | when opening a Zarr hierarchy to automatically determine the appropriate store.
1123 | 
1124 | .. _abstract-store-interface:
1125 | 
1126 | Abstract store interface
1127 | ------------------------
1128 | 
1129 | The store interface is intended to be simple to implement using a
1130 | variety of different underlying storage technologies. It is defined in
1131 | a general way here, but it should be straightforward to translate into
1132 | a software interface in any given programming language. The goal is
1133 | that an implementation of this specification could be modular and
1134 | allow for different store implementations to be used.
1135 | 
1136 | The store interface defines a set of operations involving `keys` and
1137 | `values`. In the context of this interface, a `key` is a Unicode
1138 | string, where the final character is **not** a ``/`` character.
1139 | In general, a `value` is a sequence of bytes. Specific stores
1140 | may choose more specific storage formats, which must be stated in the
1141 | specification of the respective store. E.g. a database store might
1142 | encode values of ``*.json`` keys with a database-native json type.
1143 | 
1144 | It is assumed that the store holds (`key`, `value`) pairs, with only
1145 | one such pair for any given `key`. I.e., a store is a mapping from
1146 | keys to values. It is also assumed that keys are case sensitive, i.e.,
1147 | the keys "foo" and "FOO" are different.
1148 | 
1149 | To read and write partial values, a `range` specifies two integers
1150 | `range_start` and `range_length`, that specify a part of the value
1151 | starting at byte `range_start` (inclusive) and having a length of
1152 | `range_length` bytes. `range_length` may be none, indicating all
1153 | available data until the end of the referenced value. For example
1154 | `range` ``[0, none]`` specifies the full value. Stores that do not
1155 | support partial access can still fulfill partial requests by first extracting
1156 | the full value and then returning a subset of bytes.
1157 | 
1158 | The store interface also defines some operations involving
1159 | `prefixes`. In the context of this interface, a prefix is a string
1160 | containing only characters that are valid for use in `keys` and ending
1161 | with a trailing ``/`` character.
1162 | 
1163 | The store operations are grouped into three sets of capabilities:
1164 | **readable**, **writeable** and **listable**. It is not necessary for
1165 | a store implementation to support all of these capabilities.
1166 | 
1167 | A **readable store** supports the following operations:
1168 | 
1169 | 
1170 | ``get`` - Retrieve the `value` associated with a given `key`.
1171 | 
1172 |     | Parameters: `key`
1173 |     | Output: `value`
1174 | 
1175 | ``get_partial_values`` - Retrieve possibly partial `values` from given `key_ranges`.
1176 | 
1177 |     | Parameters: `key_ranges`: ordered set of `key`, `range` pairs,
1178 |     |   a `key` may occur multiple times with different `ranges`
1179 |     | Output: list of `values`, in the order of the `key_ranges`,
1180 |     |   may contain null/none for missing keys
1181 | 
1182 | A **writeable store** supports the following operations:
1183 | 
1184 | ``set`` - Store a (`key`, `value`) pair.
1185 | 
1186 |     | Parameters: `key`, `value`
1187 |     | Output: none
1188 | 
1189 | ``set_partial_values`` - Store `values` at a given `key`, starting at byte `range_start`.
1190 | 
1191 |     | Parameters: `key_start_values`: set of `key`,
1192 |     |   `range_start`, `values` triples, a `key` may occur multiple
1193 |     |   times with different `range_starts`, `range_starts` (considering
1194 |     |   the length of the respective `values`) must not specify overlapping
1195 |     |   ranges for the same `key`
1196 |     | Output: none
1197 | 
1198 | ``erase`` - Erase the given key/value pair from the store.
1199 | 
1200 |     | Parameters: `key`
1201 |     | Output: none
1202 | 
1203 | ``erase_values`` - Erase the given key/value pairs from the store.
1204 | 
1205 |     | Parameters: `keys`: set of `keys`
1206 |     | Output: none
1207 | 
1208 | ``erase_prefix`` - Erase all keys with the given prefix from the store.
1209 | 
1210 |     | Parameters: `prefix`
1211 |     | Output: none
1212 | 
1213 | .. note::
1214 | 
1215 |    Some stores allow creating and updating keys, but not deleting them. For
1216 |    example, Zip archives do not allow removal of content without recreating the
1217 |    full archive.
1218 | 
1219 |    Inability to delete can impair the ability to rename keys, as a rename
1220 |    is often a sequence or atomic combination of a deletion and a creation.
1221 | 
1222 | A **listable store** supports any one or more of the following
1223 | operations:
1224 | 
1225 | ``list`` - Retrieve all `keys` in the store.
1226 | 
1227 |     | Parameters: none
1228 |     | Output: set of `keys`
1229 | 
1230 | ``list_prefix`` - Retrieve all keys with a given prefix.
1231 | 
1232 |     | Parameters: `prefix`
1233 |     | Output: set of `keys` with the given `prefix`
1234 | 
1235 |     For example, if a store contains the keys "a/b", "a/c/d" and
1236 |     "e/f/g", then ``list_prefix("a/")`` would return "a/b" and "a/c/d".
1237 | 
1238 |     Note: the behaviour of ``list_prefix`` is undefined if ``prefix`` does not end
1239 |     with a trailing slash ``/`` and the store can assume there is at least one key
1240 |     that starts with ``prefix``.
1241 | 
1242 | ``list_dir`` - Retrieve all keys and prefixes with a given prefix and
1243 | which do not contain the character "/" after the given prefix.
1244 | 
1245 |     | Parameters: `prefix`
1246 |     | Output: set of `keys` and set of `prefixes`
1247 | 
1248 |     For example, if a store contains the keys "a/b", "a/c", "a/d/e",
1249 |     "a/f/g", then ``list_dir("a/")`` would return keys "a/b" and "a/c"
1250 |     and prefixes "a/d/" and "a/f/". ``list_dir("b/")`` would return
1251 |     the empty set.
1252 | 
1253 | 
1254 | Note that because keys are case sensitive, it is assumed that the
1255 | operations ``set("foo", a)`` and ``set("FOO", b)`` will result in two
1256 | separate (key, value) pairs being stored. Subsequently ``get("foo")``
1257 | will return *a* and ``get("FOO")`` will return *b*.
1258 | 
1259 | It is recommended that the implementation of the
1260 | ``get_partial_values``, ``set_partial_values`` and
1261 | ``erase_values`` methods is made optional, providing fallbacks
1262 | for them by default. However, it is recommended to supply those operations
1263 | where possible for efficiency. Also, the ``get``, ``set`` and ``erase`` operations
1264 | can easily be mapped onto their `partial_values` counterparts.
1265 | Therefore, it is also recommended to supply fallbacks for those if the
1266 | `partial_values` operations can be implemented.
1267 | An entity containing those fallbacks could be named ``StoreWithPartialAccess``.
1268 | 
1269 | Store implementations
1270 | ---------------------
1271 | 
1272 | (This subsection is not normative.)
1273 | 
1274 | A store implementation maps the abstract operations of the store
1275 | interface onto concrete operations on some underlying storage
1276 | system. This specification does not constrain or make any assumptions
1277 | about the nature of the underlying storage system. Thus it is possible
1278 | to implement the store interface in a variety of different ways.
1279 | 
1280 | For example, a store implementation might use a conventional file
1281 | system as the underlying storage system, mapping keys onto file paths
1282 | and values onto file contents. The ``get`` operation could then be
1283 | implemented by reading a file, the ``set`` operation implemented by
1284 | writing a file, and the ``list_dir`` operation implemented by listing
1285 | a directory.
1286 | 
1287 | For example, a store implementation might use a key-value database
1288 | such as BerkeleyDB or LMDB as the underlying storage system. In this
1289 | case the implementation of ``get`` and ``set`` operations would be
1290 | whatever native operations are provided by the
1291 | database for getting and setting key/value pairs. Such a store
1292 | implementation might natively support the ``list`` operation but might
1293 | not support ``list_prefix`` or ``list_dir``, although these could be
1294 | implemented via ``list`` with post-processing of the returned keys.
1295 | 
1296 | For example, a store implementation might use a cloud object storage
1297 | service such as Amazon S3, Azure Blob Storage, or Google Cloud Storage
1298 | as the underlying storage system, mapping keys to object names and
1299 | values to object contents. The store interface operations would then
1300 | be implemented via concrete operations of the service's REST API,
1301 | i.e., via HTTP requests. E.g., the ``get`` operation could be
1302 | implemented via an HTTP GET request to an object URL, the ``set``
1303 | operation could be implemented via an HTTP PUT request to an object
1304 | URL, and the list operations could be implemented via an HTTP GET
1305 | request to a bucket URL (i.e., listing a bucket).
1306 | 
1307 | The examples above are meant to be illustrative only, and other
1308 | implementations are possible. This specification does not attempt to
1309 | standardise any store implementations; however, where a store
1310 | implementation is expected to be widely used, it is recommended to
1311 | create a store implementation spec and contribute it to the `zarr-specs GitHub repository`_.
1312 | For an example of a store implementation spec, see the
1313 | :ref:`file-system-store-v1` specification.
1314 | 
1315 | 
1316 | Storage
1317 | =======
1318 | 
1319 | This section describes how to translate high level operations to
1320 | create, erase or modify Zarr hierarchies, groups or arrays, into low
1321 | level operations on the key/value store interface defined above.
1322 | 
1323 | In this section a "hierarchy path" is a logical path which identifies
1324 | a group or array node within a Zarr hierarchy, and a "storage key" is
1325 | a key used to store and retrieve data via the store interface. There
1326 | is a further distinction between "metadata keys" which are storage
1327 | keys used to store metadata documents, and "chunk keys" which are
1328 | storage keys used to store encoded chunks.
1329 | 
1330 | Note that any non-root hierarchy path will have ancestor paths that
1331 | identify ancestor nodes in the hierarchy. For example, the path
1332 | "/foo/bar" has ancestor paths "/foo" and "/".
1333 | 
1334 | 
1335 | Operations
1336 | ----------
1337 | 
1338 | The following section describes possible operations of an implementation as a
1339 | non-normative guideline.
1340 | 
1341 | Let `P` be an arbitrary hierarchy path.
1342 | 
1343 | Let ``meta_key(P)`` be the metadata key for `P`, ``P/zarr.json``.
1344 | 
1345 | Let ``data_key(P, j, i ...)`` be the data key for `P` for the chunk
1346 | with grid coordinates (`j`, `i`, ...).
1347 | 
1348 | Let "+" be the string concatenation operator.
1349 | 
1350 | 
1351 | **Create a group**
1352 | 
1353 |     To create a group at hierarchy path `P`, perform
1354 |     ``set(meta_key(P), value)``, where `value` is the
1355 |     serialisation of a valid group metadata document, and
1356 |     ensure the existence of groups at all ancestor paths of `P`.
1357 | 
1358 | **Create an array**
1359 | 
1360 |     To create an array at hierarchy path `P`, perform
1361 |     ``set(meta_key(P), value)``, where `value` is the serialisation of a valid
1362 |     array metadata document.
1363 | 
1364 |     Creating an array at path `P` implies the existence of groups at all
1365 |     ancestor paths of `P`.
1366 | 
1367 | **Store chunk data in an array**
1368 | 
1369 |     To store chunk data in an array at path `P` and chunk coordinate (`j`, `i`,
1370 |     ...), perform ``set(data_key(P, j, i, ...), value)``, where `value` is the
1371 |     serialisation of the corresponding chunk, encoded according to the
1372 |     information in the array metadata stored under the key ``meta_key(P)``.
1373 | 
1374 | **Retrieve chunk data in an array**
1375 | 
1376 |     To retrieve chunk data in an array at path `P` and chunk coordinate (`j`,
1377 |     `i`, ...), perform ``get(data_key(P, j, i, ...))``. The returned
1378 |     value is the serialisation of the corresponding chunk, encoded according to
1379 |     the array metadata stored at ``meta_key(P)``.
1380 | 
1381 | **Discover children of a group**
1382 | 
1383 |     To discover the children of a group at hierarchy path `P`, perform
1384 |     ``list_dir(P + "/")``. Any returned prefix ``Q`` not starting with ``__``
1385 |     indicates a child array or group. To determine whether the child is
1386 |     an array or group, the document ``meta_key(Q)`` must be checked.
1387 | 
1388 |     For example, if a group is created at path "/foo/bar" and an array
1389 |     is created at path "/foo/baz/qux", then the store will contain the
1390 |     keys "foo/bar/zarr.json" and "foo/baz/qux/zarr.json".
1391 |     Groups at paths "/", "/foo" and "/foo/baz" have not been explicitly
1392 |     created but are implied by their descendants. To list the children
1393 |     of the group at path "/foo", perform ``list_dir("/foo/")``,
1394 |     which will return the prefixes "foo/bar/" and "foo/baz/".
1395 |     From this it can be inferred that child groups or arrays
1396 |     "/foo/bar" and "/foo/baz" are present.
1397 | 
1398 |     If a store does not support any of the list operations then discovery of
1399 |     group children is not possible, and the contents of the hierarchy must be
1400 |     communicated by some other means, such as via an extension (see
1401 |     https://github.com/zarr-developers/zarr-specs/issues/15) or via some out of
1402 |     band communication.
1403 | 
1404 | **Discover all nodes in a hierarchy**
1405 | 
1406 |     To discover all nodes in a hierarchy, one should discover the children of
1407 |     the root of the hierarchy and then recursively list children of child
1408 |     groups.
1409 | 
1410 |     For hierarchies without group storage transformers one may also call
1411 |     ``list_prefix("/")``. All ``zarr.json`` keys represent either groups or arrays.
1412 | 
1413 | **Erase a group or array**
1414 | 
1415 |     To erase an array at path `P`, erase the metadata document and array data
1416 |     for the array, ``erase_prefix(P + "/")``.
1417 | 
1418 |     To erase a group at path `P`: erase all nodes under
1419 |     this group and its metadata document - it should be sufficient to perform
1420 |     ``erase_prefix(P + "/")``.
1421 | 
1422 | **Determine if a node exists**
1423 | 
1424 |     To determine if a node exists at path ``P``, perform the following check:
1425 | 
1426 |     - ``get(meta_key(P))``
1427 |       (success implies an array or group at ``P``);
1428 | 
1429 |     .. note::
1430 |         For listable stores, ``list_dir(parent(P))`` can be an alternative.
1431 | 
1432 | 
1433 | Storage transformers
1434 | ====================
1435 | 
1436 | A Zarr storage transformer modifies a request to read or write data before passing
1437 | that request to the following transformer or store.
1438 | The stored transformed data is restored to its original state whenever data is requested
1439 | by the Array. Storage transformers can be configured per array via the
1440 | `storage_transformers <storage_transformers_>`_ name in the `array metadata`_. Storage transformers which do
1441 | not change the storage layout (e.g. for caching) may be specified at runtime without
1442 | adding them to the array metadata.
1443 | 
1444 | .. note::
1445 |     It is planned to add storage transformers also to groups in a later revision
1446 |     of this spec, see https://github.com/zarr-developers/zarr-specs/issues/215.
1447 | 
1448 | A storage transformer serves the same `abstract store interface`_ as the store_.
1449 | However, it should not persistently store any information necessary to restore the original data,
1450 | but instead propagate this to the next storage transformer or the final store.
1451 | From the perspective of an array or a previous stage transformer, both store and storage transformer follow the same
1452 | protocol and can be interchanged regarding the protocol. The behaviour can still be different,
1453 | e.g. requests may be cached or the form of the underlying data can change.
1454 | 
1455 | Storage transformers may be stacked to combine different functionalities:
1456 | 
1457 | .. mermaid::
1458 | 
1459 |     graph LR
1460 |       Array --> t1
1461 |       subgraph stack [Storage transformers]
1462 |         t1[Transformer 1] --> t2[...] --> t3[Transformer N]
1463 |       end
1464 |       t3 --> Store
1465 | 
1466 | 
1467 | .. _extensions_section:
1468 | 
1469 | Extensions
1470 | ==========
1471 | 
1472 | Additional functionality and features can be enabled in Zarr datasets through
1473 | extensions defined in `metadata documents`_. Each extension corresponds to a
1474 | specific extension point, such as data types or codecs. Extensions may include
1475 | optional configuration, which can be provided via structured objects. Proper
1476 | naming is essential for cross-implementation interoperability, ensuring
1477 | extensions are recognized and used consistently. This section outlines
1478 | available extension points, the structural constraints on extensions, and
1479 | naming conventions.
1480 | 
1481 | .. _extension-points:
1482 | 
1483 | Extension points
1484 | ----------------
1485 | 
1486 | Different types of extensions can exist and they can be grouped as follows:
1487 | 
1488 | =========== ======================= ================================================================== ================================
1489 | node_type   extension point         metadata definition                                                list of core extensions
1490 | =========== ======================= ================================================================== ================================
1491 | array       data type               :ref:`data-type <array-metadata-data-type>`                        :ref:`data-type-list`
1492 | array       chunk grid              :ref:`chunk-grid <array-metadata-chunk-grid>`                      :ref:`chunk-grid-list`
1493 | array       chunk key encoding      :ref:`chunk-key-encoding <array-metadata-chunk-key-encoding>`      :ref:`chunk-key-encoding-list`
1494 | array       codecs                  :ref:`codecs <array-metadata-codecs>`                              :ref:`codec-list`
1495 | array       storage transformer     :ref:`storage-transformers <array-metadata-storage-transformers>`  :ref:`storage-transformer-list`
1496 | =========== ======================= ================================================================== ================================
1497 | 
1498 | Note that ``fill_value`` is not its own extension point, but is dependent on the data type.
1499 | 
1500 | New extension points may be proposed to the Zarr community through the ZEP
1501 | process. See `ZEP 0 <https://zarr.dev/zeps/active/ZEP0000.html>`_ for more information.
1502 | 
1503 | .. _extension-definition:
1504 | 
1505 | Extension definition
1506 | --------------------
1507 | 
1508 | .. _extension-definition-object:
1509 | 
1510 | Objects
1511 | ^^^^^^^
1512 | 
1513 | In `metadata documents`_, extensions can be encoded either as objects or as
1514 | short-hand names.
1515 | 
1516 | If using an object definition, the member ``name``
1517 | MUST be a plain string which conforms to :ref:`extension name <extension-naming>`.
1518 | Optionally, the member ``configuration`` MAY be present but if so MUST be
1519 | an object.
1520 | 
1521 | For example::
1522 | 
1523 |     {
1524 |         "name": "<name>",        # conformant name
1525 |         "configuration": { ... } # optional object
1526 |     }
1527 | 
1528 | .. _extension-definition-short-hand-name:
1529 | 
1530 | Short-hand names
1531 | ^^^^^^^^^^^^^^^^
1532 | 
1533 | Instead of extension objects, short-hand names MAY be used if no
1534 | configuration metadata is required. They are equivalent to extension
1535 | objects with just a ``name`` key.
1536 | 
1537 | .. _extension-definition-must-understand:
1538 | 
1539 | `must_understand`
1540 | ^^^^^^^^^^^^^^^^^
1541 | 
1542 | An extension object is interpreted to have an implicit field ``must_understand`` set to
1543 | ``true``, unless otherwise stated. An extension object MAY explicitly set ``"must_understand": false`` if
1544 | implementations can ignore its presence.
1545 | 
1546 | An implementation MUST fail to open Zarr groups or arrays if any
1547 | metadata fields are present which (a) the
1548 | implementation does not recognize and (b) are not explicitly
1549 | set to ``"must_understand": false``.
1550 | 
1551 | ``"must_understand": false`` is not supported for the following extension points:
1552 | data type, chunk grid, and chunk key encoding.
1553 | 
1554 | Use of ``"must_understand": false`` to add top-level keys is discouraged in favor
1555 | of the explicit use of :ref:`extension-points`.
1556 | 
1557 | .. _extension-naming:
1558 | 
1559 | Extension naming
1560 | ----------------
1561 | 
1562 | The ``name`` field of an extension is an identifier that has been registered
1563 | prior to release in any implementation within the `zarr-extensions`_ GitHub
1564 | repository, where extensions and their specifications are listed. The Zarr
1565 | Steering Council, or by delegation a maintainer team, reserves the right to
1566 | refuse name assignment at its own discretion.
1567 | 
1568 | .. _extension-naming-registered-names:
1569 | 
1570 | Registered names consist of a single string that is unique within the Zarr ecosystem.
1571 | Registered names are intended for well-known extensions aimed at broad adoption and maximum interoperability.
1572 | Registered names are unique and immutable.
1573 | 
1574 | Registered names MUST start with one lower case letter a-z and then be followed
1575 | by only lower case letters a-z, numerals 0-9, underscores, dots and dashes.
1576 | 
1577 | - **Accepted regex:** ``^[a-z][a-z0-9_.-]+$``
1578 | - **Valid examples:**
1579 |     - ``zstd``
1580 |     - ``numcodecs.adler32``
1581 | - **Invalid examples:**
1582 |     - ``foo/bar``
1583 |     - ``foo:bar``
1584 | 
1585 | .. note::
1586 |   In previous versions of the v3 spec, the name of an extension was required
1587 |   to be a URI. That is now discouraged for new extensions, though, for
1588 |   backwards compatibility with existing extensions, URI names are still
1589 |   permitted.
1590 | 
1591 |   A proposal to additionally support multiple registration mechanisms is under
1592 |   discussion in https://github.com/zarr-developers/zarr-specs/pull/330 .
1593 | 
1594 | .. _extension-guidance:
1595 | 
1596 | Guidance for extension authors
1597 | ------------------------------
1598 | 
1599 | *This section is non-normative and provides assistance for the authors of
1600 | extensions, especially those who are just getting started.*
1601 | 
1602 | The Zarr maintainers endeavor to make the registration of names as
1603 | straightforward as possible. We encourage all authors to make use of the extensions
1604 | repository to prevent duplicate efforts across the community where possible.
1605 | 
1606 | * **During development**: Authors should use whatever name makes sense
1607 |   for their extension, provided it is not already reserved in the registry.
1608 |   Once there is a working implementation of the extension (e.g. a PR to an
1609 |   existing Zarr implementation), the extension should be submitted to the registry.
1610 | 
1611 | * **Well-known extensions**: Authors implementing a well-known extension
1612 |   like a data type or codec that is already referred to by name in the
1613 |   community may want to check the `zarr-extensions`_ repository to see if
1614 |   someone has already implemented the extension.
1615 | 
1616 | * **Production extensions**: Authors intending to create significant amounts of
1617 |   data or widely distributed data should consider registering all extensions in
1618 |   the extension registry to increase the long-term maintainability of the data.
1619 | 
1620 | Extension versioning
1621 | --------------------
1622 | 
1623 | Registered extensions SHOULD follow the compatibility and versioning `stability policy`_.
1624 | 
1625 | Extension example
1626 | -----------------
1627 | 
1628 | The following example of array metadata demonstrates these extension naming schemes::
1629 | 
1630 |     {
1631 |         "zarr_format": 3,
1632 |         "data_type": "string", // registered, short-hand name
1633 |         "chunk_key_encoding": {
1634 |             "name": "default", // core
1635 |             "configuration": { "separator": "." }
1636 |         },
1637 |         "codecs": [
1638 |             {
1639 |                 "name": "vlen-utf8" // registered name
1640 |             },
1641 |             {
1642 |                 "name": "zstd", // registered name
1643 |                 "configuration": { ... }
1644 |             }
1645 |         ],
1646 |         "chunk_grid": {
1647 |             "name": "regular", // core
1648 |             "configuration": { "chunk_shape": [ 32 ] }
1649 |         },
1650 |         "shape": [ 128 ],
1651 |         "dimension_names": [ "x" ],
1652 |         "attributes": { ... }
1653 |     }
1654 | 
1655 | Extension specifications
1656 | ------------------------
1657 | 
1658 | Extensions SHOULD have a published specification. A published specification
1659 | facilitates multiple implementations of an extension.
1660 | 
1661 | For extensions with registered names, the `zarr-extensions`_ repository
1662 | SHOULD either contain the specification or link to it.
1663 | 
1664 | Implementation Notes
1665 | ====================
1666 | 
1667 | This section is non-normative and presents notes from implementers about cases
1668 | that need to be carefully considered but do not strictly fall into the spec.
1669 | 
1670 | Resizing
1671 | --------
1672 | 
1673 | In general, arrays can be resized for writable (and, if necessary, deletable)
1674 | stores. In the most basic case, two scenarios can be considered: shrinking along
1675 | an array dimension, or increasing its size.
1676 | 
1677 | When shrinking, implementations can consider whether to delete chunks if the
1678 | store allows this, or keep them. This should either be configurable, or be
1679 | communicated to the user appropriately.
1680 | 
1681 | When increasing an array along a dimension, chunks may or may not have existed
1682 | in the new area. Areas where no chunks existed previously implicitly
1683 | have the fill value after updating the metadata; no new chunks need to be
1684 | written in this case. Previous partial chunks will contain the fill value at the
1685 | time of writing them by default. If there was chunk data in the new area which
1686 | was not deleted when shrinking the array, this data will be shown by default.
1687 | The latter case should be signalled to the user appropriately. An implementation
1688 | can also allow the user to choose to delete previous data explicitly when
1689 | increasing the array (by writing the fill value into partial chunks and deleting
1690 | others), but this should not be the default behaviour.
1691 | 
1692 | 
1693 | Comparison with Zarr v2
1694 | =======================
1695 | 
1696 | This section is informative.
1697 | 
1698 | Below is a summary of the key differences between this specification
1699 | (v3) and Zarr v2.
1700 | 
1701 | - v3 has explicit support for extensions via defined
1702 |   extension points and mechanisms.
1703 | 
1704 | - The set of data types specified in v3 is smaller than in v2. Additional
1705 |   data types will be defined via extensions.
1706 | 
1707 | References
1708 | ==========
1709 | 
1710 | .. [RFC8259] T. Bray, Ed. The JavaScript Object Notation (JSON) Data
1711 |    Interchange Format. December 2017. Best Current Practice. URL:
1712 |    https://tools.ietf.org/html/rfc8259
1713 | 
1714 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
1715 |    Requirement Levels. March 1997. Best Current Practice. URL:
1716 |    https://tools.ietf.org/html/rfc2119
1717 | 
1718 | 
1719 | Change log
1720 | ==========
1721 | 
1722 | All notable and possibly implementation-affecting changes to this specification
1723 | are documented in this section, grouped by the specification status and ordered
1724 | by time.
1725 | 
1726 | 3.1
1727 | ---
1728 | 
1729 | - Clarification of extensions. `PR #330
1730 |   <https://github.com/zarr-developers/zarr-specs/pull/330/>`_. With this change,
1731 |   it is now possible to add user-defined extensions.
1732 |   Additionally, extensions may be marked with ``"must_understand": false`` in case
1733 |   a non-implementing library can safely ignore them.
1734 |   Please see the new :ref:`Extensions section <extensions_section>`
1735 |   for details.
1736 | 
1737 | Changes after Provisional Acceptance
1738 | ------------------------------------
1739 | - Support for implicit groups was removed. `PR #292
1740 |   <https://github.com/zarr-developers/zarr-specs/pull/292/>`_
1741 | - ``endian`` codec was renamed to ``bytes`` codec.  `PR #263
1742 |   <https://github.com/zarr-developers/zarr-specs/pull/263/>`_
1743 | - Fallback data type support was removed.  `PR #248
1744 |   <https://github.com/zarr-developers/zarr-specs/pull/248/>`_
1745 | - It is now required to specify an ``array -> bytes`` codec in the ``codecs``
1746 |   array metadata field.  `PR #249
1747 |   <https://github.com/zarr-developers/zarr-specs/pull/249>`_
1748 | - The representation of fill values for floating point numbers was changed to
1749 |   avoid ambiguity.  `PR #236
1750 |   <https://github.com/zarr-developers/zarr-specs/pull/236>`_
1751 | 
1752 | Draft Changes
1753 | -------------
1754 | 
1755 | - Removed ``extensions`` field and clarified extension point behaviour, changing the config format of
1756 |   data-types, chunk-grid, storage-transformers and codecs. `PR #204
1757 |   <https://github.com/zarr-developers/zarr-specs/pull/204>`_
1758 | - Changed ``format_version`` to the int ``3``, added key ``node_type`` to group and array metadata. `PR #204
1759 |   <https://github.com/zarr-developers/zarr-specs/pull/204>`_
1760 | - Restructured keys and removed entry-point metadata. `PR #200
1761 |   <https://github.com/zarr-developers/zarr-specs/pull/200>`_
1762 | - Added the ``dimension_names`` array metadata field. `PR #162
1763 |   <https://github.com/zarr-developers/zarr-specs/pull/162>`_
1764 | - Replaced ``chunk_memory_layout`` with transpose codec.  `PR #189
1765 |   <https://github.com/zarr-developers/zarr-specs/pull/189>`_
1766 | - Allowed to have a list of fallback data types. `PR #167
1767 |   <https://github.com/zarr-developers/zarr-specs/pull/167>`_
1768 | - Removed the 255 character limit for paths. `PR #175
1769 |   <https://github.com/zarr-developers/zarr-specs/pull/175>`_
1770 | - Removed the ``/root`` prefix for paths. `PR #175
1771 |   <https://github.com/zarr-developers/zarr-specs/pull/175>`_
1772 | 
1773 |   * ``meta/root.array.json`` is now ``meta/array.json``
1774 |   * ``meta/root/foo/bar.group.json`` is now ``meta/foo/bar.group.json``
1775 | - Moved the ``metadata_key_suffix`` entrypoint metadata key into ``metadata_encoding``,
1776 |   which now just specifies ``"json"`` via the ``type`` key and is an extension point.
1777 |   `PR #171 <https://github.com/zarr-developers/zarr-specs/pull/171>`_
1778 | - Changed data type names and changed endianness to be handled by a codec.
1779 |   `PR #155 <https://github.com/zarr-developers/zarr-specs/pull/155>`_
1780 | - Replaced the ``compressor`` field in the array metadata with a ``codecs``
1781 |   field that can specify a list of codecs. `PR #153
1782 |   <https://github.com/zarr-developers/zarr-specs/pull/153>`_
1783 | - Required ``fill_value`` in the array metadata to be defined.
1784 |   `PR #145 <https://github.com/zarr-developers/zarr-specs/pull/145>`_
1785 | - Added array storage transformers which can be configured per array via the
1786 |   storage_transformers name in the array metadata.
1787 |   `PR #134 <https://github.com/zarr-developers/zarr-specs/pull/134>`_
1788 | - The changelog is incomplete before 2022, please refer to the commits on
1789 |   GitHub.
1790 | 
1791 | .. _zarr-specs GitHub repository: https://github.com/zarr-developers/zarr-specs
1792 | .. _zarr-extensions: https://github.com/zarr-developers/zarr-extensions
1793 | 


--------------------------------------------------------------------------------
/docs/v3/core/terminology-hierarchy.excalidraw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zarr-developers/zarr-specs/b880fb385bedb18dd78ffef1bd683e7e93270c74/docs/v3/core/terminology-hierarchy.excalidraw.png


--------------------------------------------------------------------------------
/docs/v3/core/terminology-read.excalidraw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zarr-developers/zarr-specs/b880fb385bedb18dd78ffef1bd683e7e93270c74/docs/v3/core/terminology-read.excalidraw.png


--------------------------------------------------------------------------------
/docs/v3/data-types/index.rst:
--------------------------------------------------------------------------------
  1 | .. _data-type-list:
  2 | 
  3 | ==========
  4 | Data Types
  5 | ==========
  6 | 
  7 | The following section specifies data types which SHOULD
  8 | be implemented by all implementations.
  9 | 
 10 | Core data types
 11 | ---------------
 12 | 
 13 | .. list-table:: Data types
 14 |    :header-rows: 1
 15 | 
 16 |    * - Identifier
 17 |      - Numerical Type
 18 |    * - ``bool``
 19 |      - Boolean
 20 |    * - ``int8``
 21 |      - Integer in ``[-2^7, 2^7-1]``
 22 |    * - ``int16``
 23 |      - Integer in ``[-2^15, 2^15-1]``
 24 |    * - ``int32``
 25 |      - Integer in ``[-2^31, 2^31-1]``
 26 |    * - ``int64``
 27 |      - Integer in ``[-2^63, 2^63-1]``
 28 |    * - ``uint8``
 29 |      - Integer in ``[0, 2^8-1]``
 30 |    * - ``uint16``
 31 |      - Integer in ``[0, 2^16-1]``
 32 |    * - ``uint32``
 33 |      - Integer in ``[0, 2^32-1]``
 34 |    * - ``uint64``
 35 |      - Integer in ``[0, 2^64-1]``
 36 |    * - ``float16`` (optionally supported)
 37 |      - IEEE 754 half-precision floating point: sign bit, 5 bits exponent, 10 bits mantissa
 38 |    * - ``float32``
 39 |      - IEEE 754 single-precision floating point: sign bit, 8 bits exponent, 23 bits mantissa
 40 |    * - ``float64``
 41 |      - IEEE 754 double-precision floating point: sign bit, 11 bits exponent, 52 bits mantissa
 42 |    * - ``complex64``
  43 |      - real and imaginary components are each IEEE 754 single-precision floating point
 44 |    * - ``complex128``
  45 |      - real and imaginary components are each IEEE 754 double-precision floating point
 46 |    * - ``r*`` (Optional)
 47 |      - raw bits, variable size given by ``*``, limited to be a multiple of 8
 48 | 
 49 | .. _fill-value-list:
 50 | 
 51 | Permitted fill values
 52 | ^^^^^^^^^^^^^^^^^^^^^
 53 | 
 54 | The permitted values depend on the data type:
 55 | 
 56 |     ``bool``
 57 |       The value must be a JSON boolean (``false`` or ``true``).
 58 | 
 59 |     Integers (``{uint,int}{8,16,32,64}``)
 60 |       The value must be a JSON number with no fraction or exponent part that is
 61 |       within the representable range of the data type.
 62 | 
 63 |     IEEE 754 floating point numbers (``float{16,32,64}``)
 64 |       The value may be either:
 65 | 
  66 |       - A JSON number, which will be rounded to the nearest representable value.
 67 | 
 68 |       - A JSON string of the form:
 69 | 
 70 |         - ``"Infinity"``, denoting positive infinity;
 71 |         - ``"-Infinity"``, denoting negative infinity;
 72 |         - ``"NaN"``, denoting the not-a-number (NaN) value where the sign bit is
 73 |           0 (positive), the most significant bit (MSB) of the mantissa is 1, and
 74 |           all other bits of the mantissa are zero;
 75 |         - ``"0xYYYYYYYY"``, specifying the byte representation of the floating
 76 |           point number as an unsigned integer.  For example, for ``float32``,
 77 |           ``"NaN"`` is equivalent to ``"0x7fc00000"``.  This representation is
 78 |           the only way to specify a NaN value other than the specific NaN value
 79 |           denoted by ``"NaN"``.
 80 | 
 81 |         .. warning::
 82 | 
 83 |            While this NaN syntax is consistent with the syntax accepted by the
 84 |            C99 ``strtod`` function, C99 leaves the meaning of the NaN payload
 85 |            string implementation defined, which may not match the Zarr
 86 |            definition.
 87 | 
 88 |     Complex numbers (``complex{64,128}``)
 89 |       The value must be a two-element array, specifying the real and imaginary
 90 |       components respectively, where each component is specified as defined
  91 |       above for floating point numbers.
 92 | 
 93 |       For example, ``[1, 2]`` indicates ``1 + 2i`` and ``["-Infinity", "NaN"]``
 94 |       indicates a complex number with real component of -inf and imaginary
 95 |       component of NaN.
 96 | 
 97 |     Raw data types (``r<N>``)
  98 |       An array of integers, with length equal to ``<N>/8`` (one integer per
  99 |       byte), where each integer is in the range ``[0, 255]``.
100 | 
101 | Extensions
102 | ----------
103 | 
104 | Registered data type extensions can be found under
105 | `zarr-extensions::data-types <https://github.com/zarr-developers/zarr-extensions/tree/main/data-types>`_.
106 | 


--------------------------------------------------------------------------------
/docs/v3/storage-transformers/index.rst:
--------------------------------------------------------------------------------
 1 | .. _storage-transformer-list:
 2 | 
 3 | ==========================
 4 | Array Storage Transformers
 5 | ==========================
 6 | 
 7 | .. COMMENT TO BE REMOVED WHEN ONE IS ADDED
 8 | 
 9 |    The following documents specify core storage transformers which SHOULD
10 |    be implemented by all implementations.
11 | 
12 |    toctree::
13 |    :glob:
14 |    :maxdepth: 1
15 |    :titlesonly:
16 |    :caption: Contents:
17 | 
18 |    */*
19 | 
20 | Currently, no core storage transformers are defined by this specification.
21 | 
22 | Extensions
23 | ----------
24 | 
 25 | Registered storage transformer extensions can be found under
26 | `zarr-extensions::storage-transformers <https://github.com/zarr-developers/zarr-extensions/tree/main/storage-transformers>`_.
27 | 


--------------------------------------------------------------------------------
/docs/v3/stores/filesystem/index.rst:
--------------------------------------------------------------------------------
  1 | .. _file-system-store-v1:
  2 | 
  3 | =================
  4 | File system store
  5 | =================
  6 | 
  7 | Version:
  8 |     1.0
  9 | Specification URI:
 10 |     https://zarr-specs.readthedocs.io/en/latest/v3/stores/filesystem/
 11 | Corresponding ZEP:
 12 |     `ZEP0001 — Zarr specification version 3 <https://zarr.dev/zeps/accepted/ZEP0001.html>`_
 13 | Issue tracking:
 14 |     `GitHub issues <https://github.com/zarr-developers/zarr-specs/labels/stores-filesystem-v1.0>`_
 15 | Suggest an edit for this spec:
 16 |     `GitHub editor <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/stores/filesystem/index.rst>`_
 17 | 
 18 | Copyright 2019-Present Zarr core development team. This work is
 19 | licensed under a `Creative Commons Attribution 3.0 Unported License
 20 | <https://creativecommons.org/licenses/by/3.0/>`_.
 21 | 
 22 | ----
 23 | 
 24 | 
 25 | Abstract
 26 | ========
 27 | 
 28 | This specification defines an implementation of the Zarr abstract
 29 | store API using a file system.
 30 | 
 31 | 
 32 | Status of this document
 33 | =======================
 34 | 
 35 | ZEP0001 was accepted on May 15th, 2023 via https://github.com/zarr-developers/zarr-specs/issues/227.
 36 | 
 37 | 
 38 | Notes about design decisions for the native File System Store 
 39 | =============================================================
 40 | 
  41 | The original file system store is designed for simplicity and easy manipulation
  42 | and transfer by external tools not aware of the store structure. In particular,
  43 | tools like ``gsutil`` can be used to transfer a local directory store to
  44 | cloud-based storage, hence the key choices will be preserved.
 45 | 
 46 | 
 47 | Document conventions
 48 | ====================
 49 | 
 50 | Conformance requirements are expressed with a combination of
 51 | descriptive assertions and [RFC2119]_ terminology. The key words
 52 | "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 53 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the normative
 54 | parts of this document are to be interpreted as described in
 55 | [RFC2119]_. However, for readability, these words do not appear in all
 56 | uppercase letters in this specification.
 57 | 
 58 | All of the text of this specification is normative except sections
 59 | explicitly marked as non-normative, examples, and notes. Examples in
 60 | this specification are introduced with the words "for example".
 61 | 
 62 | 
 63 | Native storage operations
 64 | =========================
 65 | 
 66 | Here we consider a file system to be any system comprised of files and
 67 | directories, where:
 68 | 
 69 | * Each file has a name (sequence of characters) and contents
 70 |   (sequence of bytes).
 71 | 
 72 | * Each directory has a name (sequence of characters) and children (set
 73 |   of zero or more files and/or directories).
 74 | 
 75 | * Each file or directory can be addressed by a path, comprised of its
 76 |   name and the names of all ancestor directories, which uniquely
 77 |   identifies it within the file system.
 78 | 
 79 | … and where the following native operations are supported:
 80 | 
 81 | * Create a file.
 82 | 
 83 | * Write the contents of a file.
 84 | 
 85 | * Read the contents of a file.
 86 | 
 87 | * Delete a file.
 88 | 
 89 | * Create a directory.
 90 | 
 91 | * List the children of a directory, returning the name and type (file
 92 |   or directory) of each child.
 93 | 
 94 | * Delete a directory.
 95 | 
 96 | 
 97 | Key translation
 98 | ===============
 99 | 
100 | The Zarr store interface is defined in terms of `keys` and `values`,
101 | where a `key` is a sequence of characters and a `value` is a sequence
102 | of bytes. A file system store translates keys into file system
103 | paths. This translation assumes that the store has been defined
104 | relative to a base directory. The translation is as follows:
105 | 
106 | * Replace any forward slash characters ('/') in the key with the
107 |   native directory separator for the file system.
108 | 
109 | * Join the result to the base directory path, using the native
110 |   directory separator.
111 | 
112 | For example, if the file system is a POSIX file system, and the base
113 | directory path is "/data", then the key "foo/bar" is translated to the
114 | file system path "/data/foo/bar".
115 | 
116 | For example, if the file system is a Windows file system, and the base
117 | directory path is "C:\\data", then the key "foo/bar" is translated to
118 | the file system path "C:\\data\\foo\\bar".
119 | 
120 | When returning information about available keys, a file system store
121 | performs the reverse translation from file system paths to keys. This
122 | translation is as follows:
123 | 
124 | * Replace any native directory separator characters with the forward
125 |   slash character.
126 | 
127 | * Strip the base directory path from the beginning of the path.
128 | 
129 | For example, if the file system is a POSIX file system, and the base
130 | directory path is "/data", then the file system path "/data/foo/bar"
131 | is translated to the key "foo/bar".
132 | 
133 | For example, if the file system is a Windows file system, and the base
134 | directory path is "C:\\data", then the file system path
135 | "C:\\data\\foo\\bar" is translated to the key "foo/bar".
136 | 
137 | 
138 | Store API implementation
139 | ========================
140 | 
141 | The section below defines an implementation of the Zarr
142 | :ref:`abstract-store-interface` in terms of the native operations of this
143 | storage system. Below ``fspath_to_key()`` is a function that
144 | translates file system paths to store keys, and ``key_to_fspath()`` is
145 | a function that translates store keys to file system paths, as defined
146 | in the section above.
147 | 
148 | * ``get(key) -> value`` : Read and return the contents of the file at
149 |   file system path ``key_to_fspath(key)``.
150 | 
151 | * ``set(key, value)`` : Write ``value`` as the contents of the file at
152 |   file system path ``key_to_fspath(key)``.
153 | 
154 | * ``delete(key)`` : Delete the file or directory at file system path
155 |   ``key_to_fspath(key)``.
156 | 
157 | * ``list()`` : Recursively walk the file system from the base
158 |   directory, returning an iterator over keys obtained by calling
159 |   ``fspath_to_key(fp)`` for each descendant file path ``fp``.
160 | 
161 | * ``list_prefix(prefix)`` : Obtain a file system path by calling
162 |   ``key_to_fspath(prefix)``. If the result is a directory path,
163 |   recursively walk the file system from this directory, returning an
164 |   iterator over keys obtained by calling ``fspath_to_key(fp)`` for
165 |   each descendant file path ``fp``.
166 | 
167 | * ``list_dir(prefix)`` : Obtain a file system path by calling
168 |   ``key_to_fspath(prefix)``. If the result is a directory path, list
169 |   the directory children. Return a set of keys obtained by calling
170 |   ``fspath_to_key(fp)`` for each child file path ``fp``, and a set of
171 |   prefixes obtained by calling ``fspath_to_key(dp)`` for each child
172 |   directory path ``dp``.
173 | 
174 | 
175 | Canonical URI
176 | =============
177 | 
178 | The canonical URI format for this store follows the file URI scheme of the base
179 | directory path, as defined in [RFC8089]_. For a Windows base directory path
 180 | "c:\\my data" the canonical URI would be "file:///c:/my%20data", for a POSIX
 181 | base directory "/my data" it would be "file:///my%20data".
182 | 
183 | When expecting a URI string, but no scheme is present, implementations may
184 | assume a filesystem store with the (supposedly URI) string as the base directory
185 | path.
186 | 
187 | 
188 | Store limitations
189 | =================
190 | 
 191 | The following limitations for this store are known:
192 | 
193 | * `260 characters path length limit in Windows <https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation>`_
194 | * `Windows paths are case-insensitive by default <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>`_
 195 | * `macOS paths are case-insensitive by default <https://support.apple.com/guide/disk-utility/file-system-formats-dsku19ed921c/mac>`_
196 | 
197 | 
198 | References
199 | ==========
200 | 
201 | .. [RFC2119] S. Bradner. Key words for use in RFCs to Indicate
202 |    Requirement Levels. March 1997. Best Current Practice. URL:
203 |    https://tools.ietf.org/html/rfc2119
204 | 
205 | .. [RFC8089] M. Kerwin. The "file" URI Scheme. February 2017. Proposed Standard.
206 |    URL: https://tools.ietf.org/html/rfc8089
207 | 
208 | 
209 | Change log
210 | ==========
211 | 
212 | No changes yet.
213 | 


--------------------------------------------------------------------------------
/docs/v3/stores/index.rst:
--------------------------------------------------------------------------------
 1 | .. _store-list:
 2 | 
 3 | ======
 4 | Stores
 5 | ======
 6 | 
 7 | The following documents specify stores which SHOULD
 8 | be implemented by all implementations.
 9 | 
10 | .. toctree::
11 |    :glob:
12 |    :maxdepth: 1
13 |    :titlesonly:
14 |    :caption: Contents:
15 | 
16 |    */*
17 | 
18 | .. note::
19 |    Stores are *not* extension points since they define the mechanism
20 |    for loading metadata documents such that extensions can be loaded.
21 | 


--------------------------------------------------------------------------------