├── doc ├── dune └── index.mld ├── zarr ├── src │ ├── storage │ │ ├── storage.mli │ │ ├── memory.ml │ │ ├── zip_archive.ml │ │ ├── storage_intf.ml │ │ └── storage.ml │ ├── zarr.ml │ ├── dune │ ├── util.ml │ ├── util.mli │ ├── extensions.mli │ ├── ebuffer.mli │ ├── types.ml │ ├── zarr.mli │ ├── node.ml │ ├── ndarray.mli │ ├── metadata.mli │ ├── codecs.mli │ ├── node.mli │ ├── ebuffer.ml │ ├── extensions.ml │ ├── ndarray.ml │ └── metadata.ml └── test │ ├── dune │ ├── test_all.ml │ ├── test_indexing.ml │ ├── test_ndarray.ml │ ├── test_node.ml │ └── test_codecs.ml ├── examples ├── data │ └── testdata.zip ├── dune ├── picos_fs_store.ml └── zipstore.ml ├── zarr-eio ├── test │ ├── dune │ └── test_eio.ml └── src │ ├── dune │ ├── storage.mli │ └── storage.ml ├── zarr-lwt ├── test │ ├── dune │ └── test_lwt.ml └── src │ ├── dune │ ├── storage.mli │ └── storage.ml ├── zarr-sync ├── test │ ├── dune │ └── test_sync.ml └── src │ ├── dune │ ├── storage.mli │ └── storage.ml ├── .gitignore ├── Makefile ├── zarr-sync.opam ├── zarr-eio.opam ├── zarr-lwt.opam ├── zarr.opam ├── LICENSE ├── dune-project ├── .github └── workflows │ └── build-and-test.yml └── README.md /doc/dune: -------------------------------------------------------------------------------- 1 | (documentation 2 | (package zarr)) 3 | -------------------------------------------------------------------------------- /zarr/src/storage/storage.mli: -------------------------------------------------------------------------------- 1 | include Storage_intf.Interface 2 | -------------------------------------------------------------------------------- /examples/data/testdata.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zoj613/zarr-ml/HEAD/examples/data/testdata.zip -------------------------------------------------------------------------------- /zarr/test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_all) 3 | (libraries 4 | zarr 5 | ounit2) 6 | (package zarr) 7 | (preprocess 8 | (pps ppx_deriving.show))) 9 | -------------------------------------------------------------------------------- /zarr-eio/test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_eio) 3 | (libraries 4 | zarr-eio 5 | ounit2) 6 | (package zarr-eio) 7 | (preprocess 8 | (pps ppx_deriving.show))) 9 | -------------------------------------------------------------------------------- /zarr-lwt/test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_lwt) 3 | (libraries 4 | zarr-lwt 5 | ounit2) 6 | (package zarr-lwt) 7 | (preprocess 8 | (pps ppx_deriving.show))) 9 | -------------------------------------------------------------------------------- /zarr-sync/test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test_sync) 3 | (libraries 4 | zarr-sync 5 | ounit2) 6 | (package zarr-sync) 7 | (preprocess 8 | (pps ppx_deriving.show))) 9 | -------------------------------------------------------------------------------- /zarr-sync/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name zarr_sync) 3 | (public_name zarr-sync) 4 | (libraries zarr) 5 | (ocamlopt_flags 6 | (:standard -O3)) 7 | (instrumentation 8 | (backend bisect_ppx))) 9 | -------------------------------------------------------------------------------- 
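For reference, a downstream project consuming these packages would declare them in its own dune file using the public names declared in the stanzas above (zarr, zarr-sync). The following is a minimal sketch; the executable name [demo] and its source module are hypothetical and not part of this repository:

(executable
 (name demo)
 (libraries zarr zarr-sync))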
/zarr-eio/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name zarr_eio) 3 | (public_name zarr-eio) 4 | (libraries 5 | zarr 6 | eio_main) 7 | (ocamlopt_flags 8 | (:standard -O3)) 9 | (instrumentation 10 | (backend bisect_ppx))) 11 | -------------------------------------------------------------------------------- /zarr-lwt/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name zarr_lwt) 3 | (public_name zarr-lwt) 4 | (libraries 5 | zarr 6 | aws-s3-lwt 7 | lwt 8 | lwt.unix) 9 | (ocamlopt_flags 10 | (:standard -O3)) 11 | (instrumentation 12 | (backend bisect_ppx))) 13 | -------------------------------------------------------------------------------- /zarr/test/test_all.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | 3 | let () = 4 | let suite = "Run All tests" >::: 5 | Test_node.tests @ 6 | Test_indexing.tests @ 7 | Test_metadata.tests @ 8 | Test_ndarray.tests @ 9 | Test_codecs.tests 10 | in run_test_tt_main suite 11 | -------------------------------------------------------------------------------- /zarr/src/zarr.ml: -------------------------------------------------------------------------------- 1 | module Node = Node 2 | module Util = Util 3 | module Indexing = Ndarray.Indexing 4 | module Metadata = Metadata 5 | module Storage = Storage 6 | module Codecs = Codecs 7 | module Memory = Memory 8 | module Zip = Zip_archive 9 | module Types = Types 10 | module Ndarray = Ndarray 11 | -------------------------------------------------------------------------------- /examples/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name zipstore) 3 | (modules zipstore) 4 | (ocamlopt_flags (:standard -O3)) 5 | (libraries zarr-eio zipc)) 6 | 7 | (executable 8 | (name picos_fs_store) 9 | (modules picos_fs_store) 10 | (ocamlopt_flags (:standard -O3)) 11 | (libraries zarr_sync picos_io picos_mux.random)) 12 | -------------------------------------------------------------------------------- /zarr/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name zarr) 3 | (public_name zarr) 4 | (libraries 5 | yojson 6 | bytesrw.zstd 7 | bytesrw.zlib 8 | camlzip 9 | stdint 10 | checkseum) 11 | (ocamlopt_flags 12 | (:standard -O3)) 13 | (instrumentation 14 | (backend bisect_ppx))) 15 | 16 | (include_subdirs unqualified) 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.annot 2 | *.cmo 3 | *.cma 4 | *.cmi 5 | *.a 6 | *.o 7 | *.cmx 8 | *.cmxs 9 | *.cmxa 10 | 11 | # ocamlbuild working directory 12 | _build/ 13 | 14 | # ocamlbuild targets 15 | *.byte 16 | *.native 17 | 18 | # oasis generated files 19 | setup.data 20 | setup.log 21 | 22 | # Merlin configuring file for Vim and Emacs 23 | .merlin 24 | 25 | # Dune generated files 26 | *.install 27 | 28 | # Local OPAM switch 29 | _opam/ 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build 2 | build: 3 | dune build zarr zarr-lwt zarr-sync zarr-eio 4 | 5 | .PHONY: clean 6 | clean: 7 | dune clean 8 | 9 | .PHONY: test 10 | test: build 11 | dune runtest --force 12 | 13 | .PHONY: test-cov 14 | test-cov: build 15 | OUNIT_CI=true 
dune runtest --instrument-with bisect_ppx --force 16 | bisect-ppx-report summary --per-file 17 | 18 | .PHONY: show-cov 19 | show-cov: test-cov 20 | bisect-ppx-report html 21 | chromium _coverage/index.html 22 | 23 | docs: 24 | dune build @doc 25 | 26 | .PHONY: view-docs 27 | view-docs: docs 28 | chromium _build/default/_doc/_html/index.html 29 | 30 | .PHONY: minio 31 | minio: 32 | mkdir -p /tmp/minio/test-bucket-lwt 33 | docker run --rm -it -p 9000:9000 -v /tmp/minio:/minio minio/minio:latest server /minio 34 | -------------------------------------------------------------------------------- /zarr-sync/src/storage.mli: -------------------------------------------------------------------------------- 1 | module IO : Zarr.Types.IO with type 'a t = 'a 2 | 3 | (** A blocking I/O in-memory storage backend for Zarr v3 hierarchy. *) 4 | module MemoryStore : Zarr.Memory.S with type 'a io := 'a 5 | 6 | (** A blocking I/O Zip file storage backend for a Zarr v3 hierarchy. *) 7 | module ZipStore : Zarr.Zip.S with type 'a io := 'a 8 | 9 | (** A blocking I/O local filesystem storage backend for a Zarr v3 hierarchy. *) 10 | module FilesystemStore : sig 11 | include Zarr.Storage.S with type 'a io := 'a 12 | 13 | val create : ?perm:int -> string -> t 14 | (** [create ~perm dir] returns a new filesystem store. 15 | 16 | @raise Failure if [dir] is a directory that already exists.*) 17 | 18 | val open_store : ?perm:int -> string -> t 19 | (** [open_store ~perm dir] returns an existing filesystem Zarr store. 20 | 21 | @raise Failure if [dir] is not a Zarr store path. *) 22 | end 23 | -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 The [zarr] library} 2 | 3 | The Zarr library provides an OCaml implementation of the Zarr version 3 4 | storage format specification for chunked & compressed multi-dimensional 5 | arrays, designed for use in parallel computing. The storage format is used 6 | by many companies including Google, NASA, Microsoft and {{:https://zarr.dev/adopters/}many others}. 7 | Zarr's goal is to provide the following features: 8 | - Chunk multi-dimensional arrays along any dimension. 9 | - Store arrays in memory, on disk, inside a Zip file or any remote storage backend. 10 | - Read and write arrays concurrently from multiple threads or processes. 11 | - Organize arrays into hierarchies using groups. 12 | 13 | See {{:https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html}Zarr V3 specification}. 14 | 15 | Author: Zolisa Bleki 16 | 17 | {1 Entry Point} 18 | 19 | The entry point of this library is the module {!zarr}. 
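To make the feature list above concrete, here is a small illustrative sketch of the node and slice-indexing APIs that appear later in this listing. The group path, array name and shapes are arbitrary, and the snippet assumes only the [Node] and [Ndarray.Indexing] signatures shown below.

{@ocaml[
  let () =
    let group = Zarr.Node.Group.of_path "/some/group" in
    let node = Zarr.Node.Array.(group / "data") in
    print_endline (Zarr.Node.Array.to_path node);  (* prints /some/group/data *)
    (* indices 0..20 along the first axis, index 10 along the second and the whole
       third axis; the bounds of [R] appear to be inclusive, going by the tests in
       zarr/test/test_indexing.ml *)
    let slice = Zarr.Indexing.[R (0, 20); I 10; F] in
    let coords = Zarr.Indexing.coords_of_slice slice [100; 100; 50] in
    Printf.printf "%d coordinates selected\n" (List.length coords)
]}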
20 | -------------------------------------------------------------------------------- /zarr-sync.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.1.0" 4 | synopsis: "Synchronous API for Zarr" 5 | maintainer: ["Zolisa Bleki"] 6 | authors: ["Zolisa Bleki"] 7 | license: "BSD-3-Clause" 8 | homepage: "https://github.com/zoj613/zarr-ml" 9 | doc: "https://zoj613.github.io/zarr-ml" 10 | bug-reports: "https://github.com/zoj613/zarr-ml/issues" 11 | depends: [ 12 | "dune" {>= "3.15"} 13 | "ocaml" {>= "4.14.0"} 14 | "zarr" {= version} 15 | "odoc" {with-doc} 16 | "ounit2" {with-test} 17 | "ppx_deriving" {with-test} 18 | "bisect_ppx" {dev & >= "2.5.0" & with-test} 19 | ] 20 | build: [ 21 | ["dune" "subst"] {dev} 22 | [ 23 | "dune" 24 | "build" 25 | "-p" 26 | name 27 | "-j" 28 | jobs 29 | "@install" 30 | "@runtest" {with-test} 31 | "@doc" {with-doc} 32 | ] 33 | ] 34 | dev-repo: "git+https://github.com/zoj613/zarr-ml.git" 35 | -------------------------------------------------------------------------------- /zarr-eio.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.1.0" 4 | synopsis: "Eio-aware API for Zarr" 5 | maintainer: ["Zolisa Bleki"] 6 | authors: ["Zolisa Bleki"] 7 | license: "BSD-3-Clause" 8 | homepage: "https://github.com/zoj613/zarr-ml" 9 | doc: "https://zoj613.github.io/zarr-ml" 10 | bug-reports: "https://github.com/zoj613/zarr-ml/issues" 11 | depends: [ 12 | "dune" {>= "3.15"} 13 | "ocaml" {>= "5.1.0"} 14 | "zarr" {= version} 15 | "eio_main" {>= "1.0"} 16 | "odoc" {with-doc} 17 | "ounit2" {with-test} 18 | "ppx_deriving" {with-test} 19 | "bisect_ppx" {dev & >= "2.5.0" & with-test} 20 | ] 21 | build: [ 22 | ["dune" "subst"] {dev} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "@install" 31 | "@runtest" {with-test} 32 | "@doc" {with-doc} 33 | ] 34 | ] 35 | dev-repo: "git+https://github.com/zoj613/zarr-ml.git" 36 | -------------------------------------------------------------------------------- /zarr-lwt.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.1.0" 4 | synopsis: "Lwt-aware API for Zarr" 5 | maintainer: ["Zolisa Bleki"] 6 | authors: ["Zolisa Bleki"] 7 | license: "BSD-3-Clause" 8 | homepage: "https://github.com/zoj613/zarr-ml" 9 | doc: "https://zoj613.github.io/zarr-ml" 10 | bug-reports: "https://github.com/zoj613/zarr-ml/issues" 11 | depends: [ 12 | "dune" {>= "3.15"} 13 | "ocaml" {>= "4.14.0"} 14 | "zarr" {= version} 15 | "lwt" {>= "2.5.1"} 16 | "aws-s3-lwt" {>= "4.8.1"} 17 | "odoc" {with-doc} 18 | "ounit2" {with-test} 19 | "ppx_deriving" {with-test} 20 | "bisect_ppx" {dev & >= "2.5.0" & with-test} 21 | ] 22 | build: [ 23 | ["dune" "subst"] {dev} 24 | [ 25 | "dune" 26 | "build" 27 | "-p" 28 | name 29 | "-j" 30 | jobs 31 | "@install" 32 | "@runtest" {with-test} 33 | "@doc" {with-doc} 34 | ] 35 | ] 36 | dev-repo: "git+https://github.com/zoj613/zarr-ml.git" 37 | -------------------------------------------------------------------------------- /zarr-eio/src/storage.mli: -------------------------------------------------------------------------------- 1 | module IO : Zarr.Types.IO with type 'a t = 'a 2 | 3 | (** An Eio-aware in-memory storage 
backend for Zarr v3 hierarchy. *) 4 | module MemoryStore : Zarr.Memory.S with type 'a io := 'a 5 | 6 | (** An Eio-aware Zip file storage backend for a Zarr v3 hierarchy. *) 7 | module ZipStore : Zarr.Zip.S with type 'a io := 'a 8 | 9 | (** An Eio-aware local filesystem storage backend for a Zarr v3 hierarchy. *) 10 | module FilesystemStore : sig 11 | include Zarr.Storage.S with type 'a io := 'a 12 | 13 | val create : ?perm:Eio.File.Unix_perm.t -> env: -> string -> t 14 | (** [create ~perm ~env dir] returns a new filesystem store. 15 | 16 | @raise Failure if [dir] is a directory that already exists.*) 17 | 18 | val open_store : ?perm:Eio.File.Unix_perm.t -> env: -> string -> t 19 | (** [open_store ~perm ~env dir] returns an existing filesystem Zarr store. 20 | 21 | @raise Failure if [dir] is a file and not a Zarr store path. *) 22 | end 23 | -------------------------------------------------------------------------------- /zarr/src/util.ml: -------------------------------------------------------------------------------- 1 | module CoordMap = struct 2 | include Map.Make (struct 3 | type t = int list 4 | let compare : t -> t -> int = Stdlib.compare 5 | end) 6 | 7 | let add_to_list k v map = 8 | let f ~v = function 9 | | None -> Some [v] 10 | | Some l -> Some (v :: l) 11 | in 12 | update k (f ~v) map 13 | end 14 | 15 | module Result_syntax = struct 16 | let (let*) = Result.bind 17 | let (let+) x f = Result.map f x 18 | end 19 | 20 | let get_name j = Yojson.Safe.Util.(member "name" j |> to_string) 21 | let max = Array.fold_left Int.max Int.min_int 22 | 23 | (* Obtained from: https://discuss.ocaml.org/t/how-to-create-a-new-file-while-automatically-creating-any-intermediate-directories/14837/5?u=zoj613 *) 24 | let rec create_parent_dir fn perm = 25 | let parent_dir = Filename.dirname fn in 26 | if not (Sys.file_exists parent_dir) then begin 27 | create_parent_dir parent_dir perm; 28 | Sys.mkdir parent_dir perm 29 | end 30 | 31 | let sanitize_dir dir = match Filename.chop_suffix_opt ~suffix:"/" dir with 32 | | None -> dir 33 | | Some d -> d 34 | -------------------------------------------------------------------------------- /zarr/src/util.mli: -------------------------------------------------------------------------------- 1 | (** A finite map over Zarr array coordinate keys. *) 2 | module CoordMap : sig 3 | include Map.S with type key = int list 4 | val add_to_list : int list -> 'a -> 'a list t -> 'a list t 5 | (** [add_to_list k v map] is [map] with [k] mapped to [l] such that [l] 6 | is [v :: ArrayMap.find k map] if [k] was bound in [map] and [v] otherwise.*) 7 | end 8 | 9 | (** Result monad operator syntax. *) 10 | module Result_syntax : sig 11 | val (let*) : ('a, 'e) result -> ('a -> ('b, 'e) result ) -> ('b, 'e) result 12 | val (let+) : ('a, 'e) result -> ('a -> 'b) -> ('b, 'e) result 13 | end 14 | 15 | val get_name : Yojson.Safe.t -> string 16 | (** [get_name c] returns the name value of a JSON metadata extension point 17 | configuration of the form [{"name": value, "configuration": ...}], 18 | as defined in the Zarr V3 specification. *) 19 | 20 | val max : int array -> int 21 | (** [max x] returns the maximum element of an integer array [x]. *) 22 | 23 | val create_parent_dir : string -> int -> unit 24 | (** [create_parent_dir f p] creates all the parent directories of file name 25 | [f] if they don't exist given file permissions [p]. *) 26 | 27 | val sanitize_dir : string -> string 28 | (** [sanitize_dir d] Chops off any trailing '/' in directory path [d]. 
*) 29 | -------------------------------------------------------------------------------- /zarr.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.1.0" 4 | synopsis: "An Ocaml implementation of the Zarr V3 specification" 5 | description: """ 6 | The Zarr library provides an OCaml implementation of the Zarr version 3 7 | storage format specification for chunked & compressed multi-dimensional 8 | arrays, designed for use in parallel computing.""" 9 | maintainer: ["Zolisa Bleki"] 10 | authors: ["Zolisa Bleki"] 11 | license: "BSD-3-Clause" 12 | tags: ["zarr" "chunked arrays" "zarr version 3"] 13 | homepage: "https://github.com/zoj613/zarr-ml" 14 | doc: "https://zoj613.github.io/zarr-ml" 15 | bug-reports: "https://github.com/zoj613/zarr-ml/issues" 16 | depends: [ 17 | "dune" {>= "3.15"} 18 | "ocaml" {>= "4.14.0"} 19 | "yojson" {>= "1.6.0"} 20 | "stdint" {>= "0.7.2"} 21 | "camlzip" {>= "1.13"} 22 | "bytesrw" {>= "0.1.0"} 23 | "checkseum" {>= "0.4.0"} 24 | "odoc" {with-doc} 25 | "ounit2" {with-test} 26 | "ppx_deriving" {with-test} 27 | "bisect_ppx" {dev & >= "2.5.0" & with-test} 28 | ] 29 | build: [ 30 | ["dune" "subst"] {dev} 31 | [ 32 | "dune" 33 | "build" 34 | "-p" 35 | name 36 | "-j" 37 | jobs 38 | "@install" 39 | "@runtest" {with-test} 40 | "@doc" {with-doc} 41 | ] 42 | ] 43 | dev-repo: "git+https://github.com/zoj613/zarr-ml.git" 44 | -------------------------------------------------------------------------------- /zarr/src/extensions.mli: -------------------------------------------------------------------------------- 1 | module RegularGrid : sig 2 | exception Grid_shape_mismatch 3 | type t 4 | val create : array_shape:int list -> int list -> t 5 | val chunk_shape : t -> int list 6 | val indices : t -> int list -> int list list 7 | val index_coord_pair : t -> int list -> int list * int list 8 | val ( = ) : t -> t -> bool 9 | val of_yojson : int list -> Yojson.Safe.t -> (t, string) result 10 | val to_yojson : t -> Yojson.Safe.t 11 | end 12 | 13 | module ChunkKeyEncoding : sig 14 | type t 15 | val create : [< `Slash | `Dot > `Slash ] -> t 16 | val encode : t -> int list -> string 17 | val ( = ) : t -> t -> bool 18 | val of_yojson : Yojson.Safe.t -> (t, string) result 19 | val to_yojson : t -> Yojson.Safe.t 20 | end 21 | 22 | module Datatype : sig 23 | (** Data types as defined in the Zarr V3 specification *) 24 | 25 | type t = 26 | | Char 27 | | Bool 28 | | Int8 29 | | Uint8 30 | | Int16 31 | | Uint16 32 | | Int32 33 | | Int64 34 | | Uint64 35 | | Float32 36 | | Float64 37 | | Complex32 38 | | Complex64 39 | | Int 40 | | Nativeint 41 | (** A type for the supported data types of a Zarr array. *) 42 | 43 | val ( = ) : t -> t -> bool 44 | val of_kind : 'a Ndarray.dtype -> t 45 | val of_yojson : Yojson.Safe.t -> (t, string) result 46 | val to_yojson : t -> Yojson.Safe.t 47 | end 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Zolisa Bleki 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /zarr/src/ebuffer.mli: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | val set_char : bytes -> int -> char -> unit 3 | val set_bool : bytes -> int -> bool -> unit 4 | val set_int8 : bytes -> int -> int -> unit 5 | val set_uint8 : bytes -> int -> int -> unit 6 | val set_int16 : bytes -> int -> int -> unit 7 | val set_uint16 : bytes -> int -> int -> unit 8 | val set_int32 : bytes -> int -> int32 -> unit 9 | val set_int64 : bytes -> int -> int64 -> unit 10 | val set_uint64 : bytes -> int -> Stdint.uint64 -> unit 11 | val set_float32 : bytes -> int -> float -> unit 12 | val set_float64 : bytes -> int -> float -> unit 13 | val set_complex32 : bytes -> int -> Complex.t -> unit 14 | val set_complex64 : bytes -> int -> Complex.t -> unit 15 | val set_int : bytes -> int -> int -> unit 16 | val set_nativeint : bytes -> int -> nativeint -> unit 17 | 18 | val get_char : bytes -> int -> char 19 | val get_bool : bytes -> int -> bool 20 | val get_int8 : bytes -> int -> int 21 | val get_uint8 : bytes -> int -> int 22 | val get_int16 : bytes -> int -> int 23 | val get_uint16 : bytes -> int -> int 24 | val get_int32 : bytes -> int -> int32 25 | val get_int64 : bytes -> int -> int64 26 | val get_uint64 : bytes -> int -> Stdint.uint64 27 | val get_float32 : bytes -> int -> float 28 | val get_float64 : bytes -> int -> float 29 | val get_complex32 : bytes -> int -> Complex.t 30 | val get_complex64 : bytes -> int -> Complex.t 31 | val get_int : bytes -> int -> int 32 | val get_nativeint : bytes -> int -> nativeint 33 | end 34 | 35 | module Little : sig include S end 36 | 37 | module Big : sig include S end 38 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.15) 2 | 3 | (name zarr) 4 | 5 | (version 0.1.0) 6 | 7 | (generate_opam_files true) 8 | 9 | (source 10 | (github zoj613/zarr-ml)) 11 | 12 | (authors "Zolisa Bleki") 13 | 14 | (maintainers "Zolisa Bleki") 15 | 16 | (license BSD-3-Clause) 17 | 18 | (documentation https://zoj613.github.io/zarr-ml) 19 | 20 | (package 21 | (name zarr) 22 | (synopsis "An Ocaml 
implementation of the Zarr V3 specification") 23 | (description 24 | "The Zarr library provides an OCaml implementation of the Zarr version 3 25 | storage format specification for chunked & compressed multi-dimensional 26 | arrays, designed for use in parallel computing.") 27 | (depends 28 | dune 29 | (ocaml 30 | (and (>= 4.14.0))) 31 | (yojson (>= 1.6.0)) 32 | (stdint (>= 0.7.2)) 33 | (camlzip (>= 1.13)) 34 | (bytesrw (>= 0.1.0)) 35 | (checkseum (>= 0.4.0)) 36 | (odoc :with-doc) 37 | (ounit2 :with-test) 38 | (ppx_deriving :with-test) 39 | (bisect_ppx 40 | (and :dev (>= 2.5.0) :with-test))) 41 | (tags 42 | ("zarr" "chunked arrays" "zarr version 3"))) 43 | 44 | (package 45 | (name zarr-sync) 46 | (synopsis "Synchronous API for Zarr") 47 | (depends 48 | dune 49 | (ocaml 50 | (and (>= 4.14.0))) 51 | (zarr (= :version)) 52 | (odoc :with-doc) 53 | (ounit2 :with-test) 54 | (ppx_deriving :with-test) 55 | (bisect_ppx 56 | (and :dev (>= 2.5.0) :with-test)))) 57 | 58 | (package 59 | (name zarr-lwt) 60 | (synopsis "Lwt-aware API for Zarr") 61 | (depends 62 | dune 63 | (ocaml 64 | (and (>= 4.14.0))) 65 | (zarr (= :version)) 66 | (lwt (>= 2.5.1)) 67 | (aws-s3-lwt (>= 4.8.1)) 68 | (odoc :with-doc) 69 | (ounit2 :with-test) 70 | (ppx_deriving :with-test) 71 | (bisect_ppx 72 | (and :dev (>= 2.5.0) :with-test)))) 73 | 74 | (package 75 | (name zarr-eio) 76 | (synopsis "Eio-aware API for Zarr") 77 | (depends 78 | dune 79 | (ocaml 80 | (and (>= 5.1.0))) 81 | (zarr (= :version)) 82 | (eio_main (>= 1.0)) 83 | (odoc :with-doc) 84 | (ounit2 :with-test) 85 | (ppx_deriving :with-test) 86 | (bisect_ppx 87 | (and :dev (>= 2.5.0) :with-test)))) 88 | -------------------------------------------------------------------------------- /zarr-lwt/src/storage.mli: -------------------------------------------------------------------------------- 1 | module IO : Zarr.Types.IO with type 'a t = 'a Lwt.t 2 | 3 | (** An Lwt-aware in-memory storage backend for Zarr v3 hierarchy. *) 4 | module MemoryStore : Zarr.Memory.S with type 'a io := 'a Lwt.t 5 | 6 | (** An Lwt-aware Zip file storage backend for a Zarr v3 hierarchy. *) 7 | module ZipStore : Zarr.Zip.S with type 'a io := 'a Lwt.t 8 | 9 | (** An Lwt-aware local filesystem storage backend for a Zarr V3 hierarchy. *) 10 | module FilesystemStore : sig 11 | include Zarr.Storage.S with type 'a io := 'a Lwt.t 12 | 13 | val create : ?perm:Unix.file_perm -> string -> t 14 | (** [create ~perm dir] returns a new filesystem store. 15 | 16 | @raise Failure if [dir] is a directory that already exists.*) 17 | 18 | val open_store : ?perm:Unix.file_perm -> string -> t 19 | (** [open_store ~perm dir] returns an existing filesystem Zarr store. 20 | 21 | @raise Failure if [dir] is not a Zarr store path. *) 22 | end 23 | 24 | (** An Lwt-aware Amazon S3 bucket storage backend for a Zarr V3 hierarchy. *) 25 | module AmazonS3Store : sig 26 | exception Request_failed of Aws_s3_lwt.S3.error 27 | 28 | include Zarr.Storage.S with type 'a io := 'a Lwt.t 29 | 30 | val with_open : 31 | ?scheme:[ `Http | `Https ] -> 32 | ?inet:[ `V4 | `V6 ] -> 33 | ?retries:int -> 34 | region:Aws_s3.Region.t -> 35 | bucket:string -> 36 | profile:string -> 37 | (t -> 'a Lwt.t) -> 38 | 'a Lwt.t 39 | (** [with_open ~region ~bucket ~profile f] opens an S3 bucket store with 40 | bucket name [bucket] at region [region] using credentials specified by 41 | profile [profile]. The credentials are read locally from a [~/.aws/credentials] 42 | file or from an IAM service if the profile or file is not available. 
43 | Function [f] is applied to the store's open handle and its output is 44 | returned to the caller. 45 | 46 | {ul 47 | {- [scheme] is the HTTP scheme to use when connecting to S3, and must be 48 | one of [`Http | `Https]. Defaults to [`Http].} 49 | {- [inet] is the IP version and must be one of [`V4 | `V6]. Defaults to [`V4].} 50 | {- [retries] is the number of times to retry a request should it return an error.} 51 | } 52 | 53 | @raise Request_failed if an error occurs while sending a request to the S3 service. *) 54 | end 55 | -------------------------------------------------------------------------------- /zarr/src/types.ml: -------------------------------------------------------------------------------- 1 | module type IO = sig 2 | type 'a t 3 | val return : 'a -> 'a t 4 | val bind : 'a t -> ('a -> 'b t) -> 'b t 5 | val map : ('a -> 'b) -> 'a t -> 'b t 6 | val return_unit : unit t 7 | val iter : ('a -> unit t) -> 'a list -> unit t 8 | val fold_left : ('acc -> 'a -> 'acc t) -> 'acc -> 'a list -> 'acc t 9 | val concat_map : ('a -> 'b list t) -> 'a list -> 'b list t 10 | module Infix : sig 11 | val (>>=) : 'a t -> ('a -> 'b t) -> 'b t 12 | val (>>|) : 'a t -> ('a -> 'b) -> 'b t 13 | end 14 | module Syntax : sig 15 | val (let*) : 'a t -> ('a -> 'b t) -> 'b t 16 | val (let+) : 'a t -> ('a -> 'b) -> 'b t 17 | end 18 | end 19 | 20 | type key = string 21 | type range = int * int option 22 | type value = string 23 | type range_start = int 24 | type prefix = string 25 | 26 | module type Store = sig 27 | (** The abstract store interface that stores should implement. 28 | 29 | The store interface defines a set of operations involving keys and values. 30 | In the context of this interface, a key is a Unicode string, where the final 31 | character is not a "/". In general, a value is a sequence of bytes. 32 | Specific stores may choose more specific storage formats, which must be 33 | stated in the specification of the respective store. 34 | 35 | It is assumed that the store holds (key, value) pairs, with only one 36 | such pair for any given key. (i.e. a store is a mapping from keys to 37 | values). It is also assumed that keys are case sensitive, i.e., the keys 38 | “foo” and “FOO” are different. The store interface also defines some 39 | operations involving prefixes. In the context of this interface, 40 | a prefix is a string containing only characters that are valid for use 41 | in keys and ending with a trailing / character. 
*) 42 | type t 43 | type 'a io 44 | val size : t -> key -> int io 45 | val get : t -> key -> value io 46 | val get_partial_values : t -> string -> range list -> value list io 47 | val set : t -> key -> value -> unit io 48 | val set_partial_values : t -> key -> ?append:bool -> (range_start * value) list -> unit io 49 | val erase : t -> key -> unit io 50 | val erase_prefix : t -> key -> unit io 51 | val list : t -> key list io 52 | val list_dir : t -> key -> (key list * prefix list) io 53 | val is_member : t -> key -> bool io 54 | val rename : t -> key -> key -> unit io 55 | end 56 | -------------------------------------------------------------------------------- /zarr/test/test_indexing.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | open Zarr.Indexing 4 | 5 | 6 | let tests = [ 7 | 8 | "slice from coords" >:: (fun _ -> 9 | (* slice_of_coords should be duplicate coord-aware *) 10 | let coords = [[0; 1; 2; 3]; [9; 8; 7; 6]; [9; 8; 7; 6]; [5; 4; 3; 2]] in 11 | let expected = [L [0; 5; 9]; L [1; 4; 8]; L [2; 3; 7]; L [2; 3; 6]] in 12 | assert_equal expected @@ Indexing.slice_of_coords coords; 13 | assert_equal [] @@ Indexing.slice_of_coords []) 14 | ; 15 | "coords from slice" >:: (fun _ -> 16 | let shape = [10; 10; 10] in 17 | let slice = [L [0; 9; 5]; I 1; R' (9, 3, -3)] in 18 | let expected = [[0; 1; 9]; [0; 1; 6]; [0; 1; 3]; [9; 1; 9]; [9; 1; 6]; [9; 1; 3] ;[5; 1; 9]; [5; 1; 6]; [5; 1; 3]] in 19 | assert_equal ~printer:[%show: int list list] expected @@ Indexing.coords_of_slice slice shape; 20 | 21 | (* test using an empty slice translates to selection the whole array. *) 22 | assert_equal [[0; 0]; [0; 1]; [1; 0]; [1; 1]] (Indexing.coords_of_slice [] [2; 2]); 23 | 24 | (* test missing definition on higher dimensions *) 25 | let shape = [3; 3; 3] in 26 | let expected = [[2; 0; 0]; [2; 0; 1]; [2; 0; 2]] in 27 | let slice = [I 2; I 0] in 28 | assert_equal expected @@ Indexing.coords_of_slice slice shape; 29 | (* test negative I value *) 30 | let expected = [[2; 2; 0]; [2; 2; 1]; [2; 2; 2]] in 31 | let slice = [I 2; I (-1)] in 32 | assert_equal expected @@ Indexing.coords_of_slice slice shape; 33 | let slice = [R (-1, 2); L [-1]; L [0; 0; 0]] in 34 | let expected = [[2; 2; 0]; [2; 2; 0]; [2; 2; 0]] in 35 | assert_equal expected @@ Indexing.coords_of_slice slice shape; 36 | let slice = [R (0, -2); T 1; T (-1)] in 37 | let expected = [[0; 1; 2]; [1; 1; 2]] in 38 | assert_equal expected @@ Indexing.coords_of_slice slice shape; 39 | let slice = [R (1, 0); T 1; T (-1)] in 40 | let expected = [[1; 1; 2]; [0; 1; 2]] in 41 | assert_equal expected @@ Indexing.coords_of_slice slice shape; 42 | let slice = [I 2; I (-1); R' (-1, -1, 1)] in 43 | let expected = [[2; 2; 2]] in 44 | assert_equal expected @@ Indexing.coords_of_slice slice shape 45 | ) 46 | ; 47 | "compute slice shape" >:: (fun _ -> 48 | let shape = [10; 10; 10] in 49 | let slice = Ndarray.Indexing.[L [0; 9; 5]; I 1; R' (2, 9, 1)] in 50 | assert_equal [3; 1; 8] @@ Indexing.slice_shape slice shape; 51 | assert_equal shape @@ Indexing.slice_shape [] shape) 52 | ; 53 | "cartesian product" >:: (fun _ -> 54 | let ll = [[1; 2]; [3; 8]; [9; 4]] in 55 | let expected = 56 | [[1; 3; 9]; [1; 3; 4]; [1; 8; 9]; [1; 8; 4] 57 | ;[2; 3; 9]; [2; 3; 4]; [2; 8; 9]; [2; 8; 4]] 58 | in assert_equal expected @@ Indexing.cartesian_prod ll) 59 | ] 60 | -------------------------------------------------------------------------------- /zarr/src/zarr.mli: 
-------------------------------------------------------------------------------- 1 | (* Copyright (c) 2024, Zolisa Bleki 2 | 3 | SPDX-License-Identifier: BSD-3-Clause *) 4 | 5 | (** 6 | [zarr] Provides an OCaml implementation of the Zarr version 3 storage 7 | format specification. It supports creation of arrays and groups as well 8 | as chunking arrays along any dimension. One can store a Zarr hierarchy in 9 | memory or on disk. Zarr also supports reading zarr hierarchies created using 10 | other implementations, as long as they are spec-compliant. 11 | 12 | Consult the {{!examples}examples} and {{!limitations}limitations} for more info. 13 | 14 | {3 References} 15 | {ul 16 | {- {{:https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html}The Zarr Version 3 specification.}} 17 | {- {{:https://zarr.dev/}Zarr community site.}} 18 | } 19 | *) 20 | 21 | (** {1 Node} *) 22 | 23 | module Node = Node 24 | 25 | (** {1 Metadata} *) 26 | 27 | module Metadata = Metadata 28 | 29 | (** {1 Storage} *) 30 | 31 | module Storage = Storage 32 | module Memory = Memory 33 | module Zip = Zip_archive 34 | module Types = Types 35 | 36 | (** {1 Codecs} *) 37 | 38 | module Codecs = Codecs 39 | 40 | (** {1 Indexing} *) 41 | 42 | module Indexing = Ndarray.Indexing 43 | 44 | (** {1 Utils} *) 45 | 46 | module Util = Util 47 | 48 | (** {1 Ndarray} *) 49 | 50 | module Ndarray = Ndarray 51 | 52 | (** {1:examples Examples} 53 | 54 | {2:create_array Create, read & write array.} 55 | Here we show how to use the library's asynchronous API built on Lwt's concurrency monad. 56 | {@ocaml[ 57 | open Zarr 58 | open Zarr.Ndarray 59 | open Zarr.Indexing 60 | open Zarr.Codecs 61 | open Zarr_lwt.Storage 62 | open IO.Syntax 63 | 64 | let _ = 65 | Lwt_main.run begin 66 | let store = FilesystemStore.create "testdata.zarr" in 67 | let group_node = Node.Group.root in 68 | let* () = FilesystemStore.Group.create group_node in 69 | let array_node = Node.Array.(group_node / "name") in 70 | let* () = FilesystemStore.Array.create 71 | ~codecs:[`Bytes BE] ~shape:[100; 100; 50] ~chunks:[10; 15; 20] 72 | Ndarray.Float32 Float.neg_infinity array_node store in 73 | let slice = [R (0, 20); I 10; F] in 74 | let* x = FilesystemStore.Array.read store array_node slice Ndarray.Float32 in 75 | let x' = Ndarray.map (fun _ -> Random.int 11 |> Float.of_int) x 76 | in FilesystemStore.Array.write store array_node slice x' 77 | end 78 | ]} *) 79 | 80 | (** {1:extensions Extension Points} 81 | 82 | This library also provides custom extensions not defined in the version 3 83 | specification.
These are tabulated below: 84 | {table 85 | {tr 86 | {th Extension Point} 87 | {th Details}} 88 | {tr 89 | {td Data Types} 90 | {td [char], [complex32], [int] (63-bit integer), [nativeint]}} 91 | } 92 | *) 93 | 94 | (** {1:limitations Limitations} 95 | 96 | Although this implementation tries to be spec compliant, it does come with 97 | a few limitations: 98 | {ul 99 | {- Currently we do not support the following data types: [float16], 100 | [uint32], [complex128], [r*], and variable length strings.} 101 | } 102 | *) 103 | -------------------------------------------------------------------------------- /zarr/src/node.ml: -------------------------------------------------------------------------------- 1 | exception Node_invariant 2 | exception Cannot_rename_root 3 | 4 | (* Check if the path's name satisfies path invariants *) 5 | let rep_ok name = 6 | (String.empty <> name) && 7 | not (String.contains name '/') && 8 | not (String.for_all (Char.equal '.') name) && 9 | not (String.starts_with ~prefix:"__" name) 10 | 11 | module Group = struct 12 | type t = Root | Cons of t * string 13 | 14 | let create parent name = if rep_ok name then Cons (parent, name) else raise Node_invariant 15 | 16 | let of_path = function 17 | | "/" -> Root 18 | | s -> 19 | if not (String.starts_with ~prefix:"/" s) || String.ends_with ~suffix:"/" s 20 | then raise Node_invariant 21 | else List.fold_left create Root (List.tl @@ String.split_on_char '/' s) 22 | 23 | let name = function 24 | | Root -> "" 25 | | Cons (_, n) -> n 26 | 27 | let parent = function 28 | | Root -> None 29 | | Cons (parent, _) -> Some parent 30 | 31 | let rec ( = ) x y = match x, y with 32 | | Root, Root -> true 33 | | Root, Cons _ | Cons _, Root -> false 34 | | Cons (p, n), Cons (q, m) -> ( = ) p q && String.equal n m 35 | 36 | let rec fold f acc = function 37 | | Root -> f acc Root 38 | | Cons (parent, _) as p -> fold f (f acc p) parent 39 | 40 | let prepend_name acc = function 41 | | Root -> acc 42 | | Cons (_, n) -> "/" :: n :: acc 43 | 44 | let to_path = function 45 | | Root -> "/" 46 | | p -> String.concat "" (fold prepend_name [] p) 47 | 48 | let prepend_node acc = function 49 | | Root -> acc 50 | | Cons (p, _) -> p :: acc 51 | 52 | let to_key p = 53 | let str = to_path p in 54 | String.sub str 1 (String.length str - 1) 55 | 56 | let to_prefix = function 57 | | Root -> "" 58 | | p -> to_key p ^ "/" 59 | 60 | let is_child_group x y = match x, y with 61 | | _, Root -> false 62 | | v, Cons (parent, _) -> parent = v 63 | 64 | let rename t str = match t with 65 | | Cons (parent, _) when rep_ok str -> Cons (parent, str) 66 | | Cons _ -> raise Node_invariant 67 | | Root -> raise Cannot_rename_root 68 | 69 | let root = Root 70 | let ( / ) = create 71 | let show = to_path 72 | let ancestors p = fold prepend_node [] p 73 | let pp fmt t = Format.fprintf fmt "%s" (show t) 74 | let to_metakey p = to_prefix p ^ "zarr.json" 75 | end 76 | 77 | module Array = struct 78 | type t = {parent : Group.t option; name : string} 79 | 80 | let of_path p = 81 | let g = Group.of_path p in 82 | match Group.parent g with 83 | | Some _ as parent -> {parent; name = Group.name g} 84 | | None -> raise Node_invariant 85 | 86 | let to_path {parent = p; name} = match p with 87 | | None -> "/" 88 | | Some g when Group.(g = root) -> "/" ^ name 89 | | Some g -> Group.to_path g ^ "/" ^ name 90 | 91 | let ancestors {parent; _} = match parent with 92 | | None -> [] 93 | | Some g -> g :: Group.ancestors g 94 | 95 | let is_parent {parent; _} y = match parent with 96 | | None -> false 
97 | | Some g -> Group.(g = y) 98 | 99 | let to_key {parent; name} = match parent with 100 | | Some g -> Group.to_prefix g ^ name 101 | | None -> "" 102 | 103 | let to_metakey = function 104 | | {parent = None; _} -> "zarr.json" 105 | | p -> to_key p ^ "/zarr.json" 106 | 107 | let rename t name = match t.parent with 108 | | Some _ when rep_ok name -> {t with name} 109 | | Some _ -> raise Node_invariant 110 | | None -> raise Cannot_rename_root 111 | 112 | let create g name = if rep_ok name then {parent = Some g; name} else raise Node_invariant 113 | let ( / ) = create 114 | let show = to_path 115 | let root = {parent = None; name = ""} 116 | let ( = ) {parent = p; name = n} {parent = q; name = m} = p = q && n = m 117 | let parent {parent = p; _} = p 118 | let name {parent = _; name = n} = n 119 | let pp fmt t = Format.fprintf fmt "%s" (show t) 120 | end 121 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | 4 | on: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | workflow_dispatch: 11 | 12 | defaults: 13 | run: 14 | shell: bash -l {0} 15 | 16 | 17 | jobs: 18 | build-and-test-core: 19 | runs-on: ${{ matrix.os }} 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | os: 25 | - ubuntu-latest 26 | ocaml-compiler: 27 | - "5.3.0" 28 | - "4.14.2" 29 | local-packages: 30 | - zarr.opam 31 | 32 | env: 33 | AWS_ACCESS_KEY_ID: minioadmin 34 | AWS_SECRET_ACCESS_KEY: minioadmin 35 | 36 | services: 37 | minio: 38 | image: fclairamb/minio-github-actions 39 | ports: 40 | - 9000:9000 41 | 42 | name: Ocaml version - ${{ matrix.ocaml-compiler }} - ${{ matrix.os }} 43 | steps: 44 | - name: checkout 45 | uses: actions/checkout@v4 46 | with: 47 | fetch-depth: 2 48 | 49 | - name: Setup Minio 50 | run: | 51 | mkdir ~/.aws 52 | echo '[default]' > ~/.aws/credentials 53 | echo 'aws_access_key_id = minioadmin' >> ~/.aws/credentials 54 | echo 'aws_secret_access_key = minioadmin' >> ~/.aws/credentials 55 | pip3 install minio 56 | python3 - <<'EOF' 57 | from minio import Minio 58 | minio = Minio( 59 | 'localhost:9000', 60 | access_key='minioadmin', 61 | secret_key='minioadmin', 62 | secure=False 63 | ) 64 | minio.make_bucket('test-bucket-lwt', location='us-east-1') 65 | EOF 66 | 67 | - name: setup-ocaml 68 | uses: ocaml/setup-ocaml@v3 69 | with: 70 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 71 | 72 | - name: setup 73 | run: | 74 | opam install --deps-only --with-test --with-doc --yes zarr 75 | opam install conf-zlib conf-zstd --yes 76 | opam install lwt aws-s3-lwt --yes 77 | opam exec -- dune build zarr zarr-sync zarr-lwt 78 | 79 | - name: setup ocaml-5-specific 80 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 81 | run: | 82 | opam install eio_main --yes 83 | opam exec -- dune build zarr-eio 84 | 85 | - name: test 86 | run: | 87 | opam exec -- dune exec --instrument-with bisect_ppx --force -- _build/default/zarr/test/test_all.exe -runner sequential -ci true 88 | opam exec -- dune exec --instrument-with bisect_ppx --force -- _build/default/zarr-sync/test/test_sync.exe -runner sequential -ci true 89 | opam exec -- dune exec --instrument-with bisect_ppx --force -- _build/default/zarr-lwt/test/test_lwt.exe -runner sequential -ci true 90 | 91 | - name: test ocaml-5-specific libs 92 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 93 | run: | 94 | opam exec -- dune exec --instrument-with bisect_ppx --force -- 
_build/default/zarr-eio/test/test_eio.exe -runner sequential -ci true 95 | 96 | - name: Upload code coverage report 97 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 98 | run: opam exec -- bisect-ppx-report send-to Codecov 99 | env: 100 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 101 | 102 | - name: Build Docs 103 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 104 | run: opam exec -- dune build @doc 105 | 106 | - name: Upload API Docs artifact 107 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 108 | uses: actions/upload-artifact@v3.1.3 109 | with: 110 | name: docs 111 | path: ./_build/default/_doc/_html 112 | 113 | - name: Deploy API Docs 114 | if: ${{ matrix.ocaml-compiler >= '5.1' }} 115 | uses: peaceiris/actions-gh-pages@v4 116 | with: 117 | github_token: ${{ secrets.GITHUB_TOKEN }} 118 | publish_dir: ./_build/default/_doc/_html 119 | -------------------------------------------------------------------------------- /zarr-sync/src/storage.ml: -------------------------------------------------------------------------------- 1 | module IO = struct 2 | type 'a t = 'a 3 | let return = Fun.id 4 | let bind x f = f x 5 | let map f x = f x 6 | let return_unit = () 7 | let iter = List.iter 8 | let fold_left = List.fold_left 9 | let concat_map = List.concat_map 10 | 11 | module Infix = struct 12 | let (>>=) = bind 13 | let (>>|) = (>>=) 14 | end 15 | 16 | module Syntax = struct 17 | let (let*) = bind 18 | let (let+) = (let*) 19 | end 20 | end 21 | 22 | module ZipStore = Zarr.Zip.Make(IO) 23 | module MemoryStore = Zarr.Memory.Make(IO) 24 | 25 | module FilesystemStore = struct 26 | module S = struct 27 | type t = {dirname : string; perm : int} 28 | type 'a io = 'a IO.t 29 | 30 | let fspath_to_key t path = 31 | let pos = String.length t.dirname + 1 in 32 | String.sub path pos (String.length path - pos) 33 | 34 | let key_to_fspath t key = Filename.concat t.dirname key 35 | 36 | let get t key = 37 | try In_channel.(with_open_gen [Open_rdonly] t.perm (key_to_fspath t key) input_all) with 38 | | Sys_error _ -> raise (Zarr.Storage.Key_not_found key) 39 | 40 | let get_partial_values t key ranges = 41 | let read_range ~ic ~size (ofs, len) = 42 | In_channel.seek ic (Int64.of_int ofs); 43 | match len with 44 | | None -> really_input_string ic (size - ofs) 45 | | Some rs -> really_input_string ic rs 46 | in 47 | In_channel.with_open_gen [Open_rdonly] t.perm (key_to_fspath t key) @@ fun ic -> 48 | let size = Int64.to_int (In_channel.length ic) in 49 | List.map (read_range ~ic ~size) ranges 50 | 51 | let set t key v = 52 | let p = key_to_fspath t key in 53 | Zarr.Util.create_parent_dir p t.perm; 54 | let f = [Open_wronly; Open_trunc; Open_creat] in 55 | Out_channel.(with_open_gen f t.perm p @@ fun oc -> output_string oc v; flush oc) 56 | 57 | let set_partial_values t key ?(append=false) rvs = 58 | let write ~oc (rs, value) = 59 | Out_channel.seek oc (Int64.of_int rs); 60 | Out_channel.output_string oc value 61 | in 62 | let p = key_to_fspath t key in 63 | Zarr.Util.create_parent_dir p t.perm; 64 | let flags = match append with 65 | | false -> [Open_creat; Open_wronly] 66 | | true -> [Open_append; Open_creat; Open_wronly] 67 | in 68 | Out_channel.with_open_gen flags t.perm p @@ fun oc -> 69 | List.iter (write ~oc) rvs; 70 | Out_channel.flush oc 71 | 72 | let size t key = 73 | try In_channel.(with_open_gen [Open_rdonly] t.perm (key_to_fspath t key) length) |> Int64.to_int with 74 | | Sys_error _ -> 0 75 | 76 | let rec walk t acc dir = 77 | let accumulate ~t a x = match Filename.concat dir x with 78 | | p when 
Sys.is_directory p -> walk t a p 79 | | p -> (fspath_to_key t p) :: a 80 | in 81 | let dir_contents = Array.to_list (Sys.readdir dir) in 82 | List.fold_left (accumulate ~t) acc dir_contents 83 | 84 | let list_dir t prefix = 85 | let choose ~t ~dir x = match Filename.concat dir x with 86 | | p when Sys.is_directory p -> Either.right @@ (fspath_to_key t p) ^ "/" 87 | | p -> Either.left (fspath_to_key t p) 88 | in 89 | let dir = key_to_fspath t prefix in 90 | let dir_contents = Array.to_list (Sys.readdir dir) in 91 | List.partition_map (choose ~t ~dir) dir_contents 92 | 93 | let list t = walk t [] (key_to_fspath t "") 94 | let list_prefix t prefix = walk t [] (key_to_fspath t prefix) 95 | let erase t key = Sys.remove (key_to_fspath t key) 96 | let erase_prefix t pre = List.iter (erase t) (list_prefix t pre) 97 | let rename t k k' = Sys.rename (key_to_fspath t k) (key_to_fspath t k') 98 | let is_member t key = Sys.file_exists (key_to_fspath t key) 99 | end 100 | 101 | let create ?(perm=0o700) dirname = 102 | Zarr.Util.create_parent_dir dirname perm; 103 | Sys.mkdir dirname perm; 104 | S.{dirname = Zarr.Util.sanitize_dir dirname; perm} 105 | 106 | let open_store ?(perm=0o700) dirname = 107 | if Sys.is_directory dirname 108 | then S.{dirname = Zarr.Util.sanitize_dir dirname; perm} 109 | else raise (Zarr.Storage.Not_a_filesystem_store dirname) 110 | 111 | include Zarr.Storage.Make(IO)(S) 112 | end 113 | -------------------------------------------------------------------------------- /zarr/src/storage/memory.ml: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | include Storage.S 3 | val create : unit -> t 4 | (** [create ()] returns a new In-memory Zarr store type.*) 5 | end 6 | 7 | module Make (IO : Types.IO) : S with type 'a io := 'a IO.t = struct 8 | open IO.Syntax 9 | 10 | module M = Map.Make(String) 11 | 12 | module Store = struct 13 | type t = string M.t Atomic.t 14 | type 'a io = 'a IO.t 15 | 16 | let get : t -> string -> string io = fun t key -> 17 | match M.find_opt key (Atomic.get t) with 18 | | None -> raise (Storage.Key_not_found key) 19 | | Some v -> IO.return v 20 | 21 | let rec set : t -> string -> string -> unit io = fun t key value -> 22 | let m = Atomic.get t in 23 | if Atomic.compare_and_set t m (M.add key value m) 24 | then IO.return_unit else set t key value 25 | 26 | let list : t -> string list io = fun t -> 27 | let m = Atomic.get t in 28 | IO.return @@ M.fold (fun k _ acc -> k :: acc) m [] 29 | 30 | let is_member : t -> string -> bool io = fun t key -> 31 | let m = Atomic.get t in 32 | IO.return (M.mem key m) 33 | 34 | let rec erase : t -> string -> unit io = fun t key -> 35 | let m = Atomic.get t in 36 | let m' = M.update key (Fun.const None) m in 37 | if Atomic.compare_and_set t m m' 38 | then IO.return_unit else erase t key 39 | 40 | let size : t -> string -> int io = fun t key -> 41 | match M.find_opt key (Atomic.get t) with 42 | | None -> IO.return 0 43 | | Some e -> IO.return (String.length e) 44 | 45 | let rec erase_prefix : t -> string -> unit io = fun t prefix -> 46 | let pred ~prefix k v = if String.starts_with ~prefix k then None else Some v in 47 | let m = Atomic.get t in 48 | let m' = M.filter_map (pred ~prefix) m in 49 | if Atomic.compare_and_set t m m' 50 | then IO.return_unit else erase_prefix t prefix 51 | 52 | let get_partial_values t key (ranges : Types.range list) = 53 | let read_range ~data ~size (ofs, len) = match len with 54 | | Some l -> String.sub data ofs l 55 | | None -> String.sub data 
ofs (size - ofs) 56 | in 57 | let+ data = get t key in 58 | let size = String.length data in 59 | List.map (read_range ~data ~size) ranges 60 | 61 | let rec set_partial_values t key ?(append=false) (rv : (int * string) list) = 62 | let m = Atomic.get t in 63 | let ov = Option.fold ~none:String.empty ~some:Fun.id (M.find_opt key m) in 64 | let f = if append || ov = String.empty then 65 | fun acc (_, v) -> acc ^ v else 66 | fun acc (rs, v) -> 67 | let s = Bytes.unsafe_of_string acc in 68 | Bytes.blit_string v 0 s rs String.(length v); 69 | Bytes.unsafe_to_string s 70 | in 71 | let m' = M.add key (List.fold_left f ov rv) m in 72 | if Atomic.compare_and_set t m m' 73 | then IO.return_unit else set_partial_values t key ~append rv 74 | 75 | let list_dir : t -> string -> (string list * string list) io = fun t prefix -> 76 | let module S = Set.Make(String) in 77 | let add ~size ~prefix key _ ((l, r) as acc) = 78 | if not (String.starts_with ~prefix key) then acc else 79 | if not (String.contains_from key size '/') then key :: l, r else 80 | l, S.add String.(sub key 0 @@ 1 + index_from key size '/') r 81 | in 82 | let size = String.length prefix in 83 | let m = Atomic.get t in 84 | let keys, prefixes = M.fold (add ~prefix ~size) m ([], S.empty) in 85 | IO.return (keys, S.elements prefixes) 86 | 87 | let rec rename : t -> string -> string -> unit io = fun t prefix new_prefix -> 88 | let add ~prefix ~new_prefix k v acc = 89 | if not (String.starts_with ~prefix k) then M.add k v acc else 90 | let l = String.length prefix in 91 | let k' = new_prefix ^ String.sub k l (String.length k - l) in 92 | M.add k' v acc 93 | in 94 | let m = Atomic.get t in 95 | let m' = M.fold (add ~prefix ~new_prefix) m M.empty in 96 | if Atomic.compare_and_set t m m' 97 | then IO.return_unit else rename t prefix new_prefix 98 | end 99 | 100 | let create : unit -> Store.t = fun () -> Atomic.make M.empty 101 | 102 | include Storage.Make(IO)(Store) 103 | end 104 | -------------------------------------------------------------------------------- /zarr/src/ndarray.mli: -------------------------------------------------------------------------------- 1 | (** Supported data types for a Zarr array. 
*) 2 | type _ dtype = 3 | | Char : char dtype 4 | | Bool : bool dtype 5 | | Int8 : int dtype 6 | | Uint8 : int dtype 7 | | Int16 : int dtype 8 | | Uint16 : int dtype 9 | | Int32 : int32 dtype 10 | | Int64 : int64 dtype 11 | | Uint64 : Stdint.uint64 dtype 12 | | Float32 : float dtype 13 | | Float64 : float dtype 14 | | Complex32 : Complex.t dtype 15 | | Complex64 : Complex.t dtype 16 | | Int : int dtype 17 | | Nativeint : nativeint dtype 18 | 19 | type 'a t 20 | (** The type for n-dimensional view of a Zarr array.*) 21 | 22 | val dtype_size : 'a dtype -> int 23 | (** [dtype_size kind] returns the size in bytes of data type [kind].*) 24 | 25 | val create : 'a dtype -> int list -> 'a -> 'a t 26 | (** [create k s v] creates an N-dimensional array with data_type [k], 27 | shape [s] and fill value [v].*) 28 | 29 | val init : 'a dtype -> int list -> (int -> 'a) -> 'a t 30 | (** [init k s f] creates an N-dimensional array with data_type [k], 31 | shape [s] and every element value is assigned using function [f].*) 32 | 33 | val data_type : 'a t -> 'a dtype 34 | (** [data_type x] returns the data_type associated with [x].*) 35 | 36 | val size : 'a t -> int 37 | (** [size x] is the total number of elements of [x].*) 38 | 39 | val ndims : 'a t -> int 40 | (** [ndims x] is the number of dimensions of [x].*) 41 | 42 | val shape : 'a t -> int list 43 | (** [shape x] returns an array with the size of each dimension of [x].*) 44 | 45 | val byte_size : 'a t -> int 46 | (** [byte_size x] is the total size occupied by the byte sequence of elements 47 | of [x]. *) 48 | 49 | val to_array : 'a t -> 'a array 50 | (** [to_array x] returns the data of [x] as a 1-d array of type determined by 51 | {!data_type}. Note that data is not copied, so if the caller modifies the 52 | returned array, the changes will be reflected in [x].*) 53 | 54 | val of_array : 'a dtype -> int list -> 'a array -> 'a t 55 | (** [of_array k s x] creates an n-dimensional array of shape [s] and data_type 56 | [k] using elements of [x]. Note that the data is not copied, so the 57 | caller must ensure not to modify [x] afterwards.*) 58 | 59 | val get : 'a t -> int list -> 'a 60 | (** [get x c] returns element of [x] at coordinate [c].*) 61 | 62 | val set : 'a t -> int list -> 'a -> unit 63 | (** [set x c v] sets coordinate [c] of [x] to value [v].*) 64 | 65 | val iteri : (int -> 'a -> unit) -> 'a t -> unit 66 | (** Same as {!iter} but the function is applied to the index of the element as 67 | first argument and the element value as the second.*) 68 | 69 | val fill : 'a t -> 'a -> unit 70 | (** [fill x v] replaces all elements of [x] with value [v].*) 71 | 72 | val map : ('a -> 'a) -> 'a t -> 'a t 73 | (** [map f x] applies function [f] to all elements of [x] and builds an 74 | n-dimensional array of same shape and data_type as [x] with the result.*) 75 | 76 | val iter : ('a -> unit) -> 'a t -> unit 77 | (** [iteri f x] applies function [f] to all elements of [x] in row-major order.*) 78 | 79 | val equal : 'a t -> 'a t -> bool 80 | (** [equal x y] is [true] iff [x] and [y] are equal, else [false].*) 81 | 82 | val transpose : ?axes:int list -> 'a t -> 'a t 83 | (** [transpose o x] permutes the axes of [x] according to [o].*) 84 | 85 | val to_bigarray : 'a t -> ('a, 'b) Bigarray.kind -> ('a, 'b, Bigarray.c_layout) Bigarray.Genarray.t 86 | (** [to_bigarray x] returns a C-layout Bigarray representation of [x]. 
*) 87 | 88 | val of_bigarray : ('a, 'b, 'c) Bigarray.Genarray.t -> 'a t 89 | (** [of_bigarray x] returns an N-dimensional array representation of [x].*) 90 | 91 | module Indexing : sig 92 | (** A module housing functions for creating and manipulating indices and 93 | slices for working with Zarr arrays. *) 94 | 95 | type index = 96 | | F 97 | | I of int 98 | | T of int 99 | | L of int list 100 | | R of int * int 101 | | R' of int * int * int 102 | 103 | val slice_of_coords : int list list -> index list 104 | (** [slice_of_coords c] takes a list of array coordinates and returns 105 | a slice corresponding to the coordinates. Elements of each slice 106 | variant are sorted in increasing order.*) 107 | 108 | val coords_of_slice : index list -> int list -> int list list 109 | (** [coords_of_slice s shp] returns an array of coordinates given 110 | a slice [s] and array shape [shp]. *) 111 | 112 | val cartesian_prod : int list list -> int list list 113 | (** [cartesian_prod ll] returns a cartesian product of the elements of 114 | list [ll]. It is mainly used to generate a C-order of chunk indices 115 | in a regular Zarr array grid. *) 116 | 117 | val slice_shape : index list -> int list -> int list 118 | (** [slice_shape s shp] returns the shape of slice [s] within an array 119 | of shape [shp]. *) 120 | end 121 | -------------------------------------------------------------------------------- /zarr/src/metadata.mli: -------------------------------------------------------------------------------- 1 | (** This module provides functionality for manipulating a Zarr node's 2 | metadata JSON document. 3 | 4 | The Zarr V3 specification defines two types of metadata documents: 5 | array and group metadata. Both types are stored under the key 6 | [zarr.json] within the prefix of a group or array.*) 7 | 8 | exception Parse_error of string 9 | (** raised when parsing a metadata JSON document fails. *) 10 | 11 | module Array : sig 12 | (** A module which contains functionality to work with a parsed JSON 13 | Zarr array metadata document. *) 14 | 15 | type t 16 | (** A type representing a parsed array metadata document. *) 17 | 18 | val create : 19 | ?sep:[< `Dot | `Slash > `Slash ] -> 20 | ?dimension_names:string option list -> 21 | ?attributes:Yojson.Safe.t -> 22 | codecs:Codecs.Chain.t -> 23 | shape:int list -> 24 | 'a Ndarray.dtype -> 25 | 'a -> 26 | int list -> 27 | t 28 | (** [create ~codecs ~shape kind fv cshp] Creates a new array metadata 29 | document with codec chain [codecs], shape [shape], fill value [fv], 30 | data type [kind] and chunk shape [cshp]. 31 | 32 | @raise Failure if shape and chunks are incompatible. *) 33 | 34 | val encode : t -> string 35 | (** [encode t] returns a byte string representing a JSON Zarr array metadata. *) 36 | 37 | val decode : string -> t 38 | (** [decode s] decodes a bytes string [s] into a {!ArrayMetadata.t} type. 39 | 40 | @raise Parse_error if metadata string is invalid. *) 41 | 42 | val shape : t -> int list 43 | (** [shape t] returns the shape of the zarr array represented by metadata type [t]. *) 44 | 45 | val chunk_shape : t -> int list 46 | (** [chunk_shape t] returns the shape a chunk in this zarr array. *) 47 | 48 | val is_valid_kind : t -> 'a Ndarray.dtype -> bool 49 | (** [is_valid_kind t kind] checks if [kind] is a valid Bigarray kind that 50 | matches the data type of the zarr array represented by this metadata type. 
*) 51 | 52 | val fillvalue_of_kind : t -> 'a Ndarray.dtype -> 'a 53 | (** [fillvalue_of_kind t kind] returns the fill value of uninitialized 54 | chunks in this zarr array given [kind]. 55 | @raise Failure if [kind] is not compatible with this array's fill value. *) 56 | 57 | val attributes : t -> Yojson.Safe.t 58 | (** [attributes t] returns a Yojson type containing user attributes assigned 59 | to the zarr array represented by [t]. *) 60 | 61 | val dimension_names : t -> string option list 62 | (** [dimension_names t] returns a list of dimension names. If none are 63 | defined then an empty list is returned. *) 64 | 65 | val codecs : t -> Codecs.Chain.t 66 | (** [codecs t] returns a type representing the chain of codecs applied 67 | when decoding/encoding a Zarr array chunk. *) 68 | 69 | val index_coord_pair : t -> int list -> int list * int list 70 | (** [index_coord_pair t coord] maps a coordinate of this Zarr array to 71 | a pair of chunk index and coordinate {i within} that chunk. *) 72 | 73 | val chunk_indices : t -> int list -> int list list 74 | (** [chunk_indices t shp] returns a list of all chunk indices that would 75 | be contained in a zarr array of shape [shp] given the regular grid 76 | defined in array metadata [t]. *) 77 | 78 | val chunk_key : t -> int list -> string 79 | (** [chunk_key t idx] returns a key encoding of the chunk index [idx]. *) 80 | 81 | val update_attributes : t -> Yojson.Safe.t -> t 82 | (** [update_attributes t json] returns a new metadata type with an updated 83 | attribute field containing the contents of [json]. *) 84 | 85 | val update_shape : t -> int list -> t 86 | (** [update_shape t new_shp] returns a new metadata type containing 87 | shape [new_shp]. *) 88 | 89 | val ( = ) : t -> t -> bool 90 | (** [a = b] returns true if [a] and [b] are equal array metadata documents 91 | and false otherwise. *) 92 | end 93 | 94 | module Group : sig 95 | (** A module which contains functionality to work with a parsed JSON 96 | Zarr group metadata document. *) 97 | 98 | type t 99 | (** A type representing a parsed group metadata document. *) 100 | 101 | val default : t 102 | (** Return a group metadata type with default values for all fields. *) 103 | 104 | val encode : t -> string 105 | (** [encode t] returns a byte string representing a JSON Zarr group metadata. *) 106 | 107 | val decode : string -> t 108 | (** [decode s] decodes a byte string [s] into a {!t} type. 109 | 110 | @raise Parse_error if metadata string is invalid. *) 111 | 112 | val update_attributes : t -> Yojson.Safe.t -> t 113 | (** [update_attributes t json] returns a new metadata type with an updated 114 | attribute field containing the contents of [json]. *) 115 | 116 | val show : t -> string 117 | (** [show t] pretty-prints the contents of the group metadata type [t]. *) 118 | 119 | val attributes : t -> Yojson.Safe.t 120 | (** [attributes t] returns a Yojson type containing user attributes assigned 121 | to the zarr group represented by [t]. *) 122 | 123 | val ( = ) : t -> t -> bool 124 | (** [a = b] returns true if [a] and [b] are equal group metadata documents 125 | and false otherwise. *) 126 | end 127 | -------------------------------------------------------------------------------- /zarr/src/codecs.mli: -------------------------------------------------------------------------------- 1 | (** An array has an associated list of codecs. Each codec specifies a 2 | bidirectional transform (an encode transform and a decode transform).
3 | This module contains building blocks for creating and working with 4 | a chain of codecs. *) 5 | 6 | exception Array_to_bytes_invariant 7 | (** raised when a codec chain contains more than 1 array->bytes codec. *) 8 | 9 | exception Invalid_transpose_order 10 | (** raised when a codec chain contains a Transpose codec with an incorrect order. *) 11 | 12 | exception Invalid_sharding_chunk_shape 13 | (** raise when a codec chain contains a shardingindexed codec with an incorrect inner chunk shape. *) 14 | 15 | exception Invalid_codec_ordering 16 | (** raised when a codec chain has incorrect ordering of codecs. i.e if the 17 | ordering is not [arraytoarray list -> 1 arraytobytes -> bytestobytes list]. *) 18 | 19 | exception Invalid_zstd_level 20 | (** raised when a codec chain contains a Zstd codec with an incorrect compression value.*) 21 | 22 | (** The type of [array -> array] codecs. *) 23 | type arraytoarray = [ `Transpose of int list ] 24 | 25 | (** A type representing valid compression levels of the DEFLATE algorithm. *) 26 | type deflate_level = L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 27 | 28 | (** A type representing [bytes -> bytes] codecs that produce 29 | fixed sized encoded strings. *) 30 | type fixed_bytestobytes = [ `Crc32c ] 31 | 32 | (** A type representing [bytes -> bytes] codecs that produce 33 | variable sized encoded strings. *) 34 | type variable_bytestobytes = [ `Gzip of deflate_level | `Zstd of int * bool ] 35 | 36 | (** The type of [bytes -> bytes] codecs. *) 37 | type bytestobytes = [ fixed_bytestobytes | variable_bytestobytes ] 38 | 39 | (** A type representing the configured endianness of an array. *) 40 | type endianness = LE | BE 41 | 42 | (** A type representing the location of a shard's index array in 43 | an encoded byte string. *) 44 | type loc = Start | End 45 | 46 | (** The type of [array -> bytes] codecs that produce 47 | fixed sized encoded string. *) 48 | type fixed_arraytobytes = [ `Bytes of endianness ] 49 | 50 | (** The type of [array -> bytes] codecs that produce 51 | variable sized encoded string. *) 52 | type variable_array_tobytes = [ `ShardingIndexed of shard_config ] 53 | and codec = [ arraytoarray | fixed_arraytobytes | `ShardingIndexed of shard_config | bytestobytes ] 54 | and index_codec = [ arraytoarray | fixed_arraytobytes | fixed_bytestobytes ] 55 | 56 | (** A type representing the Sharding indexed codec's configuration parameters. *) 57 | and shard_config = 58 | {chunk_shape : int list 59 | ;codecs : codec list 60 | ;index_codecs : index_codec list 61 | ;index_location : loc} 62 | 63 | (** The type summarizing the decoded/encoded representation of a Zarr array 64 | or chunk. *) 65 | type 'a array_repr = {kind : 'a Ndarray.dtype; shape : int list} 66 | 67 | (** A module containing functions to encode/decode an array chunk using a 68 | predefined set of codecs. *) 69 | module Chain : sig 70 | (** A type representing a valid chain of codecs for 71 | decoding/encoding a Zarr array chunk. *) 72 | type t 73 | 74 | (** [create s c] returns a type representing a chain of codecs defined by 75 | chain [c] and chunk shape [s]. 76 | 77 | @raise Bytes_to_bytes_invariant 78 | if [c] contains more than one bytes->bytes codec. 79 | @raise Invalid_transpose_order 80 | if [c] contains a transpose codec with invalid order array. 81 | @raise Invalid_zstd_level 82 | if [c] contains a Zstd codec whose compression level is invalid. 
83 | @raise Invalid_sharding_chunk_shape 84 | if [c] contains a shardingindexed codec with an 85 | incorrect inner chunk shape. *) 86 | val create : int list -> codec list -> t 87 | 88 | (** [encode t x] computes the encoded byte string representation of 89 | array chunk [x]. *) 90 | val encode : t -> 'a Ndarray.t -> string 91 | 92 | (** [decode t repr x] decodes the byte string [x] using codec chain [t] 93 | and decoded representation type [repr]. *) 94 | val decode : t -> 'a array_repr -> string -> 'a Ndarray.t 95 | 96 | (** [x = y] returns true if chain [x] is equal to chain [y], 97 | and false otherwise. *) 98 | val ( = ) : t -> t -> bool 99 | 100 | (** [of_yojson x] returns a code chain of type {!t} from its json object 101 | representation. *) 102 | val of_yojson : int list -> Yojson.Safe.t -> (t, string) result 103 | 104 | (** [to_yojson x] returns a json object representation of codec chain [x]. *) 105 | val to_yojson : t -> Yojson.Safe.t 106 | end 107 | 108 | (** A functor for generating a Sharding Indexed codec that supports partial 109 | (en/de)coding via IO operations. *) 110 | module Make (IO : Types.IO) : sig 111 | 112 | (** [is_just_sharding t] is [true] if the codec chain [t] contains only 113 | the [sharding_indexed] codec. *) 114 | val is_just_sharding : Chain.t -> bool 115 | 116 | val partial_encode : 117 | Chain.t -> 118 | (Types.range list -> string list IO.t) -> 119 | (?append:bool -> (int * string) list -> unit IO.t) -> 120 | int -> 121 | 'a array_repr -> 122 | (int list * 'a) list -> 123 | 'a -> 124 | unit IO.t 125 | 126 | val partial_decode : 127 | Chain.t -> 128 | (Types.range list -> string list IO.t) -> 129 | int -> 130 | 'a array_repr -> 131 | (int * int list) list -> 132 | 'a -> 133 | (int * 'a) list IO.t 134 | end 135 | -------------------------------------------------------------------------------- /zarr-eio/src/storage.ml: -------------------------------------------------------------------------------- 1 | module IO = struct 2 | type 'a t = 'a 3 | let return = Fun.id 4 | let bind x f = f x 5 | let map f x = f x 6 | let return_unit = () 7 | let iter f xs = Eio.Fiber.List.iter f xs 8 | let fold_left = List.fold_left 9 | let concat_map f xs = List.concat (Eio.Fiber.List.map f xs) 10 | 11 | module Infix = struct 12 | let (>>=) = bind 13 | let (>>|) = (>>=) 14 | end 15 | 16 | module Syntax = struct 17 | let (let*) = bind 18 | let (let+) = (let*) 19 | end 20 | end 21 | 22 | module ZipStore = Zarr.Zip.Make(IO) 23 | module MemoryStore = Zarr.Memory.Make(IO) 24 | 25 | module FilesystemStore = struct 26 | module S = struct 27 | type t = {root : Eio.Fs.dir_ty Eio.Path.t; perm : Eio.File.Unix_perm.t} 28 | type 'a io = 'a IO.t 29 | 30 | let fspath_to_key t (path : Eio.Fs.dir_ty Eio.Path.t) = 31 | let s = snd path and pos = String.length (snd t.root) + 1 in 32 | String.sub s pos (String.length s - pos) 33 | 34 | let key_to_fspath t key = Eio.Path.(t.root / key) 35 | 36 | let size t key = 37 | let flow_size flow = Optint.Int63.to_int (Eio.File.size flow) in 38 | try Eio.Path.with_open_in (key_to_fspath t key) flow_size with 39 | | Eio.Io (Eio.Fs.E Not_found Eio_unix.Unix_error _, _) -> 0 40 | 41 | let get t key = 42 | try Eio.Path.load (key_to_fspath t key) with 43 | | Eio.Io (Eio.Fs.E Not_found Eio_unix.Unix_error _, _) -> 44 | raise (Zarr.Storage.Key_not_found key) 45 | 46 | let get_partial_values t key ranges = 47 | let add ~size a (s, l) = 48 | let a' = Option.fold ~none:(a + size - s) ~some:(Int.add a) l in 49 | a', (Optint.Int63.of_int s, a, a' - a) 50 | in 51 | let 
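      (* [read] seeks to [file_offset] and reads [len] bytes into a view of the
         shared buffer starting at [off], returning the bytes as a string. *)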
read ~flow ~buffer (file_offset, off, len) = 52 | let _ = Eio.File.seek flow file_offset `Set in 53 | let buf = Cstruct.of_bigarray ~off ~len buffer in 54 | Eio.File.pread_exact flow ~file_offset [buf]; 55 | Cstruct.to_string buf 56 | in 57 | Eio.Path.with_open_in (key_to_fspath t key) @@ fun flow -> 58 | let size = Optint.Int63.to_int (Eio.File.size flow) in 59 | let size', ranges' = List.fold_left_map (add ~size) 0 ranges in 60 | let buffer = Bigarray.Array1.create Char C_layout size' in 61 | List.map (read ~flow ~buffer) ranges' 62 | 63 | let create_parent_dir fp perm = 64 | Option.fold 65 | ~some:(fun (p, _) -> Eio.Path.mkdirs ~exists_ok:true ~perm p) 66 | ~none:() 67 | (Eio.Path.split fp) 68 | 69 | let set t key value = 70 | let fp = key_to_fspath t key in 71 | create_parent_dir fp t.perm; 72 | Eio.Path.save ~create:(`Or_truncate t.perm) fp value 73 | 74 | let set_partial_values t key ?(append=false) rvs = 75 | let write = if append then 76 | fun ~flow ~allocator (_, str) -> 77 | Eio.File.pwrite_all flow ~file_offset:Optint.Int63.max_int [Cstruct.of_string ~allocator str] 78 | else 79 | fun ~flow ~allocator (ofs, str) -> 80 | let file_offset = Eio.File.seek flow (Optint.Int63.of_int ofs) `Set in 81 | Eio.File.pwrite_all flow ~file_offset [Cstruct.of_string ~allocator str] 82 | in 83 | let l = List.fold_left (fun a (_, s) -> Int.max a (String.length s)) 0 rvs in 84 | let buffer = Bigarray.Array1.create Char C_layout l in 85 | let allocator len = Cstruct.of_bigarray ~off:0 ~len buffer in 86 | let fp = key_to_fspath t key in 87 | create_parent_dir fp t.perm; 88 | Eio.Path.with_open_out ~append ~create:(`If_missing t.perm) fp @@ fun flow -> 89 | List.iter (write ~flow ~allocator) rvs 90 | 91 | let rec walk t acc dir = 92 | let add ~t ~dir a x = match Eio.Path.(dir / x) with 93 | | p when Eio.Path.is_directory p -> walk t a p 94 | | p -> (fspath_to_key t p) :: a 95 | in 96 | List.fold_left (add ~t ~dir) acc (Eio.Path.read_dir dir) 97 | 98 | let list t = walk t [] t.root 99 | let list_prefix t prefix = walk t [] (key_to_fspath t prefix) 100 | let is_member t key = Eio.Path.is_file (key_to_fspath t key) 101 | let erase t key = Eio.Path.unlink (key_to_fspath t key) 102 | let rename t k k' = Eio.Path.rename (key_to_fspath t k) (key_to_fspath t k') 103 | 104 | let erase_prefix t pre = 105 | (* if prefix points to the root of the store, only delete sub-dirs and files.*) 106 | let prefix = key_to_fspath t pre in 107 | if Filename.chop_suffix (snd prefix) "/" = snd t.root 108 | then Eio.Fiber.List.iter (erase t) (list_prefix t pre) 109 | else Eio.Path.rmtree ~missing_ok:true prefix 110 | 111 | let list_dir t prefix = 112 | let choose ~t ~dir x = match Eio.Path.(dir / x) with 113 | | p when Eio.Path.is_directory p -> Either.right @@ (fspath_to_key t p) ^ "/" 114 | | p -> Either.left (fspath_to_key t p) 115 | in 116 | let dir = key_to_fspath t prefix in 117 | List.partition_map (choose ~t ~dir) (Eio.Path.read_dir dir) 118 | end 119 | 120 | let create ?(perm=0o700) ~env dirname = 121 | Zarr.Util.create_parent_dir dirname perm; 122 | Sys.mkdir dirname perm; 123 | S.{root = Eio.Path.(Eio.Stdenv.fs env / Zarr.Util.sanitize_dir dirname); perm} 124 | 125 | let open_store ?(perm=0o700) ~env dirname = 126 | if Sys.is_directory dirname 127 | then S.{root = Eio.Path.(Eio.Stdenv.fs env / Zarr.Util.sanitize_dir dirname); perm} 128 | else raise (Zarr.Storage.Not_a_filesystem_store dirname) 129 | 130 | include Zarr.Storage.Make(IO)(S) 131 | end 132 | 
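The store defined above is used in direct style, since `'a IO.t = 'a` for the Eio backend. Below is a minimal end-to-end sketch (not part of the library or its test suite); the directory and node names are made up, and the calls mirror those exercised in `zarr-eio/test/test_eio.ml`:

```ocaml
open Zarr
open Zarr.Codecs
open Zarr.Indexing
open Zarr_eio.Storage

let () = Eio_main.run @@ fun env ->
  (* create a new store rooted at a hypothetical directory *)
  let store = FilesystemStore.create ~env "/tmp/demo.zarr" in
  let group = Node.Group.of_path "/demo" in
  FilesystemStore.Group.create store group;
  (* a 2-d float64 array, chunked into 5x5 blocks *)
  let anode = Node.Array.(group / "xs") in
  FilesystemStore.Array.create
    ~codecs:[`Bytes LE] ~shape:[20; 10] ~chunks:[5; 5]
    Ndarray.Float64 0. anode store;
  (* read a slice, transform it, and write it back *)
  let slice = [R (0, 9); F] in
  let x = FilesystemStore.Array.read store anode slice Ndarray.Float64 in
  FilesystemStore.Array.write store anode slice (Ndarray.map (fun v -> v +. 1.) x)
```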
-------------------------------------------------------------------------------- /zarr/test/test_ndarray.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | 3 | module M = Zarr.Ndarray 4 | 5 | let run_test : 6 | type a. a Zarr.Codecs.array_repr -> a -> int -> unit = fun repr fv is -> 7 | let x = M.create repr.kind repr.shape fv in 8 | 9 | assert_equal repr.shape (M.shape x); 10 | let num_elt = List.fold_left Int.mul 1 repr.shape in 11 | assert_equal (num_elt * is) (M.byte_size x); 12 | assert_equal num_elt (M.size x); 13 | assert_equal is (M.dtype_size @@ M.data_type x); 14 | assert_equal (List.length repr.shape) (M.ndims x); 15 | 16 | let y = M.init repr.kind repr.shape (Fun.const fv) in 17 | assert_equal x y; 18 | M.fill y fv; 19 | assert_equal x y; 20 | assert_equal fv (M.get x [0; 0; 0]); 21 | M.set x [0; 0; 0] fv; 22 | assert_bool "" @@ M.equal x y; 23 | M.iteri (fun _ v -> ignore v) x 24 | 25 | let tests = [ 26 | "test char ndarray" >:: (fun _ -> 27 | let shape = [2; 5; 3] in 28 | run_test {shape; kind = M.Char} '?' 1; 29 | 30 | run_test {shape; kind = M.Bool} false 1; 31 | 32 | run_test {shape; kind = M.Int8} 0 1; 33 | 34 | run_test {shape; kind = M.Uint8} 0 1; 35 | 36 | run_test {shape; kind = M.Int16} 0 2; 37 | 38 | run_test {shape; kind = M.Uint16} 0 2; 39 | 40 | run_test {shape; kind = M.Int32} Int32.max_int 4; 41 | 42 | run_test {shape; kind = M.Int64} Int64.max_int 8; 43 | 44 | run_test {shape; kind = M.Float32} Float.neg_infinity 4; 45 | 46 | run_test {shape; kind = M.Float64} Float.neg_infinity 8; 47 | 48 | run_test {shape; kind = M.Complex32} Complex.zero 8; 49 | 50 | run_test {shape; kind = M.Complex64} Complex.zero 16; 51 | 52 | run_test {shape; kind = M.Int} Int.max_int @@ Sys.word_size / 8; 53 | 54 | run_test {shape; kind = M.Nativeint} Nativeint.max_int @@ Sys.word_size / 8 55 | ) 56 | ; 57 | "test map, iter and fold" >:: (fun _ -> 58 | let shape = [2; 5; 3] in 59 | let x = M.create Int32 shape 0l in 60 | let x' = M.map (Int32.add 1l) x in 61 | assert_equal 1l (M.get x' [0;0;0]); 62 | 63 | let x = M.create Char [4] '?' in 64 | let buf = Buffer.create @@ M.byte_size x in 65 | M.iter (Buffer.add_char buf) x; 66 | assert_equal ~printer:Fun.id "????" (Buffer.contents buf); 67 | ) 68 | ; 69 | "test transpose functionality" >:: (fun _ -> 70 | let shape = [2; 1; 3] 71 | and axes = [2; 0; 1] 72 | and a = [|0.15458236; 0.94363903; 0.63893012; 0.29207497; 0.31390295; 0.42341309|] in 73 | let x = M.of_array Float32 shape a in 74 | let x' = M.transpose ~axes x in 75 | assert_equal ~printer:[%show: int list] [3; 2; 1] (M.shape x'); 76 | (* test if a particular value is transposed correctly. *) 77 | assert_equal ~printer:string_of_float (M.get x [1; 0; 2]) (M.get x' [2; 1; 0]); 78 | let flat_exp = [|0.15458236; 0.29207497; 0.94363903; 0.31390295; 0.63893012; 0.42341309|] in 79 | assert_equal ~printer:[%show: float array] flat_exp (M.to_array x'); 80 | let inv_order = Array.(make (List.length axes) 0) in 81 | List.iteri (fun i x -> inv_order.(x) <- i) axes; 82 | assert_equal true @@ M.equal x (M.transpose ~axes:(Array.to_list inv_order) x') 83 | ) 84 | ; 85 | "test interop with bigarrays" >:: (fun _ -> 86 | let s = [|2; 5; 3|] in 87 | let module B = Bigarray in 88 | 89 | let convert_to : 90 | type a b. 
a M.dtype -> (a, b) B.kind -> a -> unit 91 | = fun fromdtype todtype fv -> 92 | let x = M.create fromdtype (Array.to_list s) fv in 93 | let y = M.to_bigarray x todtype in 94 | assert_equal s (B.Genarray.dims y); 95 | assert_equal fv (B.Genarray.get y [|0; 0; 0|]); 96 | assert_equal B.c_layout (B.Genarray.layout y) 97 | in 98 | convert_to M.Char B.Char '?'; 99 | convert_to M.Int8 B.Int8_signed 127; 100 | convert_to M.Uint8 B.Int8_unsigned 255; 101 | convert_to M.Int16 B.Int16_signed 32767; 102 | convert_to M.Uint16 B.Int16_unsigned 32767; 103 | convert_to M.Int32 B.Int32 Int32.max_int; 104 | convert_to M.Int64 B.Int64 Int64.max_int; 105 | convert_to M.Float32 B.Float32 Float.neg_infinity; 106 | convert_to M.Float64 B.Float64 Float.infinity; 107 | convert_to M.Complex32 B.Complex32 Complex.one; 108 | convert_to M.Complex64 B.Complex64 Complex.zero; 109 | convert_to M.Int B.Int Int.max_int; 110 | convert_to M.Nativeint B.Nativeint Nativeint.max_int; 111 | 112 | let convert_from : 113 | type a b c. (a, b, c) B.Genarray.t -> a M.dtype -> unit = fun x dtype -> 114 | let y = M.of_bigarray x in 115 | assert_equal dtype (M.data_type y); 116 | assert_equal 117 | ~printer:[%show: int list] 118 | (List.rev @@ Array.to_list @@ B.Genarray.dims x) 119 | (M.shape y); 120 | assert_equal (B.Genarray.get x [|1; 1; 1|]) (M.get y [0; 0; 0]) 121 | in 122 | convert_from (B.Genarray.create Char Fortran_layout s) M.Char; 123 | convert_from (B.Genarray.create Int8_signed Fortran_layout s) M.Int8; 124 | convert_from (B.Genarray.create Int8_unsigned Fortran_layout s) M.Uint8; 125 | convert_from (B.Genarray.create Int16_signed Fortran_layout s) M.Int16; 126 | convert_from (B.Genarray.create Int16_unsigned Fortran_layout s) M.Uint16; 127 | convert_from (B.Genarray.create Int32 Fortran_layout s) M.Int32; 128 | convert_from (B.Genarray.create Int64 Fortran_layout s) M.Int64; 129 | convert_from (B.Genarray.create Float32 Fortran_layout s) M.Float32; 130 | convert_from (B.Genarray.create Float64 Fortran_layout s) M.Float64; 131 | convert_from (B.Genarray.create Complex32 Fortran_layout s) M.Complex32; 132 | convert_from (B.Genarray.create Complex64 Fortran_layout s) M.Complex64; 133 | convert_from (B.Genarray.create Int Fortran_layout s) M.Int; 134 | convert_from (B.Genarray.create Nativeint Fortran_layout s) M.Nativeint 135 | ) 136 | ] 137 | -------------------------------------------------------------------------------- /zarr/src/node.mli: -------------------------------------------------------------------------------- 1 | (** This module provides functionality for manipulating Zarr nodes. 2 | 3 | A Zarr V3 node is associated with either a group or an array. 4 | All nodes in a hierarchy have a name and a path. The root node does not 5 | have a name and is the empty string "". Except for the root node, each 6 | node in a hierarchy must have a name, which is a string of unicode code 7 | points. The following constraints apply to node names: 8 | - must not be the empty string (""). 9 | - must not include the character "/". 10 | - must not be a string composed only of period characters, e.g. "." or "..". 11 | - must not start with the reserved prefix "__".*) 12 | 13 | exception Node_invariant 14 | (** raised when a node's invariants are violated. *) 15 | 16 | exception Cannot_rename_root 17 | (** raised when attempting to rename a root node. *) 18 | 19 | module Group : sig 20 | type t 21 | (** The type of a Group node. 
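      Group nodes are built starting from the root; a small sketch with
      made-up names:
      {[
        let g = Node.Group.(root / "some" / "group") in
        assert (String.equal (Node.Group.to_key g) "some/group")
      ]}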
*) 22 | 23 | val root : t 24 | (** creates the root node *) 25 | 26 | val create : t -> string -> t 27 | (** [create p n] returns a group node with parent [p] and name [n]. 28 | @raise Node_invariant if node invariants are not satisfied. *) 29 | 30 | val ( / ) : t -> string -> t 31 | (** The infix operator alias of {!create} *) 32 | 33 | val of_path : string -> t 34 | (** [of_path s] returns a node from string [s]. 35 | @raise Node_invariant if node invariants are not satisfied. *) 36 | 37 | val to_path : t -> string 38 | (** [to_path n] returns node [n] as a string path. *) 39 | 40 | val name : t -> string 41 | (** [name n] returns the name of node [n]. The root node does not have a 42 | name and thus the empty string [""] is returned if [n] is a root node. *) 43 | 44 | val parent : t -> t option 45 | (** [parent n] returns [Some p] where [p] is the parent node of [n] 46 | of [None] if node [n] is the root node. *) 47 | 48 | val ( = ) : t -> t -> bool 49 | (** [x = y] returns [true] if nodes [x] and [y] are equal, 50 | and [false] otherwise. *) 51 | 52 | val ancestors : t -> t list 53 | (** [ancestors n] returns ancestor nodes of [n] including the root node. 54 | The root node has no ancestors, thus this returns the empty list 55 | if called on a root node. *) 56 | 57 | val to_key : t -> string 58 | (** [to_key n] converts a node's path to a key, as defined in the Zarr V3 59 | specification. *) 60 | 61 | val to_prefix : t -> string 62 | (** [to_prefix n] converts a node's path to a prefix key, as defined 63 | in the Zarr V3 specification. *) 64 | 65 | val to_metakey : t -> string 66 | (** [to_metakey n] returns the metadata key associated with node [n], 67 | as defined in the Zarr V3 specification. *) 68 | 69 | val is_child_group : t -> t -> bool 70 | (** [is_child_group m n] Tests if group node [m] is the immediate parent of 71 | group node [n]. Returns [true] when the test passes and [false] otherwise. *) 72 | 73 | val show : t -> string 74 | (** [show n] returns a string representation of a node type.*) 75 | 76 | val pp : Format.formatter -> t -> unit 77 | (** [pp fmt t] pretty prints a node type value.*) 78 | 79 | val rename : t -> string -> t 80 | (** [rename t s] returns a new group node with all properties of [t] 81 | but with its name changed to [s]. 82 | 83 | @raise Node_invariant if [s] is invalid name. 84 | @raise Cannot_rename_root if [t] is a root node.*) 85 | end 86 | 87 | module Array : sig 88 | type t 89 | (** The type of an array node. *) 90 | 91 | val create : Group.t -> string -> t 92 | (** [create p n] returns an array node with parent [p] and name [n]. 93 | @raise Node_invariant if node invariants are not satisfied. *) 94 | 95 | val ( / ) : Group.t -> string -> t 96 | (** The infix operator alias of {!create} *) 97 | 98 | val root : t 99 | (** creates an array root node *) 100 | 101 | val of_path : string -> t 102 | (** [of_path s] returns an array node from string [s]. 103 | @raise Node_invariant if node invariants are not satisfied. *) 104 | 105 | val to_path : t -> string 106 | (** [to_path n] returns array node [n] as a string path. *) 107 | 108 | val name : t -> string 109 | (** [name n] returns the name of array node [n]. *) 110 | 111 | val parent : t -> Group.t option 112 | (** [parent n] returns [Some p] where [p] is the parent group node of [n] 113 | or [None] if node [n] is a root node. *) 114 | 115 | val ( = ) : t -> t -> bool 116 | (** [x = y] returns [true] if nodes [x] and [y] are equal, 117 | and [false] otherwise. 
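      For example:
      {[
        let a = Node.Array.of_path "/some/group/data" in
        let b = Node.Array.of_path "/some/group/data" in
        assert Node.Array.(a = b)
      ]}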
*) 118 | 119 | val ancestors : t -> Group.t list 120 | (** [ancestors n] returns ancestor group nodes of [n]. *) 121 | 122 | val is_parent : t -> Group.t -> bool 123 | (** [is_parent n g] returns [true] if group node [g] is the immediate 124 | parent of array node [n] and [false] otherwise. *) 125 | 126 | val to_key : t -> string 127 | (** [to_key n] converts a node's path to a key, as defined in the Zarr V3 128 | specification. *) 129 | 130 | val to_metakey : t -> string 131 | (** [to_metakey n] returns the metadata key associated with node [n], 132 | as defined in the Zarr V3 specification. *) 133 | 134 | val show : t -> string 135 | (** [show n] returns a string representation of a node type. *) 136 | 137 | val pp : Format.formatter -> t -> unit 138 | (** [pp fmt t] pretty prints a node type value. *) 139 | 140 | val rename : t -> string -> t 141 | (** [rename t s] returns a new node with all properties of [t] 142 | but with its name changed to [s]. 143 | 144 | @raise Node_invariant if [s] is invalid name. 145 | @raise Cannot_rename_root if [t] is a root node.*) 146 | end 147 | -------------------------------------------------------------------------------- /zarr/src/ebuffer.ml: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | val set_char : bytes -> int -> char -> unit 3 | val set_bool : bytes -> int -> bool -> unit 4 | val set_int8 : bytes -> int -> int -> unit 5 | val set_uint8 : bytes -> int -> int -> unit 6 | val set_int16 : bytes -> int -> int -> unit 7 | val set_uint16 : bytes -> int -> int -> unit 8 | val set_int32 : bytes -> int -> int32 -> unit 9 | val set_int64 : bytes -> int -> int64 -> unit 10 | val set_uint64 : bytes -> int -> Stdint.uint64 -> unit 11 | val set_float32 : bytes -> int -> float -> unit 12 | val set_float64 : bytes -> int -> float -> unit 13 | val set_complex32 : bytes -> int -> Complex.t -> unit 14 | val set_complex64 : bytes -> int -> Complex.t -> unit 15 | val set_int : bytes -> int -> int -> unit 16 | val set_nativeint : bytes -> int -> nativeint -> unit 17 | 18 | val get_char : bytes -> int -> char 19 | val get_bool : bytes -> int -> bool 20 | val get_int8 : bytes -> int -> int 21 | val get_uint8 : bytes -> int -> int 22 | val get_int16 : bytes -> int -> int 23 | val get_uint16 : bytes -> int -> int 24 | val get_int32 : bytes -> int -> int32 25 | val get_int64 : bytes -> int -> int64 26 | val get_uint64 : bytes -> int -> Stdint.uint64 27 | val get_float32 : bytes -> int -> float 28 | val get_float64 : bytes -> int -> float 29 | val get_complex32 : bytes -> int -> Complex.t 30 | val get_complex64 : bytes -> int -> Complex.t 31 | val get_int : bytes -> int -> int 32 | val get_nativeint : bytes -> int -> nativeint 33 | end 34 | 35 | module Little = struct 36 | let set_int8 = Bytes.set_int8 37 | let set_uint8 = Bytes.set_uint8 38 | let set_char buf i v = Char.code v |> set_uint8 buf i 39 | let set_bool buf i v = Bool.to_int v |> set_uint8 buf i 40 | let set_int16 buf i v = Bytes.set_int16_le buf (2*i) v 41 | let set_uint16 buf i v = Bytes.set_uint16_le buf (2*i) v 42 | let set_int32 buf i v = Bytes.set_int32_le buf (4*i) v 43 | let set_int64 buf i v = Bytes.set_int64_le buf (8*i) v 44 | let set_uint64 buf i v = Stdint.Uint64.to_bytes_little_endian v buf (8*i) 45 | let set_int buf i v = Int64.of_int v |> set_int64 buf i 46 | let set_nativeint buf i v = Int64.of_nativeint v |> set_int64 buf i 47 | let set_float32 buf i v = Int32.bits_of_float v |> set_int32 buf i 48 | let set_float64 buf i v = 
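    (* Like [set_float32] above, the float is reinterpreted as its IEEE-754 bit
       pattern and stored as a little-endian [int64] at byte offset [8 * i]. *)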
Int64.bits_of_float v |> set_int64 buf i 49 | let set_complex32 buf i Complex.{re; im} = 50 | Int32.bits_of_float re |> Bytes.set_int32_le buf (8*i); 51 | Int32.bits_of_float im |> Bytes.set_int32_le buf (8*i + 4) 52 | let set_complex64 buf i Complex.{re; im} = 53 | Int64.bits_of_float re |> Bytes.set_int64_le buf (16*i); 54 | Int64.bits_of_float im |> Bytes.set_int64_le buf (16*i + 8) 55 | 56 | let get_int8 = Bytes.get_int8 57 | let get_uint8 = Bytes.get_uint8 58 | let get_char buf i = get_uint8 buf i |> Char.chr 59 | let get_bool buf i = match get_uint8 buf i with | 0 -> false | _ -> true 60 | let get_int16 = Bytes.get_int16_le 61 | let get_uint16 = Bytes.get_uint16_le 62 | let get_int32 = Bytes.get_int32_le 63 | let get_int64 = Bytes.get_int64_le 64 | let get_uint64 = Stdint.Uint64.of_bytes_little_endian 65 | let get_int buf i = get_int64 buf i |> Int64.to_int 66 | let get_nativeint buf i = get_int64 buf i |> Int64.to_nativeint 67 | let get_float32 buf i = get_int32 buf i |> Int32.float_of_bits 68 | let get_float64 buf i = get_int64 buf i |> Int64.float_of_bits 69 | let get_complex32 buf i = 70 | let re, im = get_float32 buf i, get_float32 buf (i + 4) in 71 | Complex.{re; im} 72 | let get_complex64 buf i = 73 | let re, im = get_float64 buf i, get_float64 buf (i + 8) in 74 | Complex.{re; im} 75 | end 76 | 77 | module Big = struct 78 | let set_int8 = Bytes.set_int8 79 | let set_uint8 = Bytes.set_uint8 80 | let set_char buf i v = Char.code v |> set_uint8 buf i 81 | let set_bool buf i v = Bool.to_int v |> set_uint8 buf i 82 | let set_int16 buf i v = Bytes.set_int16_be buf (i * 2) v 83 | let set_uint16 buf i v = Bytes.set_uint16_be buf (i * 2) v 84 | let set_int32 buf i v = Bytes.set_int32_be buf (i * 4) v 85 | let set_int64 buf i v = Bytes.set_int64_be buf (i * 8) v 86 | let set_uint64 buf i v = Stdint.Uint64.to_bytes_big_endian v buf (i * 8) 87 | let set_int buf i v = Int64.of_int v |> set_int64 buf i 88 | let set_nativeint buf i v = Int64.of_nativeint v |> set_int64 buf i 89 | let set_float32 buf i v = Int32.bits_of_float v |> set_int32 buf i 90 | let set_float64 buf i v = Int64.bits_of_float v |> set_int64 buf i 91 | let set_complex32 buf i Complex.{re; im} = 92 | Int32.bits_of_float re |> Bytes.set_int32_be buf (8*i); 93 | Int32.bits_of_float im |> Bytes.set_int32_be buf (8*i + 4) 94 | let set_complex64 buf i Complex.{re; im} = 95 | Int64.bits_of_float re |> Bytes.set_int64_be buf (16*i); 96 | Int64.bits_of_float im |> Bytes.set_int64_be buf (16*i + 8) 97 | 98 | let get_int8 = Bytes.get_int8 99 | let get_uint8 = Bytes.get_uint8 100 | let get_char buf i = get_uint8 buf i |> Char.chr 101 | let get_bool buf i = match get_uint8 buf i with | 0 -> false | _ -> true 102 | let get_int16 = Bytes.get_int16_be 103 | let get_uint16 = Bytes.get_uint16_be 104 | let get_int32 = Bytes.get_int32_be 105 | let get_int64 = Bytes.get_int64_be 106 | let get_uint64 = Stdint.Uint64.of_bytes_big_endian 107 | let get_int buf i = get_int64 buf i |> Int64.to_int 108 | let get_nativeint buf i = get_int64 buf i |> Int64.to_nativeint 109 | let get_float32 buf i = get_int32 buf i |> Int32.float_of_bits 110 | let get_float64 buf i = get_int64 buf i |> Int64.float_of_bits 111 | let get_complex32 buf i = 112 | let re, im = get_float32 buf i, get_float32 buf (i + 4) in 113 | Complex.{re; im} 114 | let get_complex64 buf i = 115 | let re, im = get_float64 buf i, get_float64 buf (i + 8) in 116 | Complex.{re; im} 117 | end 118 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | [![codecov][1]](https://codecov.io/gh/zoj613/zarr-ml) 2 | [![CI][2]](https://github.com/zoj613/zarr-ml/actions/workflows/) 3 | [![license][3]](https://github.com/zoj613/zarr-ml/blob/main/LICENSE) 4 | 5 | # zarr-ml 6 | This library provides an OCaml implementation of the Zarr version 3 7 | storage format specification for chunked & compressed multi-dimensional 8 | arrays, designed for use in parallel computing. 9 | 10 | ## Features 11 | - Supports creating n-dimensional Zarr arrays and chunking them along any dimension. 12 | - Compresses chunks using a variety of supported compression codecs. 13 | - Supports indexing operations to read/write views of a Zarr array. 14 | - Supports storing arrays in memory or on the local filesystem. It is also 15 | extensible, allowing users to easily create and use their own custom storage 16 | backends. See the example implementing an [In-memory Zip archive store][9] for more details. 17 | - Supports both synchronous and asynchronous I/O via [Lwt][4] and [Eio][8]. The user can 18 | easily use their own scheduler of choice. See the [example][10] implementing 19 | a filesystem store that uses the [Picos][11] concurrency library for non-blocking I/O. 20 | - Leverages the strong type system of OCaml to create a type-safe API, making 21 | it impossible to create, read or write malformed arrays. 22 | - Supports organizing arrays into hierarchies via groups. 23 | 24 | ## Documentation 25 | API documentation can be found [here][5]. The full specification of the storage 26 | format can be found [here][6]. 27 | 28 | ## Installation 29 | The library comes in several flavors depending on the synchronous/asynchronous 30 | backend of choice. To install the synchronous API, use 31 | ```shell 32 | $ opam install zarr-sync 33 | ``` 34 | To install zarr with an asynchronous API powered by `Lwt` or `Eio`, use 35 | ```shell 36 | $ opam install zarr-lwt 37 | $ opam install zarr-eio 38 | ``` 39 | To install the development version using the latest git commit, do 40 | ``` 41 | # for zarr-sync 42 | opam pin add zarr-sync git+https://github.com/zoj613/zarr-ml 43 | # for zarr-lwt 44 | opam pin add zarr-lwt git+https://github.com/zoj613/zarr-ml 45 | # for zarr-eio 46 | opam pin add zarr-eio git+https://github.com/zoj613/zarr-ml 47 | ``` 48 | 49 | ## Quick start 50 | Below is a demonstration of the library's API for synchronous reads/writes. 51 | A similar example using the `Lwt`-backed asynchronous API can be found [here][7]. 52 | ### setup 53 | ```ocaml 54 | open Zarr 55 | open Zarr.Codecs 56 | open Zarr.Indexing 57 | open Zarr_sync.Storage 58 | open IO.Infix (* opens infix operators >>= and >>| for monadic bind & map *) 59 | 60 | let store = FilesystemStore.create "testdata.zarr";; 61 | ``` 62 | ### create group 63 | ```ocaml 64 | let group_node = Node.Group.of_path "/some/group";; 65 | FilesystemStore.Group.create store group_node;; 66 | ``` 67 | ### create an array 68 | ```ocaml 69 | let array_node = Node.Array.(group_node / "name");; 70 | (* creates an array with char data type and fill value '?' *) 71 | FilesystemStore.Array.create 72 | ~codecs:[`Transpose [2; 0; 1]; `Bytes BE; `Gzip L2] 73 | ~shape:[100; 100; 50] 74 | ~chunks:[10; 15; 20] 75 | Ndarray.Char 76 | '?'
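  (* the final two arguments are the node at which the array is created
     and the store that holds it *)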
77 | array_node 78 | store;; 79 | ``` 80 | ### read/write from/to an array 81 | ```ocaml 82 | let slice = [R (0, 20); I 10; F];; 83 | let x = FilesystemStore.Array.read store array_node slice Ndarray.Char;; 84 | (* Do some computation on the array slice *) 85 | let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x;; 86 | FilesystemStore.Array.write store array_node slice x';; 87 | let y = FilesystemStore.Array.read store array_node slice Ndarray.Char;; 88 | assert (Ndarray.equal x' y);; 89 | ``` 90 | ### create an array with sharding 91 | ```ocaml 92 | let config = 93 | {chunk_shape = [5; 3; 5] 94 | ;codecs = [`Transpose [2; 0; 1]; `Bytes LE; `Zstd (0, true)] 95 | ;index_codecs = [`Bytes BE; `Crc32c] 96 | ;index_location = Start};; 97 | 98 | let shard_node = Node.Array.(group_node / "another");; 99 | 100 | FilesystemStore.Array.create 101 | ~codecs:[`ShardingIndexed config] 102 | ~shape:[100; 100; 50] 103 | ~chunks:[10; 15; 20] 104 | Ndarray.Complex32 105 | Complex.zero 106 | shard_node 107 | store;; 108 | ``` 109 | ### exploratory functions 110 | ```ocaml 111 | let a, g = FilesystemStore.hierarchy store;; 112 | List.map Node.Array.to_path a;; 113 | (*- : string list = ["/some/group/name"; "/some/group/another"] *) 114 | List.map Node.Group.to_path g;; 115 | (*- : string list = ["/"; "/some"; "/some/group"] *) 116 | 117 | FilesystemStore.Array.reshape store array_node [25; 32; 10];; 118 | 119 | let meta = FilesystemStore.Group.metadata store group_node;; 120 | Metadata.Group.show meta;; (* pretty prints the contents of the metadata *) 121 | 122 | FilesystemStore.Array.exists store shard_node;; 123 | FilesystemStore.Group.exists store group_node;; 124 | 125 | let a, g = FilesystemStore.Group.children store group_node;; 126 | List.map Node.Array.to_path a;; 127 | (*- : string list = ["/some/group/name"; "/some/group/another"] *) 128 | List.map Node.Group.to_path g;; 129 | (*- : string list = [] *) 130 | 131 | FilesystemStore.Group.delete store group_node;; 132 | FilesystemStore.clear store;; (* clears the store *) 133 | FilesystemStore.Group.rename store group_node "new_name";; 134 | FilesystemStore.Array.rename store anode "new_name";; 135 | ``` 136 | 137 | [1]: https://codecov.io/gh/zoj613/zarr-ml/graph/badge.svg?token=KOOG2Y1SH5 138 | [2]: https://img.shields.io/github/actions/workflow/status/zoj613/zarr-ml/build-and-test.yml?branch=main 139 | [3]: https://img.shields.io/github/license/zoj613/zarr-ml 140 | [4]: https://ocsigen.org/lwt/latest/manual/manual 141 | [5]: https://zoj613.github.io/zarr-ml 142 | [6]: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html 143 | [7]: https://zoj613.github.io/zarr-ml/zarr/Zarr/index.html#examples 144 | [8]: https://github.com/ocaml-multicore/eio 145 | [9]: https://github.com/zoj613/zarr-ml/tree/main/examples/zipstore.ml 146 | [10]: https://github.com/zoj613/zarr-ml/tree/main/examples/picos_fs_store.ml 147 | [11]: https://ocaml-multicore.github.io/picos/ 148 | -------------------------------------------------------------------------------- /zarr-eio/test/test_eio.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | open Zarr.Indexing 4 | open Zarr.Codecs 5 | open Zarr_eio.Storage 6 | 7 | let string_of_list = [%show: string list] 8 | let print_node_pair = [%show: Node.Array.t list * Node.Group.t list] 9 | 10 | module type EIO_STORE = Zarr.Storage.S with type 'a io := 'a 11 | 12 | let test_storage 13 | (type a) (module M : EIO_STORE with type t = a) (store : a) = 14 | 
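  (* Exercises the public storage API (hierarchy listing, group and array
     creation, reads/writes through a codec chain, renaming and deletion)
     against any Eio-backed store passed in as a first-class module. *)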
let open M in 15 | let gnode = Node.Group.root in 16 | 17 | let nodes = hierarchy store in 18 | assert_equal ~printer:print_node_pair ([], []) nodes; 19 | 20 | Group.create store gnode; 21 | let exists = Group.exists store gnode in 22 | assert_equal ~printer:string_of_bool true exists; 23 | 24 | let meta = Group.metadata store gnode in 25 | assert_equal ~printer:Metadata.Group.show Metadata.Group.default meta; 26 | 27 | Group.delete store gnode; 28 | let exists = Group.exists store gnode in 29 | assert_equal ~printer:string_of_bool false exists; 30 | let nodes = hierarchy store in 31 | assert_equal ~printer:print_node_pair ([], []) nodes; 32 | 33 | let attrs = `Assoc [("questions", `String "answer")] in 34 | Group.create ~attrs store gnode; 35 | let meta = Group.metadata store gnode in 36 | assert_equal ~printer:Yojson.Safe.show attrs @@ Metadata.Group.attributes meta; 37 | 38 | let exists = Array.exists store @@ Node.Array.(gnode / "non-member") in 39 | assert_equal ~printer:string_of_bool false exists; 40 | 41 | let cfg = 42 | {chunk_shape = [2; 5; 5] 43 | ;index_location = End 44 | ;index_codecs = [`Bytes BE] 45 | ;codecs = [`Bytes LE]} in 46 | let anode = Node.Array.(gnode / "arrnode") in 47 | let slice = [R (0, 20); I 10; R (0, 29)] in 48 | let exp = Ndarray.init Complex32 [21; 1; 30] (Fun.const Complex.one) in 49 | 50 | List.iter 51 | (fun codecs -> 52 | Array.create ~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Complex32 Complex.one anode store; 53 | Array.write store anode slice exp; 54 | let got = Array.read store anode slice Complex32 in 55 | assert_equal exp got; 56 | Ndarray.fill exp Complex.{re=2.0; im=0.}; 57 | Array.write store anode slice exp; 58 | let got = Array.read store anode slice Complex32 in 59 | assert_equal exp got; 60 | Ndarray.fill exp Complex.{re=0.; im=3.0}; 61 | Array.write store anode slice exp; 62 | let got = Array.read store anode slice Complex32 in 63 | assert_equal exp got; 64 | match codecs with 65 | | [`ShardingIndexed _] -> Array.delete store anode 66 | | _ -> IO.return_unit) 67 | [[`ShardingIndexed cfg]; [`Bytes BE]]; 68 | 69 | let child = Node.Group.of_path "/some/child/group" in 70 | Group.create store child; 71 | let arrays, groups = Group.children store gnode in 72 | assert_equal ~printer:string_of_list ["/arrnode"] (List.map Node.Array.to_path arrays); 73 | assert_equal ~printer:string_of_list ["/some"] (List.map Node.Group.to_path groups); 74 | let c = Group.children store @@ Node.Group.(root / "fakegroup") in 75 | assert_equal ([], []) c; 76 | let ac, gc = hierarchy store in 77 | let got = List.fast_sort String.compare @@ List.map Node.Array.show ac @ List.map Node.Group.show gc in 78 | assert_equal ~printer:string_of_list ["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got; 79 | (* tests for renaming nodes *) 80 | let some = Node.Group.of_path "/some/child" in 81 | Array.rename store anode "ARRAYNODE"; 82 | Group.rename store some "CHILD"; 83 | let ac, gc = hierarchy store in 84 | let got = List.fast_sort String.compare (List.map Node.Array.show ac @ List.map Node.Group.show gc) in 85 | assert_equal ~printer:string_of_list ["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got; 86 | (* restore old array node name. 
*) 87 | Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode"; 88 | let nshape = [25; 32; 10] in 89 | Array.reshape store anode nshape; 90 | let meta = Array.metadata store anode in 91 | assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta; 92 | assert_raises 93 | (Zarr.Storage.Key_not_found "fakegroup/zarr.json") 94 | (fun () -> Array.metadata store Node.Array.(gnode / "fakegroup")); 95 | Array.delete store anode; 96 | clear store; 97 | let got = hierarchy store in 98 | assert_equal ~printer:print_node_pair ([], []) got 99 | 100 | let _ = 101 | run_test_tt_main @@ ("Run Zarr Eio API tests" >::: [ 102 | "test eio-based stores" >:: 103 | (fun _ -> 104 | Eio_main.run @@ fun env -> 105 | let rand_num = string_of_int (Random.int 10_000) in 106 | let tmp_dir = Filename.(concat (get_temp_dir_name ()) (rand_num ^ ".zarr")) in 107 | let s = FilesystemStore.create ~env tmp_dir in 108 | 109 | assert_raises 110 | (Sys_error (Format.sprintf "%s: File exists" tmp_dir)) 111 | (fun () -> FilesystemStore.create ~env tmp_dir); 112 | (* ensure it works with an extra "/" appended to directory name. *) 113 | ignore @@ FilesystemStore.open_store ~env (tmp_dir ^ "/"); 114 | let fakedir = "non-existant-zarr-store112345.zarr" in 115 | assert_raises 116 | (Sys_error (Printf.sprintf "%s: No such file or directory" fakedir)) 117 | (fun () -> FilesystemStore.open_store ~env fakedir); 118 | let fn = Filename.temp_file "nonexistantfile" ".zarr" in 119 | assert_raises 120 | (Zarr.Storage.Not_a_filesystem_store fn) 121 | (fun () -> FilesystemStore.open_store ~env fn); 122 | 123 | (* test with non-existant archive *) 124 | let zpath = tmp_dir ^ ".zip" in 125 | test_storage (module ZipStore) (ZipStore.create zpath); 126 | assert_raises (ZipStore.Path_already_exists zpath) (fun () -> ZipStore.create zpath); 127 | (* test just opening the now existant archive created by the previous test. 
*) 128 | let levels = [L0; L1; L2; L3; L4; L5; L6; L7; L8; L9] in 129 | List.iter (fun level -> ZipStore.open_store ~level zpath |> ignore) levels; 130 | test_storage (module MemoryStore) (MemoryStore.create ()); 131 | test_storage (module FilesystemStore) s) 132 | ]) 133 | -------------------------------------------------------------------------------- /zarr/test/test_node.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | 4 | let group_node = [ 5 | "group node tests" >:: (fun _ -> 6 | let n = Node.Group.(root / "somename") in 7 | 8 | (* test node invariants *) 9 | List.iter 10 | (fun x -> 11 | assert_raises Zarr.Node.Node_invariant @@ fun () -> 12 | Node.Group.create n x) 13 | [""; "na/me"; "...."; "__name"]; 14 | 15 | (* creation from string path *) 16 | let r = Node.Group.of_path "/" in 17 | assert_equal ~printer:Node.Group.show Node.Group.root r; 18 | List.iter 19 | (fun x -> 20 | assert_raises Zarr.Node.Node_invariant @@ fun () -> 21 | Node.Group.of_path x) 22 | [""; "na/meas"; "/some/..."; "/root/__name"; "/sd/"]; 23 | 24 | (* node name tests *) 25 | let n = Node.Group.of_path "/some/dir/moredirs/path/pname" in 26 | assert_equal "pname" @@ Node.Group.name n; 27 | assert_equal "" @@ Node.Group.name Node.Group.root; 28 | 29 | (* parent tests *) 30 | assert_equal None @@ Node.Group.parent Node.Group.root; 31 | match Node.Group.parent n with 32 | | None -> 33 | assert_failure "A non-root node must have a parent."; 34 | | Some p -> 35 | assert_equal "/some/dir/moredirs/path" @@ Node.Group.show p; 36 | 37 | (* equality tests *) 38 | assert_equal ~printer:Node.Group.show Node.Group.root Node.Group.root; 39 | assert_bool 40 | "root node cannot be equal to its child" @@ 41 | not Node.Group.(root = n); 42 | assert_bool 43 | "non-root node cannot have root as child" @@ 44 | not Node.Group.(n = root); 45 | 46 | (* ancestory tests *) 47 | assert_equal [] @@ Node.Group.ancestors Node.Group.root; 48 | assert_equal 49 | ~printer:[%show: string list] 50 | ["/"; "/some"; "/some/dir"; "/some/dir/moredirs" 51 | ;"/some/dir/moredirs/path"] 52 | (Node.Group.ancestors n |> List.map Node.Group.show); 53 | let exp_parents = Node.Group.ancestors n in 54 | let r, l = List.fold_left_map 55 | (fun acc _ -> 56 | match Node.Group.parent acc with 57 | | Some acc' -> acc', acc' 58 | | None -> acc, acc) n exp_parents 59 | in 60 | assert_equal 61 | ~printer:[%show: Node.Group.t list] 62 | exp_parents @@ 63 | List.rev l; 64 | assert_equal ~printer:Node.Group.show r Node.Group.root; 65 | 66 | (* child node tests *) 67 | let p = Node.Group.parent n |> Option.get in 68 | assert_equal 69 | ~printer:string_of_bool 70 | true @@ 71 | Node.Group.is_child_group p n; 72 | assert_equal 73 | ~printer:string_of_bool 74 | false @@ 75 | Node.Group.is_child_group n Node.Group.root; 76 | assert_equal 77 | ~printer:string_of_bool 78 | false @@ 79 | Node.Group.is_child_group Node.Group.root Node.Group.root; 80 | 81 | (* rename tests *) 82 | assert_raises 83 | (Zarr.Node.Cannot_rename_root) 84 | (fun () -> Node.Group.rename Node.Group.root "somename"); 85 | assert_raises 86 | (Zarr.Node.Node_invariant) 87 | (fun () -> Node.Group.rename n "?illegal/"); 88 | let n' = Node.Group.rename n "newname" in 89 | assert_bool "" Node.Group.(name n' <> name n); 90 | 91 | (* stringify tests *) 92 | assert_equal 93 | ~printer:Fun.id "" @@ Node.Group.to_key Node.Group.root; 94 | 95 | assert_equal 96 | ~printer:Fun.id 97 | "some/dir/moredirs/path/pname" @@ 98 | Node.Group.to_key n; 99 | 
100 | assert_equal 101 | ~printer:Fun.id 102 | "zarr.json" @@ 103 | Node.Group.to_metakey Node.Group.root; 104 | 105 | assert_equal 106 | ~printer:Fun.id 107 | ("some/dir/moredirs/path/pname/zarr.json") @@ 108 | Node.Group.to_metakey n) 109 | ] 110 | 111 | let array_node = [ 112 | "array node tests" >:: (fun _ -> 113 | let _ = Node.Array.(Node.Group.root / "somename") in 114 | 115 | (* test node invariants *) 116 | List.iter 117 | (fun x -> 118 | assert_raises Zarr.Node.Node_invariant @@ fun () -> 119 | Node.Array.create Node.Group.root x) 120 | [""; "na/me"; "...."; "__name"]; 121 | 122 | (* creation from string path *) 123 | List.iter 124 | (fun x -> 125 | assert_raises Zarr.Node.Node_invariant @@ fun () -> 126 | Node.Array.of_path x) 127 | ["/"; ""; "na/meas"; "/some/..."; "/root/__name"; "/sd/"]; 128 | 129 | (* node name tests *) 130 | let s = "/some/dir/moredirs/path/pname" in 131 | let n = Node.Array.of_path s in 132 | assert_equal "pname" @@ Node.Array.name n; 133 | assert_equal ~printer:Fun.id s @@ Node.Array.show n; 134 | 135 | (* parent tests *) 136 | assert_equal 137 | ~printer:Node.Group.show 138 | Node.Group.root @@ 139 | Option.get @@ Node.Array.parent @@ Node.Array.of_path "/nodename"; 140 | assert_equal None Node.Array.(parent root); 141 | 142 | (* equality tests *) 143 | let n' = Node.Array.of_path s in 144 | assert_equal ~printer:Node.Array.show n n'; 145 | assert_equal true Node.Array.(n = n'); 146 | assert_equal 147 | false @@ 148 | Node.Array.(n = Node.Array.of_path (s ^ "/more")); 149 | 150 | (* ancestory tests *) 151 | assert_equal [] Node.Array.(ancestors root); 152 | assert_equal 153 | ~printer:[%show: string list] 154 | ["/"; "/some"; "/some/dir"; "/some/dir/moredirs" 155 | ;"/some/dir/moredirs/path"] 156 | (Node.Array.ancestors n 157 | |> List.map Node.Group.show 158 | |> List.fast_sort String.compare); 159 | let m = Node.Array.of_path "/some" in 160 | assert_equal false Node.Array.(is_parent root Node.Group.root); 161 | assert_equal true @@ Node.Array.is_parent m Node.Group.root; 162 | 163 | (* rename tests *) 164 | assert_raises 165 | (Zarr.Node.Cannot_rename_root) 166 | (fun () -> Node.Array.rename Node.Array.root "somename"); 167 | assert_raises 168 | (Zarr.Node.Node_invariant) 169 | (fun () -> Node.Array.rename n "?illegal/"); 170 | let n' = Node.Array.rename n "newname" in 171 | assert_bool "" Node.Array.(name n' <> name n); 172 | 173 | (* stringify tests *) 174 | assert_equal 175 | ~printer:Fun.id 176 | "some/dir/moredirs/path/pname" @@ 177 | Node.Array.to_key n; 178 | assert_equal ~printer:Fun.id "" Node.Array.(to_key root); 179 | assert_equal ~printer:Fun.id "/" Node.Array.(to_path root); 180 | 181 | assert_equal ~printer:Fun.id "zarr.json" Node.Array.(to_metakey root); 182 | assert_equal 183 | ~printer:Fun.id 184 | ("some/dir/moredirs/path/pname/zarr.json") @@ 185 | Node.Array.to_metakey n) 186 | ] 187 | 188 | let tests = group_node @ array_node 189 | -------------------------------------------------------------------------------- /zarr/src/extensions.ml: -------------------------------------------------------------------------------- 1 | module RegularGrid = struct 2 | exception Grid_shape_mismatch 3 | type t = int list 4 | let chunk_shape : t -> int list = Fun.id 5 | let ceildiv x y = Float.(to_int @@ ceil (of_int x /. of_int y)) 6 | let floordiv x y = Float.(to_int @@ floor (of_int x /. 
of_int y)) 7 | let grid_shape t array_shape = List.map2 ceildiv array_shape t 8 | let index_coord_pair t coord = (List.map2 floordiv coord t, List.map2 Int.rem coord t) 9 | let ( = ) x y = List.equal Int.equal x y 10 | let max = List.fold_left Int.max Int.min_int 11 | 12 | let create ~array_shape chunk_shape = 13 | if List.(length chunk_shape <> length array_shape) || (max chunk_shape > max array_shape) 14 | then raise Grid_shape_mismatch else chunk_shape 15 | 16 | (* returns all chunk indices in this regular grid *) 17 | let indices t array_shape = 18 | let lol = List.map (fun x -> List.init x Fun.id) (grid_shape t array_shape) in 19 | Ndarray.Indexing.cartesian_prod lol 20 | 21 | let to_yojson (g : t) : Yojson.Safe.t = 22 | let name = ("name", `String "regular") in 23 | `Assoc [name; ("configuration", `Assoc [("chunk_shape", `List (List.map (fun x -> `Int x) g))])] 24 | 25 | let add (x : Yojson.Safe.t) acc = match x with 26 | | `Int i when i > 0 -> Result.map (List.cons i) acc 27 | | _ -> Error "chunk_shape must only contain positive ints." 28 | 29 | let of_yojson (array_shape: int list) (x : Yojson.Safe.t) = match x with 30 | | `Assoc ["name", `String "regular"; "configuration", `Assoc ["chunk_shape", `List l]] -> 31 | begin try Result.map (create ~array_shape) (List.fold_right add l (Ok [])) 32 | with Grid_shape_mismatch -> Error "grid shape mismatch." end 33 | | `Null -> Error "array metadata must contain a chunk_grid field." 34 | | _ -> Error "Invalid Chunk grid name or configuration." 35 | end 36 | 37 | module ChunkKeyEncoding = struct 38 | type kind = Default | V2 39 | type t = {name : kind; sep : string; is_default : bool} 40 | 41 | let create = function 42 | | `Dot -> {name = Default; sep = "."; is_default = false} 43 | | `Slash -> {name = Default; sep = "/"; is_default = false} 44 | 45 | (* map a chunk coordinate index to a key. E.g, (2,3,1) maps to c/2/3/1 *) 46 | let encode {name; sep; _} index = 47 | let xs = List.fold_right (fun i acc -> string_of_int i :: acc) index [] in 48 | match name with 49 | | Default -> String.concat sep ("c" :: xs) 50 | | V2 -> if List.length index = 0 then "0" else String.concat sep xs 51 | 52 | let ( = ) x y = Bool.equal x.is_default y.is_default && String.equal x.sep y.sep && x.name = y.name 53 | 54 | let to_yojson : t -> Yojson.Safe.t = fun {name; sep; is_default} -> 55 | let str = match name with 56 | | Default -> "default" 57 | | V2 -> "v2" 58 | in 59 | if is_default then `Assoc [("name", `String str)] else 60 | `Assoc [("name", `String str); ("configuration", `Assoc [("separator", `String sep)])] 61 | 62 | let of_yojson : Yojson.Safe.t -> (t, string) result = function 63 | | `Assoc [("name", `String "v2")] -> Ok {name = V2; sep = "."; is_default = true} 64 | | `Assoc [("name", `String "v2"); ("configuration", `Assoc [("separator", `String ("/" as slash))])] -> 65 | Ok {name = V2; sep = slash; is_default = false} 66 | | `Assoc [("name", `String "v2"); ("configuration", `Assoc [("separator", `String ("." as dot))])] -> 67 | Ok {name = V2; sep = dot; is_default = false} 68 | | `Assoc [("name", `String "default")] -> Ok {name = Default; sep = "/"; is_default = true} 69 | | `Assoc [("name", `String "default"); ("configuration", `Assoc [("separator", `String ("/" as slash))])] -> 70 | Ok {name = Default; sep = slash; is_default = false} 71 | | `Assoc [("name", `String "default"); ("configuration", `Assoc [("separator", `String ("." 
as dot))])] -> 72 | Ok {name = Default; sep = dot; is_default = false} 73 | | `Null -> Error "array metadata must contain a chunk_key_encoding field." 74 | | _ -> Error "Invalid chunk key encoding configuration." 75 | end 76 | 77 | module Datatype = struct 78 | type t = 79 | | Char 80 | | Bool 81 | | Int8 82 | | Uint8 83 | | Int16 84 | | Uint16 85 | | Int32 86 | | Int64 87 | | Uint64 88 | | Float32 89 | | Float64 90 | | Complex32 91 | | Complex64 92 | | Int 93 | | Nativeint 94 | 95 | let ( = ) : t -> t -> bool = fun x y -> x = y 96 | 97 | let of_kind : type a. a Ndarray.dtype -> t = function 98 | | Ndarray.Char -> Char 99 | | Ndarray.Bool -> Bool 100 | | Ndarray.Int8 -> Int8 101 | | Ndarray.Uint8 -> Uint8 102 | | Ndarray.Int16 -> Int16 103 | | Ndarray.Uint16 -> Uint16 104 | | Ndarray.Int32 -> Int32 105 | | Ndarray.Int64 -> Int64 106 | | Ndarray.Uint64 -> Uint64 107 | | Ndarray.Float32 -> Float32 108 | | Ndarray.Float64 -> Float64 109 | | Ndarray.Complex32 -> Complex32 110 | | Ndarray.Complex64 -> Complex64 111 | | Ndarray.Int -> Int 112 | | Ndarray.Nativeint -> Nativeint 113 | 114 | let to_yojson = function 115 | | Char -> `String "char" 116 | | Bool -> `String "bool" 117 | | Int8 -> `String "int8" 118 | | Uint8 -> `String "uint8" 119 | | Int16 -> `String "int16" 120 | | Uint16 -> `String "uint16" 121 | | Int32 -> `String "int32" 122 | | Int64 -> `String "int64" 123 | | Uint64 -> `String "uint64" 124 | | Float32 -> `String "float32" 125 | | Float64 -> `String "float64" 126 | | Complex32 -> `String "complex32" 127 | | Complex64 -> `String "complex64" 128 | | Int -> `String "int" 129 | | Nativeint -> `String "nativeint" 130 | 131 | let of_yojson = function 132 | | `String "char" -> Ok Char 133 | | `String "bool" -> Ok Bool 134 | | `String "int8" -> Ok Int8 135 | | `String "uint8" -> Ok Uint8 136 | | `String "int16" -> Ok Int16 137 | | `String "uint16" -> Ok Uint16 138 | | `String "int32" -> Ok Int32 139 | | `String "int64" -> Ok Int64 140 | | `String "uint64" -> Ok Uint64 141 | | `String "float32" -> Ok Float32 142 | | `String "float64" -> Ok Float64 143 | | `String "complex32" -> Ok Complex32 144 | | `String "complex64" -> Ok Complex64 145 | | `String "int" -> Ok Int 146 | | `String "nativeint" -> Ok Nativeint 147 | | `Null -> Error "array metadata must contain a data_type field." 148 | | _ -> Error "Unsupported metadata data_type" 149 | end 150 | -------------------------------------------------------------------------------- /examples/picos_fs_store.ml: -------------------------------------------------------------------------------- 1 | (* this module implements a local filesystem zarr store that is backed by 2 | the Picos library for concurrent reads/writes. The main requirements 3 | is to implement the signature of Zarr.Types.Store. 4 | 5 | To compile & run this example execute the command 6 | dune exec -- examples/picos_fs_store.exe 7 | in your shell at the root of this project. 
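   The Store module below supplies only the primitive operations that the
   functor needs (get, set, partial reads/writes, size, is_member, list,
   list_dir, erase, rename); Zarr.Storage.Make(IO)(Store) then derives the
   high-level group and array API used at the bottom of this file.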
*) 8 | 9 | module PU = Picos_io.Unix 10 | module IO = Zarr_sync.Storage.IO 11 | 12 | module PicosFSStore : sig 13 | include Zarr.Storage.S with type 'a io := 'a 14 | val create : ?perm:Unix.file_perm -> string -> t 15 | end = struct 16 | 17 | module Store = struct 18 | type t = {dirname : string; perm : PU.file_perm} 19 | type 'a io = 'a IO.t 20 | 21 | let fspath_to_key t path = 22 | let pos = String.length t.dirname + 1 in 23 | String.sub path pos @@ String.length path - pos 24 | 25 | let key_to_fspath t key = Filename.concat t.dirname key 26 | 27 | let rec create_parent_dir fn perm = 28 | let parent_dir = Filename.dirname fn in 29 | try ignore @@ PU.stat parent_dir 30 | with PU.Unix_error (PU.ENOENT, _, _) -> 31 | create_parent_dir parent_dir perm; 32 | PU.mkdir parent_dir perm 33 | 34 | let size t key = 35 | match PU.openfile (key_to_fspath t key) [PU.O_RDONLY] t.perm with 36 | | exception Unix.Unix_error (Unix.ENOENT, "open", _) -> 0 37 | | fd -> 38 | Fun.protect ~finally:(fun () -> PU.close fd) @@ fun () -> 39 | PU.set_nonblock fd; 40 | PU.(fstat fd).st_size 41 | 42 | let get t key = 43 | let fd = PU.openfile (key_to_fspath t key) [PU.O_RDONLY] t.perm in 44 | Fun.protect ~finally:(fun () -> PU.close fd) @@ fun () -> 45 | PU.set_nonblock fd; 46 | let l = PU.(fstat fd).st_size in 47 | let buf = Bytes.create l in 48 | let _ = PU.read fd buf 0 l in 49 | Bytes.unsafe_to_string buf 50 | 51 | let get_partial_values t key ranges = 52 | let fd = PU.openfile (key_to_fspath t key) [PU.O_RDONLY] t.perm in 53 | Fun.protect ~finally:(fun () -> PU.close fd) @@ fun () -> 54 | PU.set_nonblock fd; 55 | let tot = PU.(fstat fd).st_size in 56 | let l = List.fold_left 57 | (fun a (s, l) -> 58 | Option.fold ~none:(Int.max a (tot - s)) ~some:(Int.max a) l) 0 ranges in 59 | let buf = Bytes.create l in 60 | List.fold_right 61 | (fun (ofs, len) acc -> 62 | let _ = PU.lseek fd ofs PU.SEEK_SET in 63 | let size = Option.fold ~none:(tot - ofs) ~some:Fun.id len in 64 | let _ = PU.read fd buf 0 size in 65 | Bytes.sub_string buf 0 size :: acc) ranges [] 66 | 67 | let set t key v = 68 | let p = key_to_fspath t key in 69 | create_parent_dir p t.perm; 70 | let fd = PU.openfile p PU.[O_WRONLY; O_TRUNC; O_CREAT] t.perm in 71 | Fun.protect ~finally:(fun () -> PU.close fd) @@ fun () -> 72 | PU.set_nonblock fd; 73 | ignore @@ PU.write_substring fd v 0 (String.length v) 74 | 75 | let set_partial_values t key ?(append=false) rvs = 76 | let flags = match append with 77 | | false -> PU.[O_WRONLY; O_CREAT] 78 | | true -> PU.[O_APPEND; O_WRONLY; O_CREAT] 79 | in 80 | let p = key_to_fspath t key in 81 | create_parent_dir p t.perm; 82 | let fd = PU.openfile p flags t.perm in 83 | Fun.protect ~finally:(fun () -> PU.close fd) @@ fun () -> 84 | rvs |> List.iter @@ fun (ofs, v) -> 85 | if append then ignore @@ PU.lseek fd 0 PU.SEEK_END 86 | else ignore @@ PU.lseek fd ofs PU.SEEK_SET; 87 | ignore @@ PU.write_substring fd v 0 (String.length v) 88 | 89 | let is_member t key = 90 | match PU.stat @@ key_to_fspath t key with 91 | | exception PU.Unix_error (PU.ENOENT, _, _) -> false 92 | | _ -> true 93 | 94 | let rec entries h acc = 95 | match PU.readdir h with 96 | | exception End_of_file -> acc 97 | | "." | ".." 
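       (* ignore the current and parent directory entries *)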
-> entries h acc 98 | | e -> entries h (e :: acc) 99 | 100 | let rec walk t acc dir = 101 | let h = PU.opendir dir in 102 | Fun.protect ~finally:(fun () -> PU.closedir h) @@ fun () -> 103 | List.fold_left 104 | (fun a x -> 105 | match Filename.concat dir x with 106 | | p when (PU.stat p).st_kind = PU.S_DIR -> walk t a p 107 | | p -> (fspath_to_key t p) :: a) acc @@ entries h [] 108 | 109 | let list t = walk t [] (key_to_fspath t "") 110 | 111 | let list_prefix t prefix = walk t [] (key_to_fspath t prefix) 112 | 113 | let erase t key = PU.unlink @@ key_to_fspath t key 114 | 115 | let erase_prefix t pre = List.iter (erase t) @@ list_prefix t pre 116 | 117 | let list_dir t prefix = 118 | let dir = key_to_fspath t prefix in 119 | let h = PU.opendir dir in 120 | Fun.protect ~finally:(fun () -> PU.closedir h) @@ fun () -> 121 | entries h [] |> List.partition_map @@ fun x -> 122 | match Filename.concat dir x with 123 | | p when (PU.stat p).st_kind = PU.S_DIR -> Either.right @@ (fspath_to_key t p) ^ "/" 124 | | p -> Either.left @@ fspath_to_key t p 125 | 126 | let rename t k k' = PU.rename (key_to_fspath t k) (key_to_fspath t k') 127 | end 128 | 129 | include Zarr.Storage.Make(IO)(Store) 130 | 131 | let create ?(perm=0o700) dirname = 132 | Zarr.Util.create_parent_dir dirname perm; 133 | Sys.mkdir dirname perm; 134 | Store.{dirname = Zarr.Util.sanitize_dir dirname; perm} 135 | end 136 | 137 | let _ = 138 | Picos_mux_random.run_on ~n_domains:1 @@ fun () -> 139 | let open Zarr in 140 | let open Zarr.Codecs in 141 | let open Zarr.Ndarray in 142 | let open Zarr.Indexing in 143 | 144 | let store = PicosFSStore.create "picosdata.zarr" in 145 | let gnode = Node.Group.of_path "/some/group" in 146 | PicosFSStore.Group.create store gnode; 147 | let anode = Node.Array.(gnode / "name") in 148 | let config = 149 | {chunk_shape = [5; 3; 5] 150 | ;codecs = [`Bytes LE; `Gzip L5] 151 | ;index_codecs = [`Bytes BE; `Crc32c] 152 | ;index_location = Start} in 153 | PicosFSStore.Array.create 154 | ~codecs:[`ShardingIndexed config] 155 | ~shape:[100; 100; 50] 156 | ~chunks:[10; 15; 20] 157 | Char '?' 
anode store; 158 | let slice = [R (0, 20); I 10; F] in 159 | let x = PicosFSStore.Array.read store anode slice Char in 160 | let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x in 161 | PicosFSStore.Array.write store anode slice x'; 162 | let y = PicosFSStore.Array.read store anode slice Char in 163 | assert (equal x' y) 164 | -------------------------------------------------------------------------------- /zarr-lwt/test/test_lwt.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | open Zarr.Indexing 4 | open Zarr.Codecs 5 | open Zarr_lwt.Storage 6 | 7 | let string_of_list = [%show: string list] 8 | let print_node_pair = [%show: Node.Array.t list * Node.Group.t list] 9 | 10 | module type LWT_STORE = Zarr.Storage.S with type 'a io := 'a Lwt.t 11 | 12 | let test_storage 13 | (type a) (module M : LWT_STORE with type t = a) (store : a) = 14 | let open M in 15 | let open IO.Infix in 16 | let gnode = Node.Group.root in 17 | 18 | hierarchy store >>= fun nodes -> 19 | assert_equal ~printer:print_node_pair ([], []) nodes; 20 | 21 | Group.create store gnode >>= fun () -> 22 | Group.exists store gnode >>= fun exists -> 23 | assert_equal ~printer:string_of_bool true exists; 24 | 25 | Group.metadata store gnode >>= fun meta -> 26 | assert_equal ~printer:Metadata.Group.show Metadata.Group.default meta; 27 | 28 | Group.delete store gnode >>= fun () -> 29 | Group.exists store gnode >>= fun exists -> 30 | assert_equal ~printer:string_of_bool false exists; 31 | hierarchy store >>= fun nodes -> 32 | assert_equal ~printer:print_node_pair ([], []) nodes; 33 | 34 | let attrs = `Assoc [("questions", `String "answer")] in 35 | Group.create ~attrs store gnode >>= fun () -> 36 | Group.metadata store gnode >>= fun meta -> 37 | assert_equal ~printer:Yojson.Safe.show attrs @@ Metadata.Group.attributes meta; 38 | 39 | Array.exists store @@ Node.Array.(gnode / "non-member") >>= fun exists -> 40 | assert_equal ~printer:string_of_bool false exists; 41 | 42 | let cfg = 43 | {chunk_shape = [2; 5; 5] 44 | ;index_location = End 45 | ;index_codecs = [`Bytes BE] 46 | ;codecs = [`Bytes LE]} in 47 | let anode = Node.Array.(gnode / "arrnode") in 48 | let slice = [R (0, 20); I 10; R (0, 29)] in 49 | let exp = Ndarray.init Ndarray.Complex32 [21; 1; 30] (Fun.const Complex.one) in 50 | 51 | Lwt_list.iter_s 52 | (fun codecs -> 53 | Array.create 54 | ~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20] 55 | Ndarray.Complex32 Complex.one anode store >>= fun () -> 56 | Array.write store anode slice exp >>= fun () -> 57 | Array.read store anode slice Complex32 >>= fun got -> 58 | assert_equal exp got; 59 | Ndarray.fill exp Complex.{re=2.0; im=0.}; 60 | Array.write store anode slice exp >>= fun () -> 61 | Array.read store anode slice Complex32 >>= fun arr -> 62 | assert_equal exp arr; 63 | Ndarray.fill exp Complex.{re=0.; im=3.0}; 64 | Array.write store anode slice exp >>= fun () -> 65 | Array.read store anode slice Complex32 >>= fun got -> 66 | assert_equal exp got; 67 | match codecs with 68 | | [`ShardingIndexed _] -> Array.delete store anode 69 | | _ -> IO.return_unit) 70 | [[`ShardingIndexed cfg]; [`Bytes BE]] >>= fun () -> 71 | 72 | let child = Node.Group.of_path "/some/child/group" in 73 | Group.create store child >>= fun () -> 74 | Group.children store gnode >>= fun (arrays, groups) -> 75 | assert_equal 76 | ~printer:string_of_list ["/arrnode"] (List.map Node.Array.to_path arrays); 77 | assert_equal 78 | ~printer:string_of_list ["/some"] (List.map 
Node.Group.to_path groups); 79 | 80 | Group.children store @@ Node.Group.(root / "fakegroup") >>= fun c -> 81 | assert_equal ([], []) c; 82 | 83 | hierarchy store >>= fun (ac, gc) -> 84 | let got = 85 | List.fast_sort String.compare @@ 86 | List.map Node.Array.show ac @ List.map Node.Group.show gc in 87 | assert_equal 88 | ~printer:string_of_list 89 | ["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got; 90 | 91 | (* tests for renaming nodes *) 92 | let some = Node.Group.of_path "/some/child" in 93 | Array.rename store anode "ARRAYNODE" >>= fun () -> 94 | Group.rename store some "CHILD" >>= fun () -> 95 | hierarchy store >>= fun (ac, gc) -> 96 | let got = 97 | List.fast_sort String.compare @@ 98 | List.map Node.Array.show ac @ List.map Node.Group.show gc in 99 | assert_equal 100 | ~printer:string_of_list 101 | ["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got; 102 | (* restore old array node name. *) 103 | Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode" >>= fun () -> 104 | 105 | let nshape = [25; 32; 10] in 106 | Array.reshape store anode nshape >>= fun () -> 107 | Array.metadata store anode >>= fun meta -> 108 | assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta; 109 | 110 | Array.delete store anode >>= fun () -> 111 | clear store >>= fun () -> 112 | hierarchy store >>= fun got -> 113 | assert_equal ~printer:print_node_pair ([], []) got; 114 | IO.return_unit 115 | 116 | let _ = 117 | run_test_tt_main @@ ("Run Zarr Lwt API tests" >::: [ 118 | "test lwt-based stores" >:: 119 | (fun _ -> 120 | let rand_num = string_of_int @@ Random.int 100_000 in 121 | let tmp_dir = Filename.(concat (get_temp_dir_name ()) (rand_num ^ ".zarr")) in 122 | let s = FilesystemStore.create tmp_dir in 123 | 124 | assert_raises 125 | (Sys_error (Format.sprintf "%s: File exists" tmp_dir)) 126 | (fun () -> FilesystemStore.create tmp_dir); 127 | 128 | (* ensure it works with an extra "/" appended to directory name. 
*) 129 | ignore @@ FilesystemStore.open_store (tmp_dir ^ "/"); 130 | 131 | let fakedir = "non-existant-zarr-store112345.zarr" in 132 | assert_raises 133 | (Sys_error (Printf.sprintf "%s: No such file or directory" fakedir)) 134 | (fun () -> FilesystemStore.open_store fakedir); 135 | 136 | let fn = Filename.temp_file "nonexistantfile" ".zarr" in 137 | assert_raises 138 | (Zarr.Storage.Not_a_filesystem_store fn) 139 | (fun () -> FilesystemStore.open_store fn); 140 | 141 | (* ZipStore configuration *) 142 | let zpath = tmp_dir ^ ".zip" 143 | and levels = [L0; L1; L2; L3; L4; L5; L6; L7; L8; L9] 144 | (* AmazonS3Store configuration *) 145 | and region = Aws_s3.Region.minio ~port:9000 ~host:"localhost" () 146 | and bucket = "test-bucket-lwt" 147 | and profile = "default" in 148 | 149 | Lwt_main.run @@ Lwt.join 150 | [Lwt.bind (ZipStore.create zpath) (test_storage (module ZipStore)) 151 | ;IO.iter (fun level -> ignore (ZipStore.open_store ~level zpath); Lwt.return_unit) levels 152 | ;AmazonS3Store.with_open ~region ~bucket ~profile (test_storage (module AmazonS3Store)) 153 | ;test_storage (module MemoryStore) @@ MemoryStore.create () 154 | ;test_storage (module FilesystemStore) s]) 155 | ]) 156 | -------------------------------------------------------------------------------- /zarr/src/storage/zip_archive.ml: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | exception Path_already_exists of string 3 | include Storage.S 4 | val open_store : ?level:Codecs.deflate_level -> string -> t 5 | (** [open_store ?level p] returns a store instance representing a zip 6 | archive of a Zarr v3 hierarchy stored at path [p]. [level] is the DEFLATE 7 | algorithm compression setting used when writing new entries into the archive. *) 8 | 9 | val create : ?level:Codecs.deflate_level -> string -> t io 10 | (** [create ?level p] creates a zip archive at path [p] and then returns a 11 | store instance representing the zip archive. [level] is the DEFLATE algorithm 12 | compression setting used when writing new entries into the archive. 13 | 14 | @raise Path_already_exists if a file already exists at path [p]. 
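
    For illustration, a small usage sketch (assuming the synchronous instantiation
    [Zarr_sync.Storage.ZipStore], for which ['a io] is just ['a]; ["data.zip"] is a
    hypothetical path):
    {[
      let store = ZipStore.create ~level:Codecs.L1 "data.zip" in
      (* a later session re-opens the existing archive instead of creating it *)
      let store' = ZipStore.open_store "data.zip" in
      ignore (store, store')
    ]}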
*) 15 | end 16 | 17 | module Make (IO : Types.IO) : S with type 'a io := 'a IO.t = struct 18 | open IO.Syntax 19 | 20 | let with_open_in path f = 21 | let* ic = IO.map Zip.open_in (IO.return path) in 22 | Fun.protect ~finally:(fun () -> Zip.close_in ic) (fun () -> IO.return (f ic)) 23 | 24 | let with_open_out path f = 25 | let* oc = IO.map Zip.open_update (IO.return path) in 26 | Fun.protect ~finally:(fun () -> Zip.close_out oc) (fun () -> IO.return (f oc)) 27 | 28 | module Store = struct 29 | type t = {path : string; level : int} 30 | type 'a io = 'a IO.t 31 | 32 | let is_member t key = 33 | let entry_exists ~key ic = match Zip.find_entry ic key with 34 | | exception Not_found -> false 35 | | _ -> true 36 | in 37 | with_open_in t.path (entry_exists ~key) 38 | 39 | let size t key = 40 | let entry_size ~key ic = match Zip.find_entry ic key with 41 | | exception Not_found -> 0 42 | | e -> e.uncompressed_size 43 | in 44 | with_open_in t.path (entry_size ~key) 45 | 46 | let get t key = 47 | let read_entry ~key ic = match Zip.find_entry ic key with 48 | | exception Not_found -> raise (Storage.Key_not_found key) 49 | | e -> Zip.read_entry ic e 50 | in 51 | with_open_in t.path (read_entry ~key) 52 | 53 | let get_partial_values t key ranges = 54 | let read_range ~data ~size (ofs, len) = match len with 55 | | Some l -> String.sub data ofs l 56 | | None -> String.sub data ofs (size - ofs) 57 | in 58 | let+ data = get t key in 59 | let size = String.length data in 60 | List.map (read_range ~data ~size) ranges 61 | 62 | let list t = 63 | let to_filename : Zip.entry -> string = fun e -> e.filename in 64 | let get_keys ic = List.map to_filename (Zip.entries ic) in 65 | with_open_in t.path get_keys 66 | 67 | module StrSet = Set.Make(String) 68 | 69 | let list_dir t prefix = 70 | let n = String.length prefix in 71 | let add_entry_with_prefix ((l, r) as acc) = function 72 | | (e : Zip.entry) when not (String.starts_with ~prefix e.filename) -> acc 73 | | e when String.contains_from e.filename n '/' -> 74 | let key = e.filename in 75 | let pre = String.sub key 0 (1 + String.index_from key n '/') in 76 | StrSet.add pre l, r 77 | | e -> l, e.filename :: r 78 | in 79 | let+ entries = with_open_in t.path Zip.entries in 80 | let prefs, keys = List.fold_left add_entry_with_prefix (StrSet.empty, []) entries in 81 | keys, StrSet.elements prefs 82 | 83 | let set t key value = 84 | let level = t.level in 85 | with_open_out t.path (fun oc -> Zip.add_entry ~level value oc key) 86 | 87 | let set_partial_values t key ?(append=false) rvs = 88 | let* ov = try get t key with 89 | | Storage.Key_not_found _ -> IO.return String.empty 90 | in 91 | let f = if append || ov = String.empty then 92 | fun acc (_, v) -> acc ^ v else 93 | fun acc (rs, v) -> 94 | let s = Bytes.unsafe_of_string acc in 95 | Bytes.blit_string v 0 s rs String.(length v); 96 | Bytes.unsafe_to_string s 97 | in 98 | set t key (List.fold_left f ov rvs) 99 | 100 | let add_to_zip ~oc ~level (path, v) = Zip.add_entry ~level v oc path 101 | 102 | let rename t prefix new_prefix = 103 | let add_pair ~ic ~prefix ~new_prefix acc = function 104 | | (e : Zip.entry) when not (String.starts_with ~prefix e.filename) -> 105 | (e.filename, Zip.read_entry ic e) :: acc 106 | | e -> 107 | let l = String.length prefix in 108 | let path = new_prefix ^ String.sub e.filename l (String.length e.filename - l) in 109 | (path, Zip.read_entry ic e) :: acc 110 | in 111 | let rename_entries ic = List.fold_left (add_pair ~ic ~prefix ~new_prefix) [] (Zip.entries ic) in 112 | let* pairs = 
with_open_in t.path rename_entries in 113 | let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) 114 | let level = t.level in 115 | with_open_out t.path (fun oc -> List.iter (add_to_zip ~oc ~level) pairs) 116 | 117 | let erase t key = 118 | let filter ~ic acc = function 119 | | (e : Zip.entry) when e.filename = key -> acc 120 | | e -> (e.filename, Zip.read_entry ic e) :: acc 121 | in 122 | let filter_entries ic = List.fold_left (filter ~ic) [] (Zip.entries ic) in 123 | let* pairs = with_open_in t.path filter_entries in 124 | let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) 125 | with_open_out t.path (fun oc -> List.iter (add_to_zip ~oc ~level:t.level) pairs) 126 | 127 | let erase_prefix t prefix = 128 | let filter ~ic ~prefix acc = function 129 | | (e : Zip.entry) when String.starts_with ~prefix e.filename -> acc 130 | | e -> (e.filename, Zip.read_entry ic e) :: acc 131 | in 132 | let filter_entries ic = List.fold_left (filter ~ic ~prefix) [] (Zip.entries ic) in 133 | let* pairs = with_open_in t.path filter_entries in 134 | let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) 135 | with_open_out t.path (fun oc -> List.iter (add_to_zip ~oc ~level:t.level) pairs) 136 | end 137 | 138 | exception Path_already_exists of string 139 | 140 | let open_store ?(level=Codecs.L6) path = 141 | let l = match level with 142 | | L0 -> 0 | L1 -> 1 | L2 -> 2 | L3 -> 3 | L4 -> 4 143 | | L5 -> 5 | L6 -> 6 | L7 -> 7 | L8 -> 8 | L9 -> 9 144 | in 145 | Store.{path; level = l} 146 | 147 | let create ?(level=Codecs.L6) path = 148 | if Sys.file_exists path then raise (Path_already_exists path) 149 | else 150 | let* oc = IO.map Zip.open_out (IO.return path) in 151 | Fun.protect ~finally:(fun () -> Zip.close_out oc) (fun () -> IO.return (open_store ~level path)) 152 | 153 | include Storage.Make(IO)(Store) 154 | end 155 | -------------------------------------------------------------------------------- /zarr/src/storage/storage_intf.ml: -------------------------------------------------------------------------------- 1 | exception Invalid_resize_shape 2 | exception Invalid_data_type 3 | exception Invalid_array_slice 4 | exception Key_not_found of string 5 | exception Not_a_filesystem_store of string 6 | 7 | module type S = sig 8 | type t 9 | (** The storage type. *) 10 | 11 | type 'a io 12 | (** The I/O monad type.*) 13 | 14 | module Group : sig 15 | val create : ?attrs:Yojson.Safe.t -> t -> Node.Group.t -> unit io 16 | (** [create ?attrs t node] creates a group node in store [t] 17 | containing attributes [attrs]. This is a no-op if [node] 18 | is already a member of this store. *) 19 | 20 | val metadata : t -> Node.Group.t -> Metadata.Group.t io 21 | (** [metadata node t] returns the metadata of group node [node]. 22 | 23 | @raise Key_not_found if node is not a member of store [t].*) 24 | 25 | val children : t -> Node.Group.t -> (Node.Array.t list * Node.Group.t list) io 26 | (** [children t n] returns a tuple of child nodes of group node [n]. 27 | This operation returns a pair of empty lists if node [n] has no 28 | children or is not a member of store [t]. 29 | 30 | @raise Parse_error if any child node has invalid [node_type] metadata.*) 31 | 32 | val delete : t -> Node.Group.t -> unit io 33 | (** [delete t n] erases group node [n] from store [t]. This also 34 | erases all child nodes of [n]. If node [n] is not a member 35 | of store [t] then this is a no-op. 
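
        A minimal sketch (assuming [M] is some module satisfying {!S} with a synchronous
        ['a io = 'a], and [store] is an already-opened store value):
        {[
          let g = Node.Group.of_path "/some/group" in
          M.Group.create store g;
          M.Group.delete store g (* also removes every child node of [g] *)
        ]}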
*) 36 | 37 | val exists : t -> Node.Group.t -> bool io 38 | (** [exists t n] returns [true] if group node [n] is a member 39 | of store [t] and [false] otherwise. *) 40 | 41 | val rename : t -> Node.Group.t -> string -> unit io 42 | (** [rename t g name] changes the name of group node [g] in store [t] to [name]. 43 | 44 | @raise Key_not_found if [g] is not a member of store [t]. 45 | @raise Renaming_root if [g] is the store's root node. 46 | @raise Node_invariant if [name] is an invalid node name.*) 47 | end 48 | 49 | module Array : sig 50 | val create : 51 | ?sep:[< `Dot | `Slash > `Slash ] -> 52 | ?dimension_names:string option list -> 53 | ?attributes:Yojson.Safe.t -> 54 | codecs:Codecs.codec list -> 55 | shape:int list -> 56 | chunks:int list -> 57 | 'a Ndarray.dtype -> 58 | 'a -> 59 | Node.Array.t -> 60 | t -> 61 | unit io 62 | (** [create ~sep ~dimension_names ~attributes ~codecs ~shape ~chunks kind fill node t] 63 | creates an array node in store [t] where: 64 | - Separator [sep] is used in the array's chunk key encoding. 65 | - Dimension names [dimension_names] and user attributes [attributes] 66 | are included in it's metadata document. 67 | - A codec chain defined by [codecs]. 68 | - The array has shape [shape] and chunk shape [chunks]. 69 | - The array has data kind [kind] and fill value [fv]. 70 | 71 | @raise Codecs.Bytes_to_bytes_invariant 72 | if [codecs] contains more than one bytes->bytes codec. 73 | @raise Codecs.Invalid_transpose_order 74 | if [codecs] contains a transpose codec with invalid order array. 75 | @raise Codecs.Invalid_sharding_chunk_shape 76 | if [codecs] contains a shardingindexed codec with an 77 | incorrect inner chunk shape. *) 78 | 79 | val metadata : t -> Node.Array.t -> Metadata.Array.t io 80 | (** [metadata node t] returns the metadata of array node [node]. 81 | 82 | @raise Key_not_found if node is not a member of store [t]. *) 83 | 84 | val delete : t -> Node.Array.t -> unit io 85 | (** [delete t n] erases array node [n] from store [t]. If node [n] 86 | is not a member of store [t] then this is a no-op. *) 87 | 88 | val exists : t -> Node.Array.t -> bool io 89 | (** [exists t n] returns [true] if array node [n] is a member 90 | of store [t] and [false] otherwise. *) 91 | 92 | val write : t -> Node.Array.t -> Ndarray.Indexing.index list -> 'a Ndarray.t -> unit io 93 | (** [write t n s x] writes n-dimensional array [x] to the slice [s] 94 | of array node [n] in store [t]. 95 | 96 | @raise Invalid_array_slice 97 | if the ndarray [x] size does not equal slice [s]. 98 | @raise Invalid_data_type 99 | if the kind of [x] is not compatible with node [n]'s data type as 100 | described in its metadata document. *) 101 | 102 | val read : t -> Node.Array.t -> Ndarray.Indexing.index list -> 'a Ndarray.dtype -> 'a Ndarray.t io 103 | (** [read t n s k] reads an n-dimensional array of size determined 104 | by slice [s] from array node [n]. 105 | 106 | @raise Invalid_data_type 107 | if kind [k] is not compatible with node [n]'s data type as described 108 | in its metadata document. 109 | @raise Invalid_array_slice 110 | if the slice [s] is not a valid slice of array node [n].*) 111 | 112 | val reshape : t -> Node.Array.t -> int list -> unit io 113 | (** [reshape t n shape] resizes array node [n] of store [t] into new 114 | size [shape]. Note that when the resizing involves shrinking an array 115 | along any dimensions, any old unreachable chunks that fall outside of 116 | the array's new shape are deleted from the store. 
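
        A minimal sketch (with [M], [store] and an existing array node [anode] assumed as
        in the sketch above):
        {[
          (* shrink a [100; 100; 50] array; chunks now lying entirely outside the
             new shape are erased from the store. *)
          M.Array.reshape store anode [25; 32; 10]
        ]}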
117 | 118 | @raise Invalid_resize_shape 119 | if [shape] does not have the same dimensions as [n]'s shape. 120 | @raise Key_not_found 121 | if node [n] is not a member of store [t]. *) 122 | 123 | val rename : t -> Node.Array.t -> string -> unit io 124 | (** [rename t n name] changes the name of array node [n] in store [t] to [name]. 125 | 126 | @raise Key_not_found if [n] is not a member of store [t]. 127 | @raise Renaming_root if [n] is the store's root node. 128 | @raise Node_invariant if [name] is an invalid node name.*) 129 | end 130 | 131 | val hierarchy : t -> (Node.Array.t list * Node.Group.t list) io 132 | (** [hierarchy t] returns [p] where [p] is a pair of lists 133 | representing all nodes in store [t]. The first element of the pair 134 | is a list of all array nodes, and the second element is a list of 135 | all group nodes. This operation returns a pair of empty lists if 136 | store [t] is empty. 137 | 138 | @raise Parse_error if any node has invalid [node_type] metadata.*) 139 | 140 | val clear : t -> unit io 141 | (** [clear t] clears the store [t] by deleting all nodes. 142 | If the store is already empty, this is a no-op. *) 143 | end 144 | 145 | module type Interface = sig 146 | (** A Zarr store is a system that can be used to store and retrieve data 147 | from a Zarr hierarchy. For a store to be compatible with this 148 | specification, it must support a set of operations defined in the 149 | abstract store interface {!S}. The store interface can be 150 | implemented using a variety of underlying storage technologies. *) 151 | 152 | exception Invalid_resize_shape 153 | (** raised when resizing a Zarr array with an incorrect shape. *) 154 | 155 | exception Invalid_data_type 156 | (** raised when the supplied data type is not the same as the Zarr array's. *) 157 | 158 | exception Invalid_array_slice 159 | (** raised when requesting a view of a Zarr array with an incorrect slice. *) 160 | 161 | exception Key_not_found of string 162 | (** raised when a node's chunk key or metadata key is not found in a store. *) 163 | 164 | exception Not_a_filesystem_store of string 165 | (** raised when opening a file as if it were a filesystem Zarr store. *) 166 | 167 | module type S = S 168 | (** The module interface that all supported stores must implement. *) 169 | 170 | module Make : functor (IO : Types.IO) (Store : Types.Store with type 'a io = 'a IO.t) -> S 171 | with type t = Store.t and type 'a io = 'a IO.t 172 | (** A functor for minting a new storage type as long as its argument 173 | module implements the {!Store} interface. 
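
      This is the pattern used by the bundled examples, e.g. (a sketch of
      examples/picos_fs_store.ml and examples/zipstore.ml, where [IO] is an I/O module
      exposed by zarr-sync/zarr-eio and [Store] is the user-written key-value module):
      {[
        include Zarr.Storage.Make (IO) (Store)
      ]}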
*) 174 | end 175 | -------------------------------------------------------------------------------- /zarr-sync/test/test_sync.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | open Zarr.Indexing 4 | open Zarr.Codecs 5 | open Zarr_sync.Storage 6 | 7 | let string_of_list = [%show: string list] 8 | let print_node_pair = [%show: Node.Array.t list * Node.Group.t list] 9 | 10 | module type SYNC_STORE = Zarr.Storage.S with type 'a io := 'a 11 | 12 | let test_storage 13 | (type a) (module M : SYNC_STORE with type t = a) (store : a) = 14 | let open M in 15 | let gnode = Node.Group.root in 16 | 17 | let nodes = hierarchy store in 18 | assert_equal ~printer:print_node_pair ([], []) nodes; 19 | 20 | Group.create store gnode; 21 | let exists = Group.exists store gnode in 22 | assert_equal ~printer:string_of_bool true exists; 23 | assert_equal ~printer:print_node_pair ([], [gnode]) (hierarchy store); 24 | 25 | let meta = Group.metadata store gnode in 26 | assert_equal ~printer:Metadata.Group.show Metadata.Group.default meta; 27 | 28 | Group.delete store gnode; 29 | let exists = Group.exists store gnode in 30 | assert_equal ~printer:string_of_bool false exists; 31 | let nodes = hierarchy store in 32 | assert_equal ~printer:print_node_pair ([], []) nodes; 33 | 34 | let attrs = `Assoc [("questions", `String "answer")] in 35 | Group.create ~attrs store gnode; 36 | let meta = Group.metadata store gnode in 37 | assert_equal ~printer:Yojson.Safe.show attrs @@ Metadata.Group.attributes meta; 38 | 39 | let exists = Array.exists store @@ Node.Array.(gnode / "non-member") in 40 | assert_equal ~printer:string_of_bool false exists; 41 | 42 | let cfg = 43 | {chunk_shape = [2; 5; 5] 44 | ;index_location = End 45 | ;index_codecs = [`Bytes LE; `Crc32c] 46 | ;codecs = [`Transpose [2; 0; 1]; `Bytes BE; `Zstd (0, false)]} in 47 | let cfg2 = 48 | {chunk_shape = [2; 5; 5] 49 | ;index_location = Start 50 | ;index_codecs = [`Bytes BE] 51 | ;codecs = [`Bytes LE]} in 52 | let anode = Node.Array.(gnode / "arrnode") in 53 | let slice = [R (0, 20); I 10; R (0, 29)] in 54 | let bigger_slice = [R (0, 21); L [9; 10] ; R (0, 30)] in 55 | 56 | List.iter 57 | (fun codecs -> 58 | Array.create ~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Complex32 Complex.one anode store; 59 | let exp = Ndarray.init Complex32 [21; 1; 30] (Fun.const Complex.one) in 60 | let got = Array.read store anode slice Complex32 in 61 | assert_equal exp got; 62 | Ndarray.fill exp Complex.{re=2.0; im=0.}; 63 | Array.write store anode slice exp; 64 | let got = Array.read store anode slice Complex32 in 65 | (* test if a bigger slice containing new elements can be read from store *) 66 | let _ = Array.read store anode bigger_slice Complex32 in 67 | assert_equal exp got; 68 | (* test writing a bigger slice to store *) 69 | Array.write store anode bigger_slice @@ Ndarray.init Complex32 [22; 2; 31] (Fun.const Complex.{re=0.; im=3.0}); 70 | let got = Array.read store anode slice Complex32 in 71 | Ndarray.fill exp Complex.{re=0.; im=3.0}; 72 | assert_equal exp got; 73 | Array.delete store anode) 74 | [[`ShardingIndexed cfg]; [`ShardingIndexed cfg2]]; 75 | 76 | (* repeat tests for non-sharding codec chain *) 77 | Array.create ~sep:`Dot ~codecs:[`Bytes BE] ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Ndarray.Int Int.max_int anode store; 78 | assert_equal ~printer:print_node_pair ([anode], [gnode]) (hierarchy store); 79 | (* test path where there is no chunk key present in store *) 80 | let exp = 
Ndarray.init Int [21; 1; 30] (Fun.const Int.max_int) in 81 | Array.write store anode slice exp; 82 | let got = Array.read store anode slice Int in 83 | assert_equal exp got; 84 | (* test path where there is a chunk key present in store at write time. *) 85 | Array.write store anode slice exp; 86 | let got = Array.read store anode slice Int in 87 | assert_equal exp got; 88 | 89 | assert_raises 90 | (Zarr.Storage.Invalid_data_type) 91 | (fun () -> Array.read store anode slice Ndarray.Char); 92 | let badslice = [R (0, 20); I 10; F; F] in 93 | assert_raises 94 | (Zarr.Storage.Invalid_array_slice) 95 | (fun () -> Array.read store anode badslice Ndarray.Int); 96 | assert_raises 97 | (Zarr.Storage.Invalid_array_slice) 98 | (fun () -> Array.write store anode badslice exp); 99 | assert_raises 100 | (Zarr.Storage.Invalid_array_slice) 101 | (fun () -> Array.write store anode [R (0, 20); F; F] exp); 102 | let badarray = Ndarray.init Float64 [21; 1; 30] (Fun.const 0.) in 103 | assert_raises 104 | (Zarr.Storage.Invalid_data_type) 105 | (fun () -> Array.write store anode slice badarray); 106 | 107 | let child = Node.Group.of_path "/some/child/group" in 108 | Group.create store child; 109 | let arrays, groups = Group.children store gnode in 110 | assert_equal 111 | ~printer:string_of_list ["/arrnode"] (List.map Node.Array.to_path arrays); 112 | assert_equal 113 | ~printer:string_of_list ["/some"] (List.map Node.Group.to_path groups); 114 | 115 | assert_equal ([], []) @@ Group.children store child; 116 | assert_equal ([], []) @@ Group.children store Node.Group.(root / "fakegroup"); 117 | 118 | let ac, gc = hierarchy store in 119 | let got = 120 | List.fast_sort String.compare @@ 121 | List.map Node.Array.show ac @ List.map Node.Group.show gc in 122 | assert_equal 123 | ~printer:string_of_list 124 | ["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got; 125 | 126 | (* tests for renaming nodes *) 127 | let some = Node.Group.of_path "/some/child" in 128 | Group.rename store some "CHILD"; 129 | Array.rename store anode "ARRAYNODE"; 130 | let ac, gc = hierarchy store in 131 | let got = 132 | List.fast_sort String.compare @@ 133 | List.map Node.Array.show ac @ List.map Node.Group.show gc in 134 | assert_equal 135 | ~printer:string_of_list 136 | ["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got; 137 | assert_raises 138 | (Zarr.Storage.Key_not_found "fakegroup") 139 | (fun () -> Group.rename store Node.Group.(gnode / "fakegroup") "somename"); 140 | assert_raises 141 | (Zarr.Storage.Key_not_found "fakearray") 142 | (fun () -> Array.rename store Node.Array.(gnode / "fakearray") "somename"); 143 | 144 | (* restore old array node name. 
*) 145 | Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode"; 146 | let nshape = [25; 32; 10] in 147 | Array.reshape store anode nshape; 148 | let meta = Array.metadata store anode in 149 | assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta; 150 | assert_raises 151 | (Zarr.Storage.Invalid_resize_shape) 152 | (fun () -> Array.reshape store anode [25; 10]); 153 | assert_raises 154 | (Zarr.Storage.Key_not_found "fakegroup/zarr.json") 155 | (fun () -> Array.metadata store Node.Array.(gnode / "fakegroup")); 156 | 157 | Array.delete store anode; 158 | clear store; 159 | let got = hierarchy store in 160 | assert_equal ~printer:print_node_pair ([], []) got 161 | 162 | let _ = 163 | run_test_tt_main @@ ("Run Zarr sync API tests" >::: [ 164 | "test sync-based stores" >:: 165 | (fun _ -> 166 | let rand_num = string_of_int @@ Random.int 1_000_000 in 167 | let tmp_dir = Filename.(concat (get_temp_dir_name ()) (rand_num ^ ".zarr")) in 168 | let s = FilesystemStore.create tmp_dir in 169 | 170 | assert_raises 171 | (Sys_error (Format.sprintf "%s: File exists" tmp_dir)) 172 | (fun () -> FilesystemStore.create tmp_dir); 173 | 174 | (* inject a bad metadata document to test correct parsing of bad child 175 | nodes when discovering children of a group. *) 176 | let dname = tmp_dir ^ "/badnode" in 177 | let fname = Filename.concat dname "zarr.json" in 178 | Sys.mkdir dname 0o700; 179 | Out_channel.with_open_bin 180 | fname 181 | (Fun.flip Out_channel.output_string {|{"zarr_format":3,"node_type":"unknown"}|}); 182 | assert_raises 183 | (Zarr.Metadata.Parse_error "invalid node_type in badnode/zarr.json") 184 | (fun () -> FilesystemStore.hierarchy s); 185 | Sys.(remove fname; rmdir dname); 186 | 187 | (* ensure it works with an extra "/" appended to directory name. *) 188 | ignore @@ FilesystemStore.open_store (tmp_dir ^ "/"); 189 | 190 | let fakedir = "non-existant-zarr-store112345.zarr" in 191 | assert_raises 192 | (Sys_error (Printf.sprintf "%s: No such file or directory" fakedir)) 193 | (fun () -> FilesystemStore.open_store fakedir); 194 | 195 | let fn = Filename.temp_file "nonexistantfile" ".zarr" in 196 | assert_raises 197 | (Zarr.Storage.Not_a_filesystem_store fn) 198 | (fun () -> FilesystemStore.open_store fn); 199 | 200 | (* test with non-existant archive *) 201 | let zpath = tmp_dir ^ ".zip" in 202 | test_storage (module ZipStore) (ZipStore.create zpath); 203 | (* test just opening the now existant archive created by the previous test. 
*) 204 | ignore (ZipStore.open_store zpath); 205 | assert_raises (ZipStore.Path_already_exists zpath) (fun () -> ZipStore.create zpath); 206 | let levels = [L0; L1; L2; L3; L4; L5; L7; L8; L9] in 207 | List.iter (fun level -> ZipStore.open_store ~level zpath |> ignore) levels; 208 | test_storage (module MemoryStore) @@ MemoryStore.create (); 209 | test_storage (module FilesystemStore) s) 210 | ]) 211 | -------------------------------------------------------------------------------- /zarr/src/storage/storage.ml: -------------------------------------------------------------------------------- 1 | include Storage_intf 2 | 3 | module Make (IO : Types.IO) (Store : Types.Store with type 'a io = 'a IO.t) = struct 4 | module IO_chain = Codecs.Make(IO) 5 | 6 | open IO.Infix 7 | open IO.Syntax 8 | open Store 9 | 10 | type t = Store.t 11 | type 'a io = 'a IO.t 12 | 13 | let maybe_rename t old_name new_name = function 14 | | false -> raise (Key_not_found old_name) 15 | | true -> rename t old_name new_name 16 | 17 | let node_kind t metakey = 18 | let+ s = get t metakey in 19 | match Yojson.Safe.(Util.member "node_type" @@ from_string s) with 20 | | `String "array" -> `Array 21 | | `String "group" -> `Group 22 | | _ -> raise (Metadata.Parse_error (Printf.sprintf "invalid node_type in %s" metakey)) 23 | 24 | let choose path left right = function 25 | | `Array -> Node.Array.of_path path :: left, right 26 | | `Group -> left, Node.Group.of_path path :: right 27 | 28 | let hierarchy t = 29 | let add ~t ((left, right) as acc) k = 30 | if not (String.ends_with ~suffix:"zarr.json" k) then IO.return acc else 31 | let path = if k = "zarr.json" then "/" else "/" ^ String.(sub k 0 (length k - 10)) in 32 | IO.map (choose path left right) (node_kind t k) 33 | in 34 | list t >>= IO.fold_left (add ~t) ([], []) 35 | 36 | let clear t = erase_prefix t "" 37 | 38 | module Group = struct 39 | let exists t node = is_member t (Node.Group.to_metakey node) 40 | let delete t node = erase_prefix t (Node.Group.to_prefix node) 41 | let metadata t node = IO.map Metadata.Group.decode (get t @@ Node.Group.to_metakey node) 42 | 43 | (* This recursively creates parent group nodes if they don't exist.*) 44 | let rec create ?(attrs=`Null) t node = 45 | let maybe_create ~attrs t node = function 46 | | true -> IO.return_unit 47 | | false -> 48 | let key = Node.Group.to_metakey node 49 | and meta = Metadata.Group.(update_attributes default attrs) in 50 | let* () = set t key (Metadata.Group.encode meta) in 51 | match Node.Group.parent node with 52 | | None -> IO.return_unit 53 | | Some p -> create t p 54 | in 55 | exists t node >>= maybe_create ~attrs t node 56 | 57 | let children t node = 58 | let add ~t (left, right) prefix = 59 | let path = "/" ^ String.sub prefix 0 (String.length prefix - 1) in 60 | IO.map (choose path left right) (node_kind t @@ prefix ^ "zarr.json") 61 | in 62 | let maybe_enumerate t node = function 63 | | false -> IO.return ([], []) 64 | | true -> 65 | let* _, ps = list_dir t (Node.Group.to_prefix node) in 66 | IO.fold_left (add ~t) ([], []) ps 67 | in 68 | exists t node >>= maybe_enumerate t node 69 | 70 | let rename t node str = 71 | let key = Node.Group.to_key node 72 | and key' = Node.Group.(rename node str |> to_key) in 73 | exists t node >>= maybe_rename t key key' 74 | end 75 | 76 | module Array = struct 77 | module CoordMap = Util.CoordMap 78 | module Indexing = Ndarray.Indexing 79 | let exists t node = is_member t (Node.Array.to_metakey node) 80 | let delete t node = erase_prefix t (Node.Array.to_key node ^ 
"/") 81 | let metadata t node = IO.map Metadata.Array.decode (get t @@ Node.Array.to_metakey node) 82 | 83 | (* This recursively creates parent group nodes if they don't exist.*) 84 | let create ?(sep=`Slash) ?(dimension_names=[]) ?(attributes=`Null) ~codecs ~shape ~chunks kind fv node t = 85 | let c = Codecs.Chain.create chunks codecs in 86 | let m = Metadata.Array.create ~sep ~codecs:c ~dimension_names ~attributes ~shape kind fv chunks in 87 | let value = Metadata.Array.encode m in 88 | let key = Node.Array.to_metakey node in 89 | let* () = set t key value in 90 | Option.fold ~none:IO.return_unit ~some:(Group.create t) (Node.Array.parent node) 91 | 92 | let write t node slice x = 93 | let update_ndarray ~arr (c, v) = Ndarray.set arr c v in 94 | let add_coord_value ~meta acc co y = 95 | let chunk_idx, c = Metadata.Array.index_coord_pair meta co in 96 | CoordMap.add_to_list chunk_idx (c, y) acc 97 | in 98 | let update_chunk ~t ~meta ~prefix ~chain ~fv ~repr (idx, pairs) = 99 | let ckey = prefix ^ Metadata.Array.chunk_key meta idx in 100 | if IO_chain.is_just_sharding chain then 101 | let pget = get_partial_values t ckey and pset = set_partial_values t ckey in 102 | let* shardsize = size t ckey in 103 | IO_chain.partial_encode chain pget pset shardsize repr pairs fv 104 | else is_member t ckey >>= function 105 | | true -> 106 | let* v = get t ckey in 107 | let arr = Codecs.Chain.decode chain repr v in 108 | List.iter (update_ndarray ~arr) pairs; 109 | set t ckey (Codecs.Chain.encode chain arr) 110 | | false -> 111 | let arr = Ndarray.create repr.kind repr.shape fv in 112 | List.iter (update_ndarray ~arr) pairs; 113 | set t ckey (Codecs.Chain.encode chain arr) 114 | in 115 | let* meta = metadata t node in 116 | let shape = Metadata.Array.shape meta in 117 | let slice_shape = try Indexing.slice_shape slice shape with 118 | | Assert_failure _ -> raise Invalid_array_slice 119 | in 120 | if Ndarray.shape x <> slice_shape then raise Invalid_array_slice else 121 | let kind = Ndarray.data_type x in 122 | if not (Metadata.Array.is_valid_kind meta kind) then raise Invalid_data_type else 123 | let coords = Indexing.coords_of_slice slice shape in 124 | let m = List.fold_left2 (add_coord_value ~meta) CoordMap.empty coords (Ndarray.to_array x |> Array.to_list) in 125 | let fv = Metadata.Array.fillvalue_of_kind meta kind 126 | and repr = Codecs.{kind; shape = Metadata.Array.chunk_shape meta} 127 | and prefix = Node.Array.to_key node ^ "/" 128 | and chain = Metadata.Array.codecs meta in 129 | IO.iter (update_chunk ~t ~meta ~prefix ~chain ~fv ~repr) (CoordMap.bindings m) 130 | 131 | let read (type a) t node slice (kind : a Ndarray.dtype) = 132 | let add_indexed_coord ~meta acc i y = 133 | let chunk_idx, c = Metadata.Array.index_coord_pair meta y in 134 | CoordMap.add_to_list chunk_idx (i, c) acc 135 | in 136 | let read_chunk ~t ~meta ~prefix ~chain ~fv ~repr (idx, pairs) = 137 | let ckey = prefix ^ Metadata.Array.chunk_key meta idx in 138 | size t ckey >>= function 139 | | 0 -> IO.return @@ List.map (fun (i, _) -> i, fv) pairs 140 | | shardsize when IO_chain.is_just_sharding chain -> 141 | let pget = get_partial_values t ckey in 142 | IO_chain.partial_decode chain pget shardsize repr pairs fv 143 | | _ -> 144 | let+ v = get t ckey in 145 | let arr = Codecs.Chain.decode chain repr v in 146 | List.map (fun (i, c) -> i, Ndarray.get arr c) pairs 147 | in 148 | let* meta = metadata t node in 149 | if not (Metadata.Array.is_valid_kind meta kind) then raise Invalid_data_type else 150 | let shape = 
Metadata.Array.shape meta in 151 | let slice_shape = try Indexing.slice_shape slice shape with 152 | | Assert_failure _ -> raise Invalid_array_slice 153 | in 154 | let numel = List.fold_left Int.mul 1 slice_shape in 155 | let coords = Indexing.coords_of_slice slice shape in 156 | let m = List.fold_left2 (add_indexed_coord ~meta) CoordMap.empty List.(init numel Fun.id) coords 157 | and chain = Metadata.Array.codecs meta 158 | and prefix = Node.Array.to_key node ^ "/" 159 | and fv = Metadata.Array.fillvalue_of_kind meta kind 160 | and repr = Codecs.{kind; shape = Metadata.Array.chunk_shape meta} in 161 | let+ ps = IO.concat_map (read_chunk ~t ~meta ~prefix ~chain ~fv ~repr) (CoordMap.bindings m) in 162 | (* sorting restores the C-order of the decoded array coordinates.*) 163 | let sorted_pairs = List.fast_sort (fun (x, _) (y, _) -> Int.compare x y) ps in 164 | let vs = List.map snd sorted_pairs in 165 | Ndarray.of_array kind slice_shape (Array.of_list vs) 166 | 167 | let reshape t node new_shape = 168 | let module S = Set.Make (struct 169 | type t = int list 170 | let compare : t -> t -> int = Stdlib.compare 171 | end) 172 | in 173 | let maybe_erase t key = function 174 | | false -> IO.return_unit 175 | | true -> erase t key 176 | in 177 | let remove ~t ~meta ~prefix v = 178 | let key = prefix ^ Metadata.Array.chunk_key meta v in 179 | is_member t key >>= maybe_erase t key 180 | in 181 | let* meta = metadata t node in 182 | let old_shape = Metadata.Array.shape meta in 183 | if List.(length new_shape <> length old_shape) then raise Invalid_resize_shape else 184 | let s = S.of_list (Metadata.Array.chunk_indices meta old_shape) 185 | and s' = S.of_list (Metadata.Array.chunk_indices meta new_shape) in 186 | let unreachable_chunks = S.elements (S.diff s s') 187 | and prefix = Node.Array.to_key node ^ "/" in 188 | let* () = IO.iter (remove ~t ~meta ~prefix) unreachable_chunks in 189 | set t (Node.Array.to_metakey node) Metadata.Array.(encode @@ update_shape meta new_shape) 190 | 191 | let rename t node str = 192 | let key = Node.Array.to_key node 193 | and key' = Node.Array.(rename node str |> to_key) in 194 | exists t node >>= maybe_rename t key key' 195 | end 196 | end 197 | -------------------------------------------------------------------------------- /examples/zipstore.ml: -------------------------------------------------------------------------------- 1 | (* This module implements a Zip archive zarr store that uses the Eio library for 2 | non-blocking I/O operations. The main requirement is to implement the signature 3 | of Zarr.Types.Store. Below we show how to implement this custom Zarr Store. 4 | 5 | To compile & run this example execute the command 6 | dune exec -- examples/zipstore.exe 7 | in your shell at the root of this project. *) 8 | 9 | module IO = Zarr_eio.Storage.IO 10 | 11 | module ZipStore : sig 12 | include Zarr.Storage.S with type 'a io := 'a 13 | val with_open : 14 | ?level:[ `None | `Fast | `Default | `Best ] -> 15 | ?perm:int -> 16 | [< `Read_only | `Read_write ] -> 17 | string -> 18 | (t -> 'a) -> 19 | 'a 20 | (** [with_open mode p f] opens the zip archive at path [p] and applies 21 | function [f] to its open handle and writes any changes back to the zip 22 | archive if [mode] is [`Read_write], otherwise discards them at exit. 23 | If [p] does not exist, a handle to an empty zip archive is opened. 
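
      For example, a small usage sketch (mirroring the driver code at the bottom of this
      file; ["out.zip"] is a hypothetical path):
      {[
        ZipStore.with_open ~level:`Fast `Read_write "out.zip" (fun store ->
          ZipStore.Group.create store (Zarr.Node.Group.of_path "/some/group"))
      ]}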
24 | Note that this function loads the entire zip archive bytes into memory, 25 | so care must be taken to ensure that these bytes can fit into the local 26 | machine's available memory. For now it does not handle ZIP64. ZIP64 is 27 | needed if your ZIP archive or decompressed file sizes exceed 2{^32}-1 28 | bytes or if you need more than 65535 archive members. 29 | 30 | {ul 31 | {- [level] is the DEFLATE algorithm compression level used when writing 32 | data to the store and defaults to [`Default]. Choose [`None] for no 33 | compression, [`Fast] for best speed, [`Best] for high compression rate 34 | and [`Default] for a mix of good speed and compression rate.} 35 | {- [perm] is the file permission to use when opening an existing zip file 36 | and defaults to [0o700].} 37 | } *) 38 | end = struct 39 | open IO.Syntax 40 | 41 | let fold_kind ~dir ~file = function 42 | | Zipc.Member.Dir -> dir 43 | | Zipc.Member.File f -> file f 44 | 45 | let fold_result ~ok res = Result.fold ~error:failwith ~ok res 46 | 47 | module Store = struct 48 | type t = {ic : Zipc.t Atomic.t; level : Zipc_deflate.level} 49 | type 'a io = 'a IO.t 50 | 51 | let is_member t key = 52 | let z = Atomic.get t.ic in 53 | IO.return (Zipc.mem key z) 54 | 55 | let size t key = 56 | let decompressed_size = function 57 | | None -> 0 58 | | Some m -> 59 | let entry_kind = Zipc.Member.kind m in 60 | fold_kind ~dir:0 ~file:Zipc.File.decompressed_size entry_kind 61 | in 62 | let z = Atomic.get t.ic in 63 | IO.return (decompressed_size @@ Zipc.find key z) 64 | 65 | let get t key = 66 | let to_string f = fold_result ~ok:Fun.id (Zipc.File.to_binary_string f) in 67 | let decompressed_value = function 68 | | None -> raise (Zarr.Storage.Key_not_found key) 69 | | Some m -> 70 | let entry_kind = Zipc.Member.kind m in 71 | fold_kind ~dir:String.empty ~file:to_string entry_kind 72 | in 73 | let z = Atomic.get t.ic in 74 | IO.return (decompressed_value @@ Zipc.find key z) 75 | 76 | let get_partial_values t key ranges = 77 | let read_range ~data ~size (ofs, len) = match len with 78 | | None -> String.sub data ofs (size - ofs) 79 | | Some l -> String.sub data ofs l 80 | in 81 | let+ data = get t key in 82 | let size = String.length data in 83 | List.map (read_range ~data ~size) ranges 84 | 85 | let list t = 86 | let accumulate_path m acc = Zipc.Member.path m :: acc in 87 | let z = Atomic.get t.ic in 88 | IO.return (Zipc.fold accumulate_path z []) 89 | 90 | let list_dir t prefix = 91 | let module S = Set.Make(String) in 92 | let accumulate ~prefix m ((l, r) as acc) = 93 | let key = Zipc.Member.path m in 94 | if not (String.starts_with ~prefix key) then acc else 95 | let n = String.length prefix in 96 | if not (String.contains_from key n '/') then key :: l, r else 97 | l, S.add String.(sub key 0 @@ 1 + index_from key n '/') r 98 | in 99 | let z = Atomic.get t.ic in 100 | let ks, ps = Zipc.fold (accumulate ~prefix) z ([], S.empty) in 101 | IO.return (ks, S.elements ps) 102 | 103 | let rec set t key value = 104 | let res = Zipc.File.deflate_of_binary_string ~level:t.level value in 105 | let f = Zipc.Member.File (fold_result ~ok:Fun.id res) in 106 | let m = fold_result ~ok:Fun.id Zipc.Member.(make ~path:key f) in 107 | let z = Atomic.get t.ic in 108 | if Atomic.compare_and_set t.ic z (Zipc.add m z) 109 | then IO.return_unit else set t key value 110 | 111 | let rec set_partial_values t key ?(append=false) rv = 112 | let to_string f = fold_result ~ok:Fun.id (Zipc.File.to_binary_string f) in 113 | let empty = 114 | let res = 
Zipc.File.deflate_of_binary_string ~level:t.level String.empty in 115 | let res' = Zipc.Member.File (fold_result ~ok:Fun.id res) in 116 | fold_result ~ok:Fun.id Zipc.Member.(make ~path:key res') 117 | in 118 | let z = Atomic.get t.ic in 119 | let mem = Option.fold ~none:empty ~some:Fun.id (Zipc.find key z) in 120 | let ov = fold_kind ~dir:String.empty ~file:to_string (Zipc.Member.kind mem) in 121 | let f = if append || ov = String.empty then 122 | fun acc (_, v) -> acc ^ v else 123 | fun acc (rs, v) -> 124 | let s = Bytes.unsafe_of_string acc in 125 | Bytes.blit_string v 0 s rs String.(length v); 126 | Bytes.unsafe_to_string s 127 | in 128 | let ov' = List.fold_left f ov rv in 129 | let res = Zipc.File.deflate_of_binary_string ~level:t.level ov' in 130 | let file = Zipc.Member.File (fold_result ~ok:Fun.id res) in 131 | let m = fold_result ~ok:Fun.id Zipc.Member.(make ~path:key file) in 132 | if Atomic.compare_and_set t.ic z (Zipc.add m z) 133 | then IO.return_unit else set_partial_values t key ~append rv 134 | 135 | let rec erase t key = 136 | let z = Atomic.get t.ic in 137 | if Atomic.compare_and_set t.ic z (Zipc.remove key z) 138 | then IO.return_unit else erase t key 139 | 140 | let rec erase_prefix t prefix = 141 | let accumulate ~prefix m acc = 142 | if String.starts_with ~prefix (Zipc.Member.path m) 143 | then acc else Zipc.add m acc 144 | in 145 | let z = Atomic.get t.ic in 146 | let z' = Zipc.fold (accumulate ~prefix) z Zipc.empty in 147 | if Atomic.compare_and_set t.ic z z' 148 | then IO.return_unit else erase_prefix t prefix 149 | 150 | (* Adapted from: https://github.com/dbuenzli/zipc/issues/8#issuecomment-2392417890 *) 151 | let rec rename t prefix new_prefix = 152 | let accumulate ~prefix ~new_prefix m acc = 153 | let path = Zipc.Member.path m in 154 | if not (String.starts_with ~prefix path) then Zipc.add m acc else 155 | let l = String.length prefix in 156 | let path = new_prefix ^ String.sub path l (String.length path - l) in 157 | let mtime = Zipc.Member.mtime m in 158 | let mode = Zipc.Member.mode m in 159 | let kind = Zipc.Member.kind m in 160 | let m' = Zipc.Member.make ~mtime ~mode ~path kind in 161 | Zipc.add (fold_result ~ok:Fun.id m') acc 162 | in 163 | let z = Atomic.get t.ic in 164 | let z' = Zipc.fold (accumulate ~prefix ~new_prefix) z Zipc.empty in 165 | if Atomic.compare_and_set t.ic z z' 166 | then IO.return_unit else rename t prefix new_prefix 167 | end 168 | 169 | include Zarr.Storage.Make(IO)(Store) 170 | 171 | let with_open ?(level=`Default) ?(perm=0o700) mode path f = 172 | let write_to_disk ~perm ~path str = 173 | let write ~str oc = Out_channel.output_string oc str; flush oc in 174 | let flags = [Open_wronly; Open_trunc; Open_creat] in 175 | Out_channel.with_open_gen flags perm path (write ~str) 176 | in 177 | let make z = Store.{ic = Atomic.make z; level} in 178 | let x = if not (Sys.file_exists path) then make Zipc.empty else 179 | let s = In_channel.(with_open_bin path input_all) in 180 | fold_result ~ok:make (Zipc.of_binary_string s) 181 | in 182 | match mode with 183 | | `Read_only -> f x 184 | | `Read_write -> 185 | let+ out = f x in 186 | let str = Zipc.to_binary_string (Atomic.get x.ic) in 187 | fold_result ~ok:(write_to_disk ~perm ~path) str; 188 | out 189 | end 190 | 191 | let _ = 192 | Eio_main.run @@ fun _ -> 193 | let open Zarr in 194 | let open Zarr.Ndarray in 195 | let open Zarr.Indexing in 196 | 197 | let test_functionality store = 198 | let xs, _ = ZipStore.hierarchy store in 199 | let anode = List.hd @@ List.filter 200 | (fun node -> 
Node.Array.to_path node = "/some/group/name") xs in 201 | let slice = [R (0, 20); I 10; F] in 202 | let x = ZipStore.Array.read store anode slice Char in 203 | let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x in 204 | ZipStore.Array.write store anode slice x'; 205 | let y = ZipStore.Array.read store anode slice Char in 206 | assert (Zarr.Ndarray.equal x' y); 207 | ZipStore.Array.rename store anode "name2"; 208 | let exists = ZipStore.Array.exists store @@ Node.Array.of_path "/some/group/name2" in 209 | assert exists; 210 | ZipStore.clear store (* deletes all zip entries *) 211 | in 212 | ZipStore.with_open `Read_only "examples/data/testdata.zip" test_functionality 213 | -------------------------------------------------------------------------------- /zarr/src/ndarray.ml: -------------------------------------------------------------------------------- 1 | type _ dtype = 2 | | Char : char dtype 3 | | Bool : bool dtype 4 | | Int8 : int dtype 5 | | Uint8 : int dtype 6 | | Int16 : int dtype 7 | | Uint16 : int dtype 8 | | Int32 : int32 dtype 9 | | Int64 : int64 dtype 10 | | Uint64 : Stdint.uint64 dtype 11 | | Float32 : float dtype 12 | | Float64 : float dtype 13 | | Complex32 : Complex.t dtype 14 | | Complex64 : Complex.t dtype 15 | | Int : int dtype 16 | | Nativeint : nativeint dtype 17 | 18 | let dtype_size : type a. a dtype -> int = function 19 | | Char -> 1 20 | | Bool -> 1 21 | | Int8 -> 1 22 | | Uint8 -> 1 23 | | Int16 -> 2 24 | | Uint16 -> 2 25 | | Int32 -> 4 26 | | Int64 -> 8 27 | | Uint64 -> 8 28 | | Float32 -> 4 29 | | Float64 -> 8 30 | | Complex32 -> 8 31 | | Complex64 -> 16 32 | | Int -> Sys.word_size / 8 33 | | Nativeint -> Sys.word_size / 8 34 | 35 | let prod x = List.fold_left Int.mul 1 x 36 | 37 | let cumprod x start stop = 38 | let acc = ref 1 in 39 | for i = start to stop do acc := !acc * (List.nth x i) done; !acc 40 | 41 | (*strides[k] = [cumulative_product with start=k+1 end=n-1] of shape *) 42 | let make_strides shape = 43 | let n = List.length shape - 1 in 44 | Array.init (n + 1) (fun i -> cumprod shape (i + 1) n) 45 | 46 | type 'a t = {shape : int list; strides : int array; dtype : 'a dtype; data : 'a array} 47 | let equal x y = x.data = y.data && x.shape = y.shape && x.dtype = y.dtype && x.strides = y.strides 48 | (* 1d index of coord [i0; ...; in] is SUM(i0 * strides[0] + ... + in * strides[n-1] *) 49 | let coord_to_index i s = Array.fold_left (fun a (x, y) -> Int.add a (x * y)) 0 @@ Array.combine i s 50 | let coord_to_index' x s = let acc = ref 0 in List.iteri (fun i v -> acc := !acc + v * s.(i)) x; !acc 51 | let create dtype shape fv = {shape; dtype; strides = make_strides shape; data = Array.make (prod shape) fv} 52 | let init dtype shape f = {shape; dtype; strides = make_strides shape; data = Array.init (prod shape) f} 53 | let of_array dtype shape xs = {shape; dtype; strides = make_strides shape; data = xs} 54 | let data_type t = t.dtype 55 | let size t = prod t.shape 56 | let ndims t = List.length t.shape 57 | let get t i = t.data.(coord_to_index' i t.strides) 58 | let set t i x = t.data.(coord_to_index' i t.strides) <- x 59 | let set' t i x = t.data.(coord_to_index i t.strides) <- x 60 | let fill t v = Array.iteri (fun i _ -> t.data.(i) <- v) t.data 61 | let map f t = {t with data = Array.map f t.data} 62 | let iteri f t = Array.iteri f t.data 63 | let iter f t = Array.iter f t.data 64 | let byte_size t = size t * dtype_size t.dtype 65 | let to_array t = t.data 66 | let shape t = t.shape 67 | 68 | (* This snippet is adapted from the Owl project. 
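   It maps a flat C-order index [i] back to an n-dimensional coordinate [j] using the
   precomputed strides. As a worked example (ours, not Owl's): for shape [2; 3; 4] the
   strides are [|12; 4; 1|], so index 17 becomes the coordinate [|1; 1; 1|], since
   17 = 1*12 + 1*4 + 1*1.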
69 | 70 | The MIT License (MIT) 71 | Copyright (c) 2016-2022 Liang Wang liang@ocaml.xyz *) 72 | let index_to_coord ~strides i j = 73 | j.(0) <- i / strides.(0); 74 | for k = 1 to Array.length strides - 1 do 75 | j.(k) <- i mod strides.(k - 1) / strides.(k) 76 | done 77 | 78 | module B = Bigarray 79 | 80 | let to_bigarray : 81 | type a b. a t -> (a, b) B.kind -> (a, b, B.c_layout) B.Genarray.t 82 | = fun x kind -> 83 | let initialize ~x c = x.data.(coord_to_index c x.strides) in 84 | let shape = Array.of_list x.shape in 85 | let f k = B.Genarray.init k C_layout shape (initialize ~x) in 86 | match[@warning "-8"] kind with 87 | | B.Char as k -> f k 88 | | B.Int8_signed as k -> f k 89 | | B.Int8_unsigned as k -> f k 90 | | B.Int16_signed as k -> f k 91 | | B.Int16_unsigned as k -> f k 92 | | B.Int32 as k -> f k 93 | | B.Int64 as k -> f k 94 | | B.Float32 as k -> f k 95 | | B.Float64 as k -> f k 96 | | B.Nativeint as k -> f k 97 | | B.Int as k -> f k 98 | | B.Complex32 as k -> f k 99 | | B.Complex64 as k -> f k 100 | 101 | let of_bigarray : 102 | type a b c. (a, b, c) B.Genarray.t -> a t = fun x -> 103 | let x' = B.Genarray.change_layout x C_layout in 104 | let shape = B.Genarray.dims x' |> Array.to_list in 105 | let coord = Array.make (B.Genarray.num_dims x') 0 in 106 | let strides = make_strides shape in 107 | let initialize ~strides ~coord ~x' i = 108 | index_to_coord ~strides i coord; 109 | B.Genarray.get x' coord 110 | in 111 | let f d = init d shape (initialize ~strides ~coord ~x') in 112 | match[@warning "-8"] B.Genarray.kind x with 113 | | B.Char -> f Char 114 | | B.Int8_signed -> f Int8 115 | | B.Int8_unsigned -> f Uint8 116 | | B.Int16_signed -> f Int16 117 | | B.Int16_unsigned -> f Uint16 118 | | B.Int32 -> f Int32 119 | | B.Int64 -> f Int64 120 | | B.Float32 -> f Float32 121 | | B.Float64 -> f Float64 122 | | B.Nativeint -> f Nativeint 123 | | B.Int -> f Int 124 | | B.Complex32 -> f Complex32 125 | | B.Complex64 -> f Complex64 126 | 127 | (* validation for [axis] is done at the boundaries of the system and thus doing 128 | so inside this function would be redundant work. Also, the output array 129 | shares internal data with the input. Since this function is only ever 130 | used when serializing/deserializing an Ndarray.t type then this should not 131 | be an issue since the input array is never used again after it is transposed. *) 132 | let transpose ?axes x = 133 | let n = ndims x in 134 | let p = Option.fold ~none:(List.init n (fun i -> n - 1 - i)) ~some:Fun.id axes in 135 | let shape = List.map (fun i -> List.nth x.shape i) p in 136 | let x' = {x with shape; strides = make_strides shape; data = Array.copy x.data} in 137 | let c = Array.make n 0 and c' = Array.make n 0 in 138 | (* Project a 1d-indexed value of the input ndarray into its corresponding 139 | n-dimensional index/coordinate of the transposed ndarray according to the 140 | permutation described by [p].*) 141 | let project_1d_to_nd i a = 142 | index_to_coord ~strides:x.strides i c; 143 | List.iteri (fun j b -> c'.(j) <- c.(b)) p; 144 | set' x' c' a 145 | in 146 | iteri project_1d_to_nd x; 147 | x' 148 | 149 | (* The [index] type definition as well as functions tagged with [@coverage off] 150 | in this Indexing module were directly copied and modified from the Owl project 151 | to emulate its logic for munipulating slices. 
The code is licenced under the 152 | MIT license and can be found at: https://github.com/owlbarn/owl 153 | 154 | The MIT License (MIT) 155 | Copyright (c) 2016-2022 Liang Wang liang@ocaml.xyz *) 156 | module Indexing = struct 157 | type index = 158 | | F 159 | | I of int 160 | | T of int 161 | | L of int list 162 | | R of int * int 163 | | R' of int * int * int 164 | 165 | (* internal restricted representation of index type *) 166 | type index' = L of int list | R' of int * int * int 167 | 168 | (* this is copied from the Owl project so we skip testing it. *) 169 | let[@coverage off] check_slice_definition axis shp = 170 | let axis_len = List.length axis in 171 | let shp_len = List.length shp in 172 | assert (axis_len <= shp_len); 173 | (* add missing definition on higher dimensions *) 174 | let axis = if axis_len < shp_len then axis @ List.init (shp_len - axis_len) (fun _ -> F) else axis in 175 | (* re-format slice definition, note I_ will be replaced with L_ *) 176 | List.map2 177 | (fun i n -> match i with 178 | | I x -> 179 | let x = if x >= 0 then x else n + x in 180 | assert (x < n); 181 | R' (x, x, 1) 182 | | L x -> 183 | let is_cont = ref true in 184 | if List.length x <> n then is_cont := false; 185 | let x = 186 | List.mapi 187 | (fun i j -> 188 | let j = if j >= 0 then j else n + j in 189 | assert (j < n); 190 | if i <> j then is_cont := false; 191 | j) 192 | x 193 | in 194 | if !is_cont = true then R' (0, n-1, 1) else L x 195 | | F -> R' (0, n - 1, 1) 196 | | T x -> 197 | let a = if x >= 0 then x else n + x in 198 | assert (a < n); 199 | R' (a, a, 1) 200 | | R (x, y) -> 201 | let a = if x >= 0 then x else n + x in 202 | let b = if y >= 0 then y else n + y in 203 | let c = if a <= b then 1 else -1 in 204 | assert (not (a >= n || b >= n)); 205 | R' (a, b, c) 206 | | R' (x, y, c) -> 207 | let a = if x >= 0 then x else n + x in 208 | let b = if y >= 0 then y else n + y in 209 | assert (not (a >= n || b >= n || c = 0)); 210 | assert (not ((a < b && c < 0) || (a > b && c > 0))); 211 | R' (a, b, c)) axis shp 212 | 213 | (* this was opied from the Owl project so we skip testing it. 
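   For intuition (a worked example of ours): after check_slice_definition, the slice
   [R (0, 20); I 10; F] applied to shape [100; 100; 50] becomes
   [R' (0, 20, 1); R' (10, 10, 1); R' (0, 49, 1)], for which this function returns the
   slice shape [21; 1; 50].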
*) 214 | let[@coverage off] calc_slice_shape axis = 215 | let f = function 216 | | L x -> List.length x 217 | | R' (x, y, z) -> abs ((y - x) / z) + 1 218 | in 219 | List.map f axis 220 | 221 | let rec cartesian_prod : int list list -> int list list = function 222 | | [] -> [[]] 223 | | x :: xs -> List.concat_map (fun i -> List.map (List.cons i) (cartesian_prod xs)) x 224 | 225 | let range ~step start stop = 226 | let rec aux ~step ~stop acc = function 227 | | x when (step < 0 && x < stop) || (step > 0 && x > stop) -> List.rev acc 228 | | x -> aux ~step ~stop (x :: acc) (x + step) 229 | in 230 | aux ~step ~stop [] start 231 | 232 | (* get indices from a reformatted slice *) 233 | let indices_of_slice = function 234 | | R' (start, stop, step) -> range ~step start stop 235 | | L x -> x 236 | 237 | let coords_of_slice slice shape = 238 | cartesian_prod @@ List.map indices_of_slice (check_slice_definition slice shape) 239 | 240 | let slice_of_coords = function 241 | | [] as x -> x 242 | | x :: _ as xs -> 243 | let module S = Set.Make(Int) in 244 | let add_unique ~acc i y = if S.mem y acc.(i) then () else acc.(i) <- S.add y acc.(i) in 245 | let fill_dims coord acc = List.iteri (add_unique ~acc) coord; acc in 246 | let ndims = List.length x in 247 | let indices = Array.make ndims S.empty in 248 | let dimsets = List.fold_right fill_dims xs indices in 249 | List.map (fun s -> (L (S.elements s) : index)) (Array.to_list dimsets) 250 | 251 | let slice_shape slice array_shape = 252 | calc_slice_shape (check_slice_definition slice array_shape) 253 | end 254 | -------------------------------------------------------------------------------- /zarr-lwt/src/storage.ml: -------------------------------------------------------------------------------- 1 | module IO = struct 2 | type 'a t = 'a Lwt.t 3 | let return = Lwt.return 4 | let bind = Lwt.bind 5 | let map = Lwt.map 6 | let return_unit = Lwt.return_unit 7 | let iter = Lwt_list.iter_s 8 | let fold_left = Lwt_list.fold_left_s 9 | let concat_map f l = Lwt.map List.concat (Lwt_list.map_p f l) 10 | 11 | module Infix = struct 12 | let (>>=) = Lwt.Infix.(>>=) 13 | let (>>|) = Lwt.Infix.(>|=) 14 | end 15 | 16 | module Syntax = struct 17 | let (let*) = Lwt.bind 18 | let (let+) x f = Lwt.map f x 19 | end 20 | end 21 | 22 | module ZipStore = Zarr.Zip.Make(IO) 23 | module MemoryStore = Zarr.Memory.Make(IO) 24 | 25 | module FilesystemStore = struct 26 | module S = struct 27 | open IO.Infix 28 | open IO.Syntax 29 | 30 | type t = {dirname : string; perm : Lwt_unix.file_perm} 31 | type 'a io = 'a IO.t 32 | 33 | let fspath_to_key t path = 34 | let pos = String.length t.dirname + 1 in 35 | String.sub path pos (String.length path - pos) 36 | 37 | let key_to_fspath t key = Filename.concat t.dirname key 38 | 39 | let rec create_parent_dir fn perm = 40 | let maybe_create ~perm parent_dir = function 41 | | true -> Lwt.return_unit 42 | | false -> 43 | let* () = create_parent_dir parent_dir perm in 44 | Lwt_unix.mkdir parent_dir perm 45 | in 46 | let parent_dir = Filename.dirname fn in 47 | Lwt_unix.file_exists parent_dir >>= maybe_create ~perm parent_dir 48 | 49 | let size t key = 50 | let file_length path () = Lwt.map Int64.to_int (Lwt_io.file_length path) 51 | and filepath = key_to_fspath t key in 52 | Lwt.catch (file_length filepath) (Fun.const @@ IO.return 0) 53 | 54 | let get t key = 55 | let* buf_size = size t key in 56 | Lwt_io.with_file 57 | ~buffer:(Lwt_bytes.create buf_size) 58 | ~flags:[Unix.O_RDONLY] 59 | ~perm:t.perm 60 | ~mode:Lwt_io.Input 61 | (key_to_fspath t key) 
62 | Lwt_io.read 63 | 64 | let get_partial_values t key ranges = 65 | let max_range ~tot acc (s, l) = match l with 66 | | None -> Int.max acc (tot - s) 67 | | Some rs -> Int.max acc rs 68 | in 69 | let read_range ~tot ~ic (ofs, len) = 70 | let* () = Lwt_io.set_position ic (Int64.of_int ofs) in 71 | match len with 72 | | None -> Lwt_io.read ~count:(tot - ofs) ic 73 | | Some count -> Lwt_io.read ~count ic 74 | in 75 | let* tot = size t key in 76 | let buf_size = List.fold_left (max_range ~tot) 0 ranges in 77 | Lwt_io.with_file 78 | ~buffer:(Lwt_bytes.create buf_size) 79 | ~flags:[Unix.O_RDONLY] 80 | ~perm:t.perm 81 | ~mode:Lwt_io.Input 82 | (key_to_fspath t key) 83 | (fun ic -> Lwt_list.map_s (read_range ~tot ~ic) ranges) 84 | 85 | let set t key value = 86 | let write ~value oc = Lwt_io.write oc value in 87 | let filename = key_to_fspath t key in 88 | let* () = create_parent_dir filename t.perm in 89 | Lwt_io.with_file 90 | ~buffer:(Lwt_bytes.create (String.length value)) 91 | ~flags:Unix.[O_WRONLY; O_TRUNC; O_CREAT] 92 | ~perm:t.perm 93 | ~mode:Lwt_io.Output 94 | filename 95 | (write ~value) 96 | 97 | let set_partial_values t key ?(append=false) rvs = 98 | let write_all rvs oc = 99 | let write ~oc (ofs, value) = 100 | let* () = Lwt_io.set_position oc (Int64.of_int ofs) in 101 | Lwt_io.write oc value 102 | in 103 | Lwt_list.iter_s (write ~oc) rvs 104 | in 105 | let l = List.fold_left (fun a (_, s) -> Int.max a (String.length s)) 0 rvs in 106 | let flags = match append with 107 | | false -> Unix.[O_WRONLY; O_CREAT] 108 | | true -> Unix.[O_APPEND; O_WRONLY; O_CREAT] 109 | in 110 | let filepath = key_to_fspath t key in 111 | let* () = create_parent_dir filepath t.perm in 112 | Lwt_io.with_file 113 | ~buffer:(Lwt_bytes.create l) 114 | ~perm:t.perm 115 | ~mode:Lwt_io.Output 116 | ~flags 117 | filepath 118 | (write_all rvs) 119 | 120 | let rec walk t acc dir = 121 | let accumulate ~t x a = 122 | if x = "." || x = ".." then Lwt.return a else 123 | match Filename.concat dir x with 124 | | p when Sys.is_directory p -> walk t a p 125 | | p -> Lwt.return (fspath_to_key t p :: a) 126 | in 127 | Lwt_stream.fold_s (accumulate ~t) (Lwt_unix.files_of_directory dir) acc 128 | 129 | let list_dir t prefix = 130 | let choose ~t ~dir x = match Filename.concat dir x with 131 | | p when Sys.is_directory p -> Either.right @@ (fspath_to_key t p) ^ "/" 132 | | p -> Either.left (fspath_to_key t p) 133 | in 134 | let predicate x = if x = "." || x = ".." 
then false else true in 135 | let dir = key_to_fspath t prefix in 136 | let relevant = Lwt_stream.filter predicate (Lwt_unix.files_of_directory dir) in 137 | let+ dir_contents = Lwt_stream.to_list relevant in 138 | List.partition_map (choose ~t ~dir) dir_contents 139 | 140 | let list t = walk t [] (key_to_fspath t "") 141 | let list_prefix t prefix = walk t [] (key_to_fspath t prefix) 142 | let is_member t key = Lwt_unix.file_exists (key_to_fspath t key) 143 | let erase t key = Lwt_unix.unlink (key_to_fspath t key) 144 | let erase_prefix t pre = list_prefix t pre >>= Lwt_list.iter_s (erase t) 145 | let rename t k k' = Lwt_unix.rename (key_to_fspath t k) (key_to_fspath t k') 146 | end 147 | 148 | let create ?(perm=0o700) dirname = 149 | Zarr.Util.create_parent_dir dirname perm; 150 | Sys.mkdir dirname perm; 151 | S.{dirname = Zarr.Util.sanitize_dir dirname; perm} 152 | 153 | let open_store ?(perm=0o700) dirname = 154 | if Sys.is_directory dirname 155 | then S.{dirname = Zarr.Util.sanitize_dir dirname; perm} 156 | else raise (Zarr.Storage.Not_a_filesystem_store dirname) 157 | 158 | include Zarr.Storage.Make(IO)(S) 159 | end 160 | 161 | module AmazonS3Store = struct 162 | module Credentials = Aws_s3_lwt.Credentials 163 | module S3 = Aws_s3_lwt.S3 164 | 165 | open IO.Infix 166 | open IO.Syntax 167 | 168 | exception Request_failed of S3.error 169 | 170 | let empty_content () = S3.{ 171 | storage_class = Standard; 172 | meta_headers = None; 173 | etag = String.empty; 174 | key = String.empty; 175 | last_modified = 0.; 176 | size = 0 177 | } 178 | 179 | let fold_or_catch ~not_found res = 180 | let return_or_raise r () = match r with 181 | | Ok v -> IO.return v 182 | | Error e -> raise (Request_failed e) 183 | and on_exception ~not_found = function 184 | | Request_failed S3.Not_found -> Lwt.return (not_found ()) 185 | | exn -> raise exn 186 | in 187 | Lwt.catch (return_or_raise res) (on_exception ~not_found) 188 | 189 | let raise_not_found k () = raise (Zarr.Storage.Key_not_found k) 190 | let empty_Ls = Fun.const ([], S3.Ls.Done) 191 | 192 | let fold_continuation ~return ~more = function 193 | | S3.Ls.Done -> IO.return return 194 | | S3.Ls.More continuation -> 195 | continuation () >>= fold_or_catch ~not_found:empty_Ls >>= fun (xs, cont) -> 196 | more xs cont 197 | 198 | module S = struct 199 | type t = 200 | {retries : int 201 | ;bucket : string 202 | ;cred : Credentials.t 203 | ;endpoint : Aws_s3.Region.endpoint} 204 | type 'a io = 'a IO.t 205 | 206 | let size t key = 207 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 208 | let f ~endpoint () = S3.head ~bucket ~credentials ~key ~endpoint () in 209 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 210 | Lwt.map (fun (x : S3.content) -> x.size) (fold_or_catch ~not_found:empty_content res) 211 | 212 | let is_member t key = Lwt.map (fun s -> if s = 0 then false else true) (size t key) 213 | 214 | let get t key = 215 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 216 | let f ~endpoint () = S3.get ~bucket ~credentials ~endpoint ~key () in 217 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 218 | fold_or_catch ~not_found:(raise_not_found key) res 219 | 220 | let get_partial_values t key ranges = 221 | let read_range t key (ofs, len) = 222 | let range = match len with 223 | | None -> S3.{first = Some ofs; last = None} 224 | | Some l -> S3.{first = Some ofs; last = Some (ofs + l - 1)} 225 | in 226 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint 
in 227 | let f ~endpoint () = S3.get ~bucket ~credentials ~endpoint ~range ~key () in 228 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 229 | Lwt.map (fun x -> [x]) (fold_or_catch ~not_found:(raise_not_found key) res) 230 | in 231 | IO.concat_map (read_range t key) ranges 232 | 233 | let set t key data = 234 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 235 | let f ~endpoint () = S3.put ~bucket ~credentials ~endpoint ~data ~key () in 236 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 237 | let* _ = fold_or_catch ~not_found:(Fun.const String.empty) res in 238 | IO.return_unit 239 | 240 | let set_partial_values t key ?(append=false) rsv = 241 | let* size = size t key in 242 | let* ov = match size with 243 | | 0 -> IO.return String.empty 244 | | _ -> get t key 245 | in 246 | let f = if append || ov = String.empty then 247 | fun acc (_, v) -> acc ^ v else 248 | fun acc (rs, v) -> 249 | let s = Bytes.unsafe_of_string acc in 250 | Bytes.blit_string v 0 s rs String.(length v); 251 | Bytes.unsafe_to_string s 252 | in 253 | set t key (List.fold_left f ov rsv) 254 | 255 | let erase t key = 256 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 257 | let f ~endpoint () = S3.delete ~bucket ~credentials ~endpoint ~key () in 258 | S3.retry ~retries:t.retries ~endpoint ~f () >>= fold_or_catch ~not_found:(Fun.const ()) 259 | 260 | let rec delete_keys t cont () = 261 | let del t xs c = IO.iter (delete_content t) xs >>= delete_keys t c in 262 | fold_continuation ~return:() ~more:(del t) cont 263 | 264 | and delete_content t S3.{key; _} = erase t key 265 | 266 | and erase_prefix t prefix = 267 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 268 | let f ~endpoint () = S3.ls ~bucket ~credentials ~endpoint ~prefix () in 269 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 270 | let* xs, rest = fold_or_catch ~not_found:empty_Ls res in 271 | IO.iter (delete_content t) xs >>= delete_keys t rest 272 | 273 | let rec list t = 274 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 275 | let f ~endpoint () = S3.ls ~bucket ~credentials ~endpoint () in 276 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 277 | let* xs, rest = fold_or_catch ~not_found:empty_Ls res in 278 | accumulate_keys (List.map content_key xs) rest 279 | 280 | and content_key S3.{key; _} = key 281 | 282 | and accumulate_keys acc cont = 283 | let append acc xs c = accumulate_keys (acc @ List.map content_key xs) c in 284 | fold_continuation ~return:acc ~more:(append acc) cont 285 | 286 | module M = Set.Make(String) 287 | 288 | let rec partition_keys prefix ((l, r) as acc) cont = 289 | let split ~acc ~prefix xs c = partition_keys prefix (List.fold_left (add prefix) acc xs) c in 290 | fold_continuation ~return:(l, M.elements r) ~more:(split ~acc ~prefix) cont 291 | 292 | and add prefix (l, r) (c : S3.content) = 293 | let size = String.length prefix in 294 | if not (String.contains_from c.key size '/') then c.key :: l, r else 295 | l, M.add String.(sub c.key 0 @@ 1 + index_from c.key size '/') r 296 | 297 | and list_dir t prefix = 298 | let bucket = t.bucket and credentials = t.cred and endpoint = t.endpoint in 299 | let f ~endpoint () = S3.ls ~bucket ~credentials ~endpoint ~prefix () in 300 | let* res = S3.retry ~retries:t.retries ~endpoint ~f () in 301 | let* xs, rest = fold_or_catch ~not_found:empty_Ls res in 302 | let init = List.fold_left (add prefix) ([], M.empty) xs in 303 | 
partition_keys prefix init rest 304 | 305 | let rec rename t prefix new_prefix = 306 | let upload t (k, v) = set t k v in 307 | let* xs = list t in 308 | let to_delete = List.filter (String.starts_with ~prefix) xs in 309 | let* data = IO.fold_left (rename_and_add ~t ~prefix ~new_prefix) [] to_delete in 310 | let* () = IO.iter (upload t) data in 311 | IO.iter (erase t) to_delete 312 | 313 | and rename_and_add ~t ~prefix ~new_prefix acc k = 314 | let l = String.length prefix in 315 | let k' = new_prefix ^ String.sub k l (String.length k - l) in 316 | Lwt.map (fun a -> (k', a) :: acc) (get t k) 317 | end 318 | 319 | let with_open ?(scheme=`Http) ?(inet=`V4) ?(retries=3) ~region ~bucket ~profile f = 320 | let* res = Credentials.Helper.get_credentials ~profile () in 321 | let cred = Result.fold ~ok:Fun.id ~error:raise res in 322 | let endpoint = Aws_s3.Region.endpoint ~inet ~scheme region in 323 | f S.{bucket; cred; endpoint; retries} 324 | 325 | include Zarr.Storage.Make(IO)(S) 326 | end 327 | -------------------------------------------------------------------------------- /zarr/test/test_codecs.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Zarr 3 | open Zarr.Codecs 4 | 5 | let decode_chain ~shape ~str ~msg = begin match Chain.of_yojson shape @@ Yojson.Safe.from_string str with 6 | | Ok _ -> assert_failure "Impossible to decode an unsupported codec."; 7 | | Error s -> assert_equal ~printer:Fun.id msg s end 8 | 9 | let bytes_encode_decode (type a) (decoded_repr : a array_repr) (fill_value : a) = 10 | List.iter 11 | (fun bytes_codec -> 12 | let chain = [bytes_codec] in 13 | let c = Chain.create decoded_repr.shape chain in 14 | let arr = Ndarray.create decoded_repr.kind decoded_repr.shape fill_value in 15 | let decoded = Chain.decode c decoded_repr (Chain.encode c arr) in 16 | assert_equal arr decoded) 17 | [`Bytes LE; `Bytes BE] 18 | 19 | let tests = [ 20 | "test codec chain" >:: (fun _ -> 21 | let shape = [10; 15; 10] in 22 | let kind = Ndarray.Int16 in 23 | let fill_value = 10 in 24 | let shard_cfg = 25 | {chunk_shape = [2; 5; 5] 26 | ;index_location = End 27 | ;index_codecs = [`Bytes LE; `Crc32c] 28 | ;codecs = [`Transpose [0; 1; 2]; `Bytes BE; `Gzip L1]} 29 | in 30 | let chain = [`Transpose [2; 1; 0; 3]; `ShardingIndexed shard_cfg; `Crc32c; `Gzip L9] in 31 | assert_raises (Zarr.Codecs.Invalid_transpose_order) (fun () -> Chain.create shape chain); 32 | let chain = [`ShardingIndexed shard_cfg; `Transpose [2; 1; 0]; `Gzip L0] in 33 | assert_raises (Zarr.Codecs.Invalid_codec_ordering) (fun () -> Chain.create shape chain); 34 | let chain = [`Transpose [2; 1; 0]; `Crc32c] in 35 | assert_raises (Zarr.Codecs.Array_to_bytes_invariant) (fun () -> Chain.create shape chain); 36 | let chain = [`Transpose [2; 1; 0]; `ShardingIndexed shard_cfg; `Crc32c; `Gzip L9] in 37 | let c = Chain.create shape chain in 38 | let arr = Ndarray.create kind shape fill_value in 39 | let encoded = Chain.encode c arr in 40 | assert_equal arr @@ Chain.decode c {shape; kind} encoded; 41 | decode_chain ~shape ~str:"[]" ~msg:"Must be exactly one array->bytes codec."; 42 | decode_chain 43 | ~shape 44 | ~str:{|[{"name": "gzip", "configuration": {"level": 1}}]|} 45 | ~msg:"Must be exactly one array->bytes codec."; 46 | decode_chain 47 | ~shape 48 | ~str:{|[{"name": "fake_codec"}, {"name": "bytes", "configuration": {"endian": "little"}}]|} 49 | ~msg:"fake_codec codec is unsupported or has invalid configuration."; 50 | 51 | let str = Chain.to_yojson c |> 
Yojson.Safe.to_string in 52 | (match Chain.of_yojson shape @@ Yojson.Safe.from_string str with 53 | | Ok v -> assert_equal v c; 54 | | Error _ -> assert_failure "a serialized chain should successfully deserialize")) 55 | ; 56 | 57 | "test transpose codec" >:: (fun _ -> 58 | (* test decoding of chain with misspelled configuration name *) 59 | decode_chain 60 | ~shape:[1; 1] 61 | ~str:{|[{"name": "transpose", "configuration": {"ordeR": [0, 1]}}, 62 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 63 | ~msg:"transpose codec is unsupported or has invalid configuration."; 64 | (* test decoding of chain with empty transpose order *) 65 | decode_chain 66 | ~shape:[] 67 | ~str:{|[{"name": "transpose", "configuration": {"order": []}}, 68 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 69 | ~msg:"transpose codec is unsupported or has invalid configuration."; 70 | (* test decoding of chain with duplicated transpose order *) 71 | decode_chain 72 | ~shape:[1; 1] 73 | ~str:{|[{"name": "transpose", "configuration": {"order": [0, 0]}}, 74 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 75 | ~msg:"transpose codec is unsupported or has invalid configuration."; 76 | (* test decoding with negative transpose dimensions. *) 77 | decode_chain 78 | ~shape:[1] 79 | ~str:{|[{"name": "transpose", "configuration": {"order": [-1]}}, 80 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 81 | ~msg:"transpose codec is unsupported or has invalid configuration."; 82 | (* test decoding transpose order bigger than an array's dimensionality. *) 83 | decode_chain 84 | ~shape:[2; 2] 85 | ~str:{|[{"name": "transpose", "configuration": {"order": [0, 1, 2]}}, 86 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 87 | ~msg:"transpose codec is unsupported or has invalid configuration."; 88 | (* test decoding transpose order containing non-integer value(s). *) 89 | decode_chain 90 | ~shape:[2; 2] 91 | ~str:{|[{"name": "transpose", "configuration": {"order": [0, 1, 2.0]}}, 92 | {"name": "bytes", "configuration": {"endian": "little"}}]|} 93 | ~msg:"transpose codec is unsupported or has invalid configuration."; 94 | (* test encoding of chain with an empty or too big transpose order. *) 95 | let shape = [2; 2; 2] in 96 | let chain = [`Transpose []; `Bytes LE] in 97 | assert_raises (Zarr.Codecs.Invalid_transpose_order) (fun () -> Chain.create shape chain); 98 | assert_raises (Zarr.Codecs.Invalid_transpose_order) (fun () -> Chain.create shape [`Transpose [4; 0; 1]; `Bytes LE])) 99 | ; 100 | 101 | "test sharding indexed codec" >:: (fun _ -> 102 | (* test missing chunk_shape field. *) 103 | decode_chain 104 | ~shape:[] 105 | ~str:{|[ 106 | {"name": "sharding_indexed", 107 | "configuration": 108 | {"index_location": "end", 109 | "codecs": 110 | [{"name": "bytes", "configuration": {"endian": "big"}}], 111 | "index_codecs": 112 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 113 | ~msg:"Must be exactly one array->bytes codec."; 114 | (*test missing index_location field. *) 115 | decode_chain 116 | ~shape:[5; 5; 5] 117 | ~str:{|[ 118 | {"name": "sharding_indexed", 119 | "configuration": 120 | {"chunk_shape": [5, 5, 5], 121 | "codecs": 122 | [{"name": "bytes", "configuration": {"endian": "big"}}], 123 | "index_codecs": 124 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 125 | ~msg:"Must be exactly one array->bytes codec."; 126 | (* test missing codecs field. 
*) 127 | decode_chain 128 | ~shape:[5; 5; 5] 129 | ~str:{|[ 130 | {"name": "sharding_indexed", 131 | "configuration": 132 | {"index_location": "end", 133 | "chunk_shape": [5, 5, 5], 134 | "index_codecs": 135 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 136 | ~msg:"Must be exactly one array->bytes codec."; 137 | (* tests missing index_codecs field. *) 138 | decode_chain 139 | ~shape:[5; 5; 5] 140 | ~str:{|[ 141 | {"name": "sharding_indexed", 142 | "configuration": 143 | {"index_location": "start", 144 | "chunk_shape": [5, 5, 5], 145 | "codecs": 146 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 147 | ~msg:"Must be exactly one array->bytes codec."; 148 | (* tests incorrect value for index_location field. *) 149 | decode_chain 150 | ~shape:[5; 5; 5] 151 | ~str:{|[ 152 | {"name": "sharding_indexed", 153 | "configuration": 154 | {"index_location": "MIDDLE", 155 | "chunk_shape": [5, 5, 5], 156 | "index_codecs": 157 | [{"name": "bytes", "configuration": {"endian": "big"}}], 158 | "codecs": 159 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 160 | ~msg:"Must be exactly one array->bytes codec."; 161 | (* tests incorrect non-integer values for chunk_shape field. *) 162 | decode_chain 163 | ~shape:[5; 5; 5] 164 | ~str:{|[ 165 | {"name": "sharding_indexed", 166 | "configuration": 167 | {"index_location": "start", 168 | "chunk_shape": [5, -5, 5.5], 169 | "index_codecs": 170 | [{"name": "bytes", "configuration": {"endian": "big"}}], 171 | "codecs": 172 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 173 | ~msg:"Must be exactly one array->bytes codec."; 174 | (* tests unspecified codecs field. *) 175 | decode_chain 176 | ~shape:[5; 5; 5] 177 | ~str:{|[ 178 | {"name": "sharding_indexed", 179 | "configuration": 180 | {"index_location": "start", 181 | "chunk_shape": [5, 5, 5], 182 | "index_codecs": [], 183 | "codecs": 184 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 185 | ~msg:"Must be exactly one array->bytes codec."; 186 | (* tests ill-formed codecs/index_codecs field. In this case, missing 187 | the required bytes->bytes codec. *) 188 | decode_chain 189 | ~shape:[5; 5; 5] 190 | ~str:{|[ 191 | {"name": "sharding_indexed", 192 | "configuration": 193 | {"index_location": "start", 194 | "chunk_shape": [5, 5, 5], 195 | "index_codecs": [{"name": "crc32c"}], 196 | "codecs": 197 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 198 | ~msg:"Must be exactly one array->bytes codec."; 199 | (* tests ill-formed codecs/index_codecs field. In this case, parsing 200 | an unsupported/unknown codec. *) 201 | decode_chain 202 | ~shape:[5; 5; 5] 203 | ~str:{|[ 204 | {"name": "sharding_indexed", 205 | "configuration": 206 | {"index_location": "start", 207 | "chunk_shape": [5, 5, 5], 208 | "index_codecs": 209 | [{"name": "bytes", "configuration": {"endian": "big"}}, 210 | {"name": "UNKNOWN_BYTESTOBYTES_CODEC"}], 211 | "codecs": 212 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} 213 | ~msg:"Must be exactly one array->bytes codec."; 214 | (* test violation of index_codec invariant when it contains variable-sized codecs. 
*) 215 | List.iter 216 | (fun c -> 217 | decode_chain 218 | ~shape:[5; 5; 5] 219 | ~str:(Format.sprintf {|[ 220 | {"name": "sharding_indexed", 221 | "configuration": 222 | {"index_location": "start", 223 | "chunk_shape": [5, 5, 5], 224 | "index_codecs": 225 | [{"name": "bytes", "configuration": {"endian": "big"}}, %s], 226 | "codecs": 227 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]|} c) 228 | ~msg:"Must be exactly one array->bytes codec.") 229 | [{|{"name": "zstd", "configuration": {"level": 0, "checksum": false}}|} 230 | ;{|{"name": "gzip", "configuration": {"level": 1}}|}]; 231 | 232 | let shape = [10; 15; 10] in 233 | let kind = Ndarray.Float64 in 234 | let cfg = 235 | {chunk_shape = [3; 5; 5] 236 | ;index_location = Start 237 | ;index_codecs = [`Transpose [0; 3; 1; 2]; `Bytes LE; `Crc32c] 238 | ;codecs = [`Bytes BE]} 239 | in 240 | let chain = [`ShardingIndexed cfg] in 241 | (*test failure for chunk shape not evenly dividing shard. *) 242 | assert_raises (Zarr.Codecs.Invalid_sharding_chunk_shape) (fun () -> Chain.create shape chain); 243 | (* test failure for chunk shape length not equal to dimensionality of shard.*) 244 | assert_raises 245 | (Zarr.Codecs.Invalid_sharding_chunk_shape) 246 | (fun () -> Chain.create shape @@ [`ShardingIndexed {cfg with chunk_shape = [5]}]); 247 | 248 | let chain = [`ShardingIndexed {cfg with chunk_shape = [5; 3; 5]}] in 249 | let c = Chain.create shape chain in 250 | let arr = Ndarray.create kind shape (-10.) in 251 | let encoded = Chain.encode c arr in 252 | assert_equal arr (Chain.decode c {shape; kind} encoded); 253 | 254 | (* test correctness of decoding nested sharding codecs.*) 255 | let str = 256 | {|[ 257 | {"name": "sharding_indexed", 258 | "configuration": 259 | {"index_location": "start", 260 | "chunk_shape": [5, 5, 5], 261 | "index_codecs": 262 | [{"name": "bytes", "configuration": {"endian": "big"}}], 263 | "codecs": 264 | [{"name": "sharding_indexed", 265 | "configuration": 266 | {"index_location": "end", 267 | "chunk_shape": [5, 5, 5], 268 | "index_codecs": 269 | [{"name": "bytes", "configuration": {"endian": "big"}}], 270 | "codecs": 271 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]}}]|} 272 | in 273 | let r = Chain.of_yojson shape @@ Yojson.Safe.from_string str in 274 | assert_bool "Encoding this nested sharding chain should not fail" @@ Result.is_ok r; 275 | (* test if decoding of indexed_codec with sharding for array->bytes fails.*) 276 | let str = 277 | {|[ 278 | {"name": "sharding_indexed", 279 | "configuration": 280 | {"index_location": "start", 281 | "chunk_shape": [5, 5, 5], 282 | "codecs": 283 | [{"name": "bytes", "configuration": {"endian": "big"}}], 284 | "index_codecs": 285 | [{"name": "sharding_indexed", 286 | "configuration": 287 | {"index_location": "end", 288 | "chunk_shape": [5, 5, 5, 1], 289 | "index_codecs": 290 | [{"name": "bytes", "configuration": {"endian": "big"}}], 291 | "codecs": 292 | [{"name": "bytes", "configuration": {"endian": "big"}}]}}]}}]|} 293 | in 294 | let r = Chain.of_yojson shape @@ Yojson.Safe.from_string str in 295 | assert_bool "Decoding of index_codec chain with sharding should fail" @@ Result.is_error r) 296 | ; 297 | 298 | 299 | "test gzip codec" >:: (fun _ -> 300 | (* test wrong compression level *) 301 | decode_chain 302 | ~shape:[] 303 | ~str:{|[{"name": "bytes", "configuration": {"endian": "little"}}, 304 | {"name": "gzip", "configuration": {"level": -1}}]|} 305 | ~msg:"gzip codec is unsupported or has invalid configuration."; 306 | (* test incorrect 
configuration *) 307 | decode_chain 308 | ~shape:[] 309 | ~str:{|[{"name": "bytes", "configuration": {"endian": "little"}}, 310 | {"name": "gzip", "configuration": {"something": -1}}]|} 311 | ~msg:"gzip codec is unsupported or has invalid configuration."; 312 | 313 | (* test correct deserialization of gzip compression level *) 314 | let shape = [10; 15; 10] in 315 | List.iter 316 | (fun level -> 317 | let str = 318 | Format.sprintf 319 | {|[{"name": "bytes", "configuration": {"endian": "little"}}, 320 | {"name": "gzip", "configuration": {"level": %d}}]|} level in 321 | let r = Chain.of_yojson shape @@ Yojson.Safe.from_string str in 322 | assert_bool "Encoding this chain should not fail" @@ Result.is_ok r) 323 | [0; 1; 2; 3; 4; 5; 6; 7; 8; 9]; 324 | 325 | (* test encoding/decoding for various compression levels *) 326 | let kind = Ndarray.Complex64 in 327 | let fill_value = Complex.one in 328 | let arr = Ndarray.create kind shape fill_value in 329 | let chain = [`Bytes LE] in 330 | List.iter 331 | (fun level -> 332 | let c = Chain.create shape @@ chain @ [`Gzip level] in 333 | let encoded = Chain.encode c arr in 334 | assert_equal arr @@ Chain.decode c {shape; kind} encoded) 335 | [L0; L1; L2; L3; L4; L5; L6; L7; L8; L9]) 336 | ; 337 | 338 | "test zstd codec" >:: (fun _ -> 339 | (* test wrong compression level *) 340 | List.iter 341 | (fun l -> 342 | decode_chain 343 | ~shape:[] 344 | ~str:(Format.sprintf {|[{"name": "bytes", "configuration": {"endian": "little"}}, 345 | {"name": "zstd", "configuration": {"level": %d, "checksum": false}}]|} l) 346 | ~msg:"zstd codec is unsupported or has invalid configuration.") 347 | [50; -500_000]; 348 | 349 | (* test incorrect configuration *) 350 | decode_chain 351 | ~shape:[] 352 | ~str:{|[{"name": "bytes", "configuration": {"endian": "little"}}, 353 | {"name": "zstd", "configuration": {"something": -1}}]|} 354 | ~msg:"zstd codec is unsupported or has invalid configuration."; 355 | 356 | (* test correct deserialization of zstd compression level *) 357 | let shape = [10; 15; 10] in 358 | List.iter 359 | (fun level -> 360 | let str = 361 | Format.sprintf 362 | {|[{"name": "bytes", "configuration": {"endian": "little"}}, 363 | {"name": "zstd", "configuration": {"level": %d, "checksum": false}}]|} level 364 | in 365 | let r = Chain.of_yojson shape @@ Yojson.Safe.from_string str in 366 | assert_bool "Encoding this chain should not fail" @@ Result.is_ok r) [-131072; 0]; 367 | 368 | (* test encoding/decoding for various compression levels *) 369 | let arr = Ndarray.create Int shape Int.max_int in 370 | List.iter 371 | (fun (level, checksum) -> 372 | let c = Chain.create shape [`Bytes LE; `Zstd (level, checksum)] in 373 | let encoded = Chain.encode c arr in 374 | assert_equal arr @@ Chain.decode c {shape; kind = Ndarray.Int} encoded) 375 | [(-131072, false); (-131072, true); (0, false); (0, true)]) 376 | ; 377 | 378 | "test bytes codec" >:: (fun _ -> 379 | let shape = [2; 2; 2] in 380 | (* test decoding of chain with invalid endianness name *) 381 | decode_chain 382 | ~shape 383 | ~str:{|[{"name": "bytes", "configuration": {"endian": "HUGE"}}]|} 384 | ~msg:"Must be exactly one array->bytes codec."; 385 | (* test decoding of chain with invalid configuration param. 
*) 386 | decode_chain 387 | ~shape 388 | ~str:{|[{"name": "bytes", "configuration": {"wrong": 5}}]|} 389 | ~msg:"Must be exactly one array->bytes codec."; 390 | 391 | (* test encoding/decoding of Char *) 392 | bytes_encode_decode {shape; kind = Ndarray.Char} '?'; 393 | (* test encoding/decoding of Bool *) 394 | bytes_encode_decode {shape; kind = Ndarray.Bool} false; 395 | bytes_encode_decode {shape; kind = Ndarray.Bool} true; 396 | (* test encoding/decoding of int8 *) 397 | bytes_encode_decode {shape; kind = Ndarray.Int8} 0; 398 | (* test encoding/decoding of uint8 *) 399 | bytes_encode_decode {shape; kind = Ndarray.Uint8} 0; 400 | (* test encoding/decoding of int16 *) 401 | bytes_encode_decode {shape; kind = Ndarray.Int16} 0; 402 | (* test encoding/decoding of uint16 *) 403 | bytes_encode_decode {shape; kind = Ndarray.Uint16} 0; 404 | (* test encoding/decoding of int32 *) 405 | bytes_encode_decode {shape; kind = Ndarray.Int32} 0l; 406 | (* test encoding/decoding of int64 *) 407 | bytes_encode_decode {shape; kind = Ndarray.Int64} 0L; 408 | (* test encoding/decoding of float32 *) 409 | bytes_encode_decode {shape; kind = Ndarray.Float32} 0.0; 410 | (* test encoding/decoding of float64 *) 411 | bytes_encode_decode {shape; kind = Ndarray.Float64} 0.0; 412 | (* test encoding and decoding of Complex32 *) 413 | bytes_encode_decode {shape; kind = Ndarray.Complex32} Complex.zero; 414 | (* test encoding/decoding of complex64 *) 415 | bytes_encode_decode {shape; kind = Ndarray.Complex64} Complex.zero; 416 | (* test encoding/decoding of int *) 417 | bytes_encode_decode {shape; kind = Ndarray.Int} Int.max_int; 418 | (* test encoding/decoding of nativeint *) 419 | bytes_encode_decode {shape; kind = Ndarray.Nativeint} Nativeint.max_int) 420 | ] 421 | -------------------------------------------------------------------------------- /zarr/src/metadata.ml: -------------------------------------------------------------------------------- 1 | open Extensions 2 | 3 | exception Parse_error of string 4 | 5 | module FillValue = struct 6 | type t = 7 | | Char of char 8 | | Bool of bool 9 | | Int of int 10 | | Intlit of string * Stdint.uint64 (* for ints that cannot fit in a 63-bit integer type *) 11 | | Float of float 12 | | IntFloat of int * float 13 | | IntlitFloat of string * float 14 | | StringFloat of string * float (* float represented as a hex or special (NaN/Infinity) string in the metadata json. *) 15 | | IntComplex of (int * int) * Complex.t (* complex number represented using ints in the metadata json. *) 16 | | IntlitComplex of (string * string) * Complex.t (* complex number represented using ints in the metadata json. *) 17 | | FloatComplex of Complex.t (* complex number represented using floats in the metadata json. *) 18 | | StringComplex of (string * string) * Complex.t 19 | 20 | let rec create : type a. 
a Ndarray.dtype -> a -> t = fun kind x -> match kind with 21 | | Ndarray.Char -> Char x 22 | | Ndarray.Bool -> Bool x 23 | | Ndarray.Int8 -> Int x 24 | | Ndarray.Uint8 -> Int x 25 | | Ndarray.Int16 -> Int x 26 | | Ndarray.Uint16 -> Int x 27 | | Ndarray.Int32 -> Int (Int32.to_int x) 28 | | Ndarray.Int -> Int x 29 | | Ndarray.Int64 when x >= -4611686018427387904L && x <= 4611686018427387903L -> Int (Int64.to_int x) 30 | | Ndarray.Int64 -> Intlit (Int64.to_string x, Stdint.Uint64.of_int64 x) 31 | | Ndarray.Uint64 when Stdint.Uint64.(compare x (of_int Int.max_int)) < 0 -> Int (Stdint.Uint64.to_int x) 32 | | Ndarray.Uint64 -> Intlit (Stdint.Uint64.to_string x, x) 33 | | Ndarray.Float32 -> Float x 34 | | Ndarray.Float64 -> Float x 35 | | Ndarray.Complex32 -> FloatComplex x 36 | | Ndarray.Complex64 -> FloatComplex x 37 | | Ndarray.Nativeint -> create Ndarray.Int64 (Int64.of_nativeint x) 38 | 39 | let equal x y = match x, y with 40 | | Char a, Char b when Char.equal a b -> true 41 | | Bool false, Bool false -> true 42 | | Bool true, Bool true -> true 43 | | Int a, Int b when Int.equal a b -> true 44 | | Intlit (a, _), Intlit (b, _) when String.equal a b -> true 45 | | Float a, Float b when Float.equal a b -> true 46 | | IntFloat (a, _), IntFloat (b, _) when Int.equal a b -> true 47 | | IntlitFloat (a, _), IntlitFloat (b, _) when String.equal a b -> true 48 | | StringFloat ("Infinity", _), StringFloat ("Infinity", _) -> true 49 | | StringFloat ("-Infinity", _), StringFloat ("-Infinity", _) -> true 50 | | StringFloat ("NaN", _), StringFloat ("NaN", _) -> true 51 | | StringFloat (a, _), StringFloat (b, _) when String.equal a b -> true 52 | | IntComplex ((a1, b1), _), IntComplex ((a2, b2), _) when Int.(equal a1 a2 && equal b1 b2) -> true 53 | | IntlitComplex ((a1, b1), _), IntlitComplex ((a2, b2), _) when String.(equal a1 a2 && equal b1 b2) -> true 54 | | FloatComplex Complex.{re=r1;im=i1}, FloatComplex Complex.{re=r2;im=i2} when Float.(equal r1 r2 && equal i1 i2) -> true 55 | | StringComplex ((a1, b1), _), StringComplex ((a2, b2), _) when String.(equal a1 a2 && equal b1 b2) -> true 56 | | _ -> false 57 | 58 | (* This makes sure the way the fill-value is encoded in the metadata is 59 | preserved when converting a parsed FillValue.t back to its JSON value. *) 60 | let rec of_yojson (d : Datatype.t) (x : Yojson.Safe.t) = match d, x with 61 | | Datatype.Char, `String s when String.length s = 1 -> Ok (Char (String.get s 0)) 62 | | Datatype.Bool, `Bool b -> Ok (Bool b) 63 | | Datatype.Int8, `Int a when a >= -128 && a <= 127 -> Ok (Int a) 64 | | Datatype.Uint8, `Int a when a >= 0 && a <= 255 -> Ok (Int a) 65 | | Datatype.Int16, `Int a when a >= -32768 && a <= 32767 -> Ok (Int a) 66 | | Datatype.Uint16, `Int a when a >= 0 && a <= 65535 -> Ok (Int a) 67 | | Datatype.Int32, `Int a when a >= -2147483648 && a <= 2147483647 -> Ok (Int a) 68 | | Datatype.Int, `Int a -> Ok (Int a) 69 | | Datatype.Int64, `Int a -> Ok (Int a) 70 | | Datatype.Int64, `Intlit a -> begin match Int64.of_string_opt a with 71 | | None -> Error "Unsupported fill value." 72 | | Some b -> Ok (Intlit (a, Stdint.Uint64.of_int64 b)) end 73 | | Datatype.Nativeint, a -> of_yojson Datatype.Int64 a 74 | | Datatype.Uint64, `Int a when a >= 0 -> Ok (Int a) 75 | | Datatype.Uint64, `Intlit a when not (String.starts_with ~prefix:"-" a) -> 76 | begin match Stdint.Uint64.of_string a with 77 | | exception Invalid_argument _ -> Error "Unsupported fill value." 
78 | | b -> Ok (Intlit (a, b)) end 79 | | Datatype.Float32, `Float a -> Ok (Float a) 80 | | Datatype.Float32, `Int a -> Ok (IntFloat (a, Float.of_int a)) 81 | | Datatype.Float32, `Intlit a -> Ok (IntlitFloat (a, Float.of_string a)) 82 | | Datatype.Float32, `String ("Infinity" as s) -> Ok (StringFloat (s, Float.infinity)) 83 | | Datatype.Float32, `String ("-Infinity" as s) -> Ok (StringFloat (s, Float.neg_infinity)) 84 | | Datatype.Float32, `String ("NaN" as s) -> Ok (StringFloat (s, Float.nan)) 85 | | Datatype.Float32, `String s when String.starts_with ~prefix:"0x" s -> 86 | begin match Stdint.Uint64.of_string s with 87 | | exception Invalid_argument _ -> Error "Unsupported fill value." 88 | | a -> Ok (StringFloat (s, Stdint.Uint64.to_float a)) end 89 | | Datatype.Float64, `Float a -> Ok (Float a) 90 | | Datatype.Float64, `Int a -> Ok (IntFloat (a, Float.of_int a)) 91 | | Datatype.Float64, `Intlit a -> Ok (IntlitFloat (a, Float.of_string a)) 92 | | Datatype.Float64, `String ("Infinity" as s) -> Ok (StringFloat (s, Float.infinity)) 93 | | Datatype.Float64, `String ("-Infinity" as s) -> Ok (StringFloat (s, Float.neg_infinity)) 94 | | Datatype.Float64, `String ("NaN" as s) -> Ok (StringFloat (s, Float.nan)) 95 | | Datatype.Float64, `String s when String.starts_with ~prefix:"0x" s -> 96 | begin match Stdint.Uint64.of_string s with 97 | | exception Invalid_argument _ -> Error "Unsupported fill value." 98 | | a -> Ok (StringFloat (s, Stdint.Uint64.to_float a)) end 99 | | Datatype.Complex32, `List [`Int a; `Int b] -> Ok (IntComplex ((a, b), Complex.{re=Float.of_int a; im=Float.of_int b})) 100 | | Datatype.Complex32, `List [`Intlit a; `Intlit b] -> Ok (IntlitComplex ((a, b), Complex.{re=Float.of_string a; im=Float.of_string b})) 101 | | Datatype.Complex32, `List [`Float re; `Float im] -> Ok (FloatComplex Complex.{re; im}) 102 | | Datatype.Complex32, `List [`String a; `String b] -> Ok (StringComplex ((a, b), Complex.{re=Float.of_string a; im=Float.of_string b})) 103 | | Datatype.Complex64, `List [`Int a; `Int b] -> Ok (IntComplex ((a, b), Complex.{re=Float.of_int a; im=Float.of_int b})) 104 | | Datatype.Complex64, `List [`Intlit a; `Intlit b] -> Ok (IntlitComplex ((a, b), Complex.{re=Float.of_string a; im=Float.of_string b})) 105 | | Datatype.Complex64, `List [`Float re; `Float im] -> Ok (FloatComplex Complex.{re; im}) 106 | | Datatype.Complex64, `List [`String a; `String b] -> Ok (StringComplex ((a, b), Complex.{re=Float.of_string a; im=Float.of_string b})) 107 | | _, `Null -> Error "array metadata must contain a fill_value field." 108 | | _ -> Error "Unsupported fill value." 
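(* Illustrative round-trip sketch, assuming a float64 data type: because each
   constructor records the original JSON form alongside the parsed value,
   [of_yojson] followed by [to_yojson] reproduces the metadata encoding exactly.

     let json = Yojson.Safe.from_string {|"NaN"|} in
     match of_yojson Datatype.Float64 json with
     | Ok fv -> assert (Yojson.Safe.equal (to_yojson fv) json)
     | Error e -> failwith e

   Here [of_yojson] yields [StringFloat ("NaN", Float.nan)], which [to_yojson]
   (defined below) turns back into [`String "NaN"]. *)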
109 | 110 | let to_yojson : t -> Yojson.Safe.t = function 111 | | Char c -> `String (Printf.sprintf "%c" c) 112 | | Bool b -> `Bool b 113 | | Int i -> `Int i 114 | | Intlit (s, _) -> `Intlit s 115 | | Float f -> `Float f 116 | | IntFloat (i, _) -> `Int i 117 | | IntlitFloat (s, _) -> `Intlit s 118 | | StringFloat (s, _) -> `String s 119 | | IntComplex ((a, b), _) -> `List [`Int a; `Int b] 120 | | IntlitComplex ((a, b), _) -> `List [`Intlit a; `Intlit b] 121 | | FloatComplex Complex.{re; im} -> `List [`Float re; `Float im] 122 | | StringComplex ((a, b), _) -> `List [`String a; `String b] 123 | end 124 | 125 | module NodeType = struct 126 | type t = Array | Group 127 | let rec to_yojson x : Yojson.Safe.t = `String (show x) 128 | and show = function 129 | | Array -> "array" 130 | | Group -> "group" 131 | 132 | module Array = struct 133 | let of_yojson : Yojson.Safe.t -> (t, string) result = function 134 | | `String "array" -> Ok Array 135 | | `Null -> Error "metadata must contain a node_type field." 136 | | _ -> Error "node_type field must be 'array'." 137 | end 138 | 139 | module Group = struct 140 | let of_yojson : Yojson.Safe.t -> (t, string) result = function 141 | | `String "group" -> Ok Group 142 | | `Null -> Error "group metadata must contain a node_type field." 143 | | _ -> Error "node_type field must be 'group'." 144 | end 145 | end 146 | 147 | (* The shape of a Zarr array is the list of dimension lengths. It can be the 148 | empty list in the case of a zero-dimension array (scalar). *) 149 | module Shape = struct 150 | type t = Empty | Dims of int list 151 | 152 | let create = function 153 | | [] -> Empty 154 | | xs -> Dims xs 155 | 156 | let ( = ) x y = match x, y with 157 | | Empty, Empty -> true 158 | | Dims a, Dims b when List.equal Int.equal a b -> true 159 | | _ -> false 160 | 161 | let add (x : Yojson.Safe.t) acc = match x with 162 | | `Int i when i > 0 -> Result.map (List.cons i) acc 163 | | _ -> Error "shape field list must only contain positive integers." 164 | 165 | let of_yojson : Yojson.Safe.t -> (t, string) result = function 166 | | `List [] -> Ok Empty 167 | | `List xs -> Result.map (fun x -> Dims x) (List.fold_right add xs (Ok [])) 168 | | `Null -> Error "array metadata must contain a shape field." 169 | | _ -> Error "shape field must be a list of integers." 170 | 171 | let to_yojson : t -> Yojson.Safe.t = function 172 | | Empty -> `List [] 173 | | Dims xs -> `List (List.map (fun x -> `Int x) xs) 174 | 175 | let to_list = function 176 | | Empty -> [] 177 | | Dims xs -> xs 178 | 179 | let ndim = function 180 | | Empty -> 0 181 | | Dims xs -> List.length xs 182 | end 183 | 184 | module ZarrFormat = struct 185 | type t = int 186 | let to_yojson x : Yojson.Safe.t = `Int x 187 | let of_yojson = function 188 | | `Int (3 as i) -> Ok i 189 | | `Null -> Error "metadata must contain a zarr_format field." 190 | | _ -> Error "zarr_format field must be the integer 3." 191 | end 192 | 193 | module DimensionNames = struct 194 | type t = string option list 195 | 196 | let to_yojson (xs : t) : Yojson.Safe.t = 197 | `List (List.map (Option.fold ~none:`Null ~some:(fun s -> `String s)) xs) 198 | 199 | let add (x : Yojson.Safe.t) acc = match x with 200 | | `String s -> Result.map (List.cons (Some s)) acc 201 | | `Null -> Result.map (List.cons None) acc 202 | | _ -> Error "dimension_names must contain strings or null values." 
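(* Illustrative sketch of the encoding implemented by [to_yojson] and [add]
   above: a named dimension maps to a JSON string and an unnamed one to null.
   With made-up names,

     to_yojson [Some "rows"; None]                   (* `List [`String "rows"; `Null] *)
     of_yojson 2 (`List [`String "rows"; `Null])     (* Ok [Some "rows"; None] *)

   where [of_yojson] (defined below) also checks that the list length matches
   the array's dimensionality. *)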
203 | 204 | let of_yojson ndim x = match x with 205 | | `Null -> Ok [] 206 | | `List xs -> 207 | if List.length xs = ndim then List.fold_right add xs (Ok []) 208 | else Error "dimension_names length and array dimensionality must be equal." 209 | | _ -> Error "dimension_names field must be a list." 210 | end 211 | 212 | module Array = struct 213 | type t = 214 | {zarr_format : ZarrFormat.t 215 | ;shape : Shape.t 216 | ;node_type : NodeType.t 217 | ;data_type : Datatype.t 218 | ;codecs : Codecs.Chain.t 219 | ;fill_value : FillValue.t 220 | ;chunk_grid : RegularGrid.t 221 | ;chunk_key_encoding : ChunkKeyEncoding.t 222 | ;attributes : Yojson.Safe.t 223 | ;dimension_names : DimensionNames.t 224 | ;storage_transformers : Yojson.Safe.t list} 225 | 226 | let create ?(sep=`Slash) ?(dimension_names=[]) ?(attributes=`Null) ~codecs ~shape kind fv chunks = 227 | {codecs 228 | ;attributes 229 | ;dimension_names 230 | ;zarr_format = 3 231 | ;shape = Shape.create shape 232 | ;node_type = NodeType.Array 233 | ;storage_transformers = [] 234 | ;fill_value = FillValue.create kind fv 235 | ;data_type = Datatype.of_kind kind 236 | ;chunk_key_encoding = ChunkKeyEncoding.create sep 237 | ;chunk_grid = RegularGrid.create ~array_shape:shape chunks} 238 | 239 | let to_yojson : t -> Yojson.Safe.t = fun t -> 240 | let l = 241 | [("zarr_format", ZarrFormat.to_yojson t.zarr_format) 242 | ;("shape", Shape.to_yojson t.shape) 243 | ;("node_type", NodeType.to_yojson t.node_type) 244 | ;("data_type", Datatype.to_yojson t.data_type) 245 | ;("codecs", Codecs.Chain.to_yojson t.codecs) 246 | ;("fill_value", FillValue.to_yojson t.fill_value) 247 | ;("chunk_grid", RegularGrid.to_yojson t.chunk_grid) 248 | ;("chunk_key_encoding", ChunkKeyEncoding.to_yojson t.chunk_key_encoding)] 249 | in 250 | (* optional fields.*) 251 | match t.attributes, t.dimension_names with 252 | | `Null, [] -> `Assoc l 253 | | `Null, xs -> `Assoc (l @ ["dimension_names", DimensionNames.to_yojson xs]) 254 | | x, [] -> `Assoc (l @ ["attributes", x]) 255 | | x, xs -> `Assoc (l @ [("attributes", x); ("dimension_names", DimensionNames.to_yojson xs)]) 256 | 257 | let of_yojson x = 258 | let open Util.Result_syntax in 259 | let member = Yojson.Safe.Util.member in 260 | let* zarr_format = ZarrFormat.of_yojson (member "zarr_format" x) in 261 | let* shape = Shape.of_yojson (member "shape" x) in 262 | let* data_type = Datatype.of_yojson (member "data_type" x) in 263 | let* fill_value = FillValue.of_yojson data_type (member "fill_value" x) in 264 | let* chunk_key_encoding = ChunkKeyEncoding.of_yojson (member "chunk_key_encoding" x) in 265 | let* chunk_grid = RegularGrid.of_yojson (Shape.to_list shape) (member "chunk_grid" x) in 266 | let* codecs = Codecs.Chain.of_yojson (RegularGrid.chunk_shape chunk_grid) (member "codecs" x) in 267 | let* node_type = NodeType.Array.of_yojson (member "node_type" x) in 268 | (* Optional fields *) 269 | let* dimension_names = DimensionNames.of_yojson (Shape.ndim shape) (member "dimension_names" x) in 270 | let+ storage_transformers = match member "storage_transformers" x with 271 | | `Null -> Ok [] 272 | | _ -> Error "storage_transformers field is not yet supported." 
273 | in 274 | let attributes = member "attributes" x in 275 | {zarr_format; shape; node_type; data_type; codecs; fill_value; chunk_grid 276 | ;chunk_key_encoding; attributes; dimension_names; storage_transformers} 277 | 278 | let ( = ) x y = 279 | Shape.(x.shape = y.shape) 280 | && Datatype.(x.data_type = y.data_type) 281 | && Codecs.Chain.(x.codecs = y.codecs) 282 | && FillValue.(equal x.fill_value y.fill_value) 283 | && RegularGrid.(x.chunk_grid = y.chunk_grid) 284 | && ChunkKeyEncoding.(x.chunk_key_encoding = y.chunk_key_encoding) 285 | && Yojson.Safe.(equal x.attributes y.attributes) 286 | && List.equal (fun a b -> Option.equal String.equal a b) x.dimension_names y.dimension_names 287 | && List.equal Yojson.Safe.equal x.storage_transformers y.storage_transformers 288 | 289 | let codecs t = t.codecs 290 | let attributes t = t.attributes 291 | let shape t = Shape.to_list t.shape 292 | let dimension_names t = t.dimension_names 293 | let chunk_shape t = RegularGrid.chunk_shape t.chunk_grid 294 | let index_coord_pair t coord = RegularGrid.index_coord_pair t.chunk_grid coord 295 | let chunk_key t index = ChunkKeyEncoding.encode t.chunk_key_encoding index 296 | let chunk_indices t shape = RegularGrid.indices t.chunk_grid shape 297 | let encode t = Yojson.Safe.to_string (to_yojson t) 298 | let update_attributes t attrs = {t with attributes = attrs} 299 | (* FIXME: must ensure the dimensions of the array remain unchanged. *) 300 | let update_shape t shape = {t with shape = Shape.create shape} 301 | 302 | let decode s = match of_yojson (Yojson.Safe.from_string s) with 303 | | Error e -> raise (Parse_error e) 304 | | Ok m -> m 305 | 306 | let is_valid_kind (type a) t (kind : a Ndarray.dtype) = match kind, t.data_type with 307 | | Ndarray.Char, Datatype.Char 308 | | Ndarray.Bool, Datatype.Bool 309 | | Ndarray.Int8, Datatype.Int8 310 | | Ndarray.Uint8, Datatype.Uint8 311 | | Ndarray.Int16, Datatype.Int16 312 | | Ndarray.Uint16, Datatype.Uint16 313 | | Ndarray.Int32, Datatype.Int32 314 | | Ndarray.Int64, Datatype.Int64 315 | | Ndarray.Uint64, Datatype.Uint64 316 | | Ndarray.Float32, Datatype.Float32 317 | | Ndarray.Float64, Datatype.Float64 318 | | Ndarray.Complex32, Datatype.Complex32 319 | | Ndarray.Complex64, Datatype.Complex64 320 | | Ndarray.Int, Datatype.Int 321 | | Ndarray.Nativeint, Datatype.Nativeint -> true 322 | | _ -> false 323 | 324 | let fillvalue_of_kind (type a) t (kind : a Ndarray.dtype) : a = match kind, t.fill_value with 325 | | Ndarray.Char, FillValue.Char c -> c 326 | | Ndarray.Bool, FillValue.Bool b -> b 327 | | Ndarray.Int8, FillValue.Int i -> i 328 | | Ndarray.Uint8, FillValue.Int i -> i 329 | | Ndarray.Int16, FillValue.Int i -> i 330 | | Ndarray.Uint16, FillValue.Int i -> i 331 | | Ndarray.Int32, FillValue.Int i -> Int32.of_int i 332 | | Ndarray.Int, FillValue.Int i -> i 333 | | Ndarray.Int64, FillValue.Int i -> Int64.of_int i 334 | | Ndarray.Int64, FillValue.Intlit (_, i) -> Stdint.Uint64.to_int64 i 335 | | Ndarray.Uint64, FillValue.Int i -> Stdint.Uint64.of_int i 336 | | Ndarray.Uint64, FillValue.Intlit (_, i) -> i 337 | | Ndarray.Nativeint, FillValue.Int i -> Nativeint.of_int i 338 | | Ndarray.Nativeint, FillValue.Intlit (_, i) -> Stdint.Uint64.to_nativeint i 339 | | Ndarray.Float32, FillValue.Float f -> f 340 | | Ndarray.Float64, FillValue.Float f -> f 341 | | Ndarray.Complex32, FillValue.FloatComplex f -> f 342 | | Ndarray.Complex64, FillValue.FloatComplex f -> f 343 | | _ -> failwith "kind is not compatible with node's fill value." 
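(* Illustrative sketch with a hypothetical [meta : t] whose data_type is
   float64 and whose fill_value was parsed as [FillValue.Float f]:

     assert (is_valid_kind meta Ndarray.Float64);       (* matching kind *)
     assert (not (is_valid_kind meta Ndarray.Int32));   (* mismatched kind *)
     let _fv : float = fillvalue_of_kind meta Ndarray.Float64 in  (* returns f *)
     ()

   Calling [fillvalue_of_kind meta Ndarray.Int32] would instead raise
   [Failure], matching the last case above. *)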
344 | end 345 | 346 | module Group = struct 347 | type t = {zarr_format : ZarrFormat.t; node_type : NodeType.t; attributes : Yojson.Safe.t} 348 | 349 | let to_yojson : t -> Yojson.Safe.t = fun t -> 350 | let l = [("zarr_format", ZarrFormat.to_yojson t.zarr_format); ("node_type", NodeType.to_yojson t.node_type)] in 351 | (* optional fields.*) 352 | match t.attributes with 353 | | `Null -> `Assoc l 354 | | x -> `Assoc (l @ [("attributes", x)]) 355 | 356 | let default = {zarr_format = 3; node_type = NodeType.Group; attributes = `Null} 357 | let encode t = Yojson.Safe.to_string (to_yojson t) 358 | let ( = ) x y = Yojson.Safe.(equal x.attributes y.attributes) 359 | let update_attributes t attrs = {t with attributes = attrs} 360 | let attributes t = t.attributes 361 | 362 | let of_yojson x = 363 | let open Util.Result_syntax in 364 | let* zarr_format = ZarrFormat.of_yojson Yojson.Safe.Util.(member "zarr_format" x) in 365 | let+ node_type = NodeType.Group.of_yojson Yojson.Safe.Util.(member "node_type" x) in 366 | {zarr_format; node_type; attributes = Yojson.Safe.Util.member "attributes" x} 367 | 368 | let decode s = match of_yojson (Yojson.Safe.from_string s) with 369 | | Error e -> raise (Parse_error e) 370 | | Ok m -> m 371 | 372 | let show t = 373 | let x, y = NodeType.show t.node_type, Yojson.Safe.show t.attributes in 374 | Format.sprintf {|"{zarr_format=%d; node_type=%s; attributes=%s}"|} t.zarr_format x y 375 | end 376 | --------------------------------------------------------------------------------
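A brief usage sketch of the Group metadata module above (illustrative only; it assumes the values shown here are exposed through zarr/src/metadata.mli and re-exported as Zarr.Metadata):

  let json = Zarr.Metadata.Group.encode Zarr.Metadata.Group.default in
  assert (json = {|{"zarr_format":3,"node_type":"group"}|});
  assert Zarr.Metadata.Group.(default = decode json)

The expected string follows directly from Group.to_yojson, which emits only the mandatory zarr_format and node_type fields when attributes is `Null.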